[
  {
    "path": ".gitignore",
    "content": "/bin/\n/target/\n.classpath\n.project\n.settings/\n"
  },
  {
    "path": "LICENSE.txt",
    "content": "http://www.apache.org/licenses/LICENSE-2.0.html\n\nApache License\n\nVersion 2.0, January 2004\n\nhttp://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n\"License\" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.\n\n\"Licensor\" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.\n\n\"Legal Entity\" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, \"control\" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.\n\n\"You\" (or \"Your\") shall mean an individual or Legal Entity exercising permissions granted by this License.\n\n\"Source\" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.\n\n\"Object\" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.\n\n\"Work\" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).\n\n\"Derivative Works\" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.\n\n\"Contribution\" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, \"submitted\" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as \"Not a Contribution.\"\n\n\"Contributor\" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:\n\n    You must give any other recipients of the Work or Derivative Works a copy of this License; and\n    You must cause any modified files to carry prominent notices stating that You changed the files; and\n    You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and\n    If the Work includes a \"NOTICE\" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part 
of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.\n\n    You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.\n\n8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. 
However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.\n\nNote: Other license terms may apply to certain, identified software files contained within or distributed with the accompanying software if such terms are included in the directory containing the accompanying software. Such other license terms will then apply in lieu of the terms of the software license above.\n\nEND OF TERMS AND CONDITIONS"
  },
  {
    "path": "NOTICE.txt",
    "content": "amazon-kinesis-aggregators\n\nCopyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.\n\nLicensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at\n\n    http://aws.amazon.com/apache2.0/\n\nor in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. "
  },
  {
    "path": "README.md",
    "content": "# Amazon Kinesis Aggregators\n\n----\n\n*This project is now deprecated, and only updates for security vulnerabilities in dependencies will be made. We advise the use of Apache Flink on Amazon Kinesis Analytics instead.*\n\n----\n\nAmazon Kinesis Aggregators is a Java framework that enables the automatic creation of real-time aggregated time series data from Amazon Kinesis streams. \n\nYou can use this data to answer questions such as ‘how many times per second has ‘x’ occurred’ or ‘what was the breakdown by hour over the day of the streamed data containing ‘y'. Using this framework, you simply describe the format of the data on your stream (CSV, JSON, and so on), the granularity of times series that you require (seconds, minutes, hours, and so on), and how the data elements that are streamed should be grouped; the framework handles all the time series calculations and data persistence. You then simply consume the time series aggregates in your application using Amazon DynamoDB, or interact with the time series using Amazon CloudWatch or the Web Query API. \n\nYou can also analyze the data using Hive on Amazon Elastic MapReduce, or bulk import it to Amazon Redshift. The process runs as a standalone Amazon Kinesis-enabled application which only requires configuration, or can be integrated into existing Amazon Kinesis applications.\n\nThe data is stored in a time series based on how you aggregate it. A dataset aggregating Telecoms Call Data Records in DynamoDB might look like this:\n\n![Dynamo Real Time Aggregate Table](https://s3.amazonaws.com/amazon-kinesis-aggregators/img/DynamoTable.png)\n\nThe corresponding data in CloudWatch would look like this:\n\n![CloudWatch Dashboard View](https://s3.amazonaws.com/amazon-kinesis-aggregators/img/CloudWatch.png)\n\n## Building Aggregators\n\nAmazon Kinesis Aggregators is built using Apache Maven. To build, simply run Maven from the amazon-kinesis-aggregators directory. 
The target directory contains the following build artifacts:\n\n* **amazon-kinesis-aggregators-.9.2.7.4.jar** - Includes no compiled dependencies\n* **AmazonKinesisAggregators.jar-complete.jar** - Includes all required dependencies\n* **AmazonKinesisAggregator.war** - The web application archive file\n\n## Running Aggregators\n\nAmazon Kinesis Aggregators ships with several deployment options, which should enable you to run with minimal operational overhead while also accommodating advanced deployment use cases. You can run Amazon Kinesis Aggregators as:\n\n* A fully-managed Elastic Beanstalk application. All you need to do is deploy the KinesisAggregators.war file, and provide a configuration file that is accessible using HTTP.\n* A managed Java client, running through the host orchestration of your choice. For example, you can deploy this managed Java client as part of an Amazon EC2 fleet that uses Auto Scaling.\n* As part of an existing Amazon Kinesis-enabled application. This enables an existing application to 'sideload' aggregator processing, as an augmentation to an already-established application.\n\n### Running Amazon Kinesis Aggregators Using Elastic Beanstalk\n\nAmazon Kinesis Aggregators compiles a web application archive (WAR) file, which enables easy deployment on Java application servers, such as Apache Tomcat, using Elastic Beanstalk (http://aws.amazon.com/elasticbeanstalk). Amazon Kinesis Aggregators also includes configuration options that instruct Elastic Beanstalk to scale the application on CPU load, which is typically the bottleneck for applications as they scale up. This is the recommended deployment method.\n\nTo deploy Amazon Kinesis Aggregators as an Elastic Beanstalk application, start by creating a new Elastic Beanstalk web server application with the pre-configured Tomcat stack. When prompted by the AWS Management Console, upload the KinesisAggregators.war file from your local build. 
Select an instance type that is suitable for the type of aggregation that you are running (specifically, the higher the granularity of label items and the more fine-grained the TimeHorizon value, the larger the instance type you require). After deployment, click the URL for the application environment; the following message is displayed:\n\n```OK - Kinesis Aggregators Managed Application hosted in Elastic Beanstalk Online ```\n\nFurthermore, if you request a log snapshot from the Elastic Beanstalk console, you see a log line indicating the following:\n\n```No Aggregators Configuration File found in Beanstalk Configuration config-file-url. Application is Idle```\n\nThis indicates that the application is deployed but not configured. To configure the application, add these Elastic Beanstalk configuration parameters as required: \n\n* **stream-name** - The name of the stream.\n* **application-name** - The name of the Amazon Kinesis application.\n* **failures-tolerated** - The number of worker exceptions allowed before the worker terminates.\n* **position-in-stream** - The position in the stream to start consuming data from. The possible values are 'LATEST' and 'TRIM_HORIZON'.\n* **max-records** - The maximum number of records to consume from a stream in a single cycle. You can set this value if your stream processing (in addition to aggregation) is slow.\n* **region** - The region to use for the stream, the DynamoDB lease tables, and the CloudWatch and aggregate data stores. Amazon Kinesis Aggregators does not currently support cross-region deployment.\n* **environment** - The name of the environment. This ensures that all DynamoDB tables are prefixed with the environment, enabling you to keep data sets separate for test and production (for example).\n* **config-file-url** - The URL for the configuration file.\n\nThis is typically done by adding `-D` flags to the JVM command line options. Then, choose 'Save' and Elastic Beanstalk applies the changes to the environment. 
Wait a minute or so, and then snapshot logs to confirm that Amazon Kinesis Aggregators is running.\n\n### Running the Managed Java Client Application\n\nThis is a great option if you have data in Amazon Kinesis, but don’t want to use Elastic Beanstalk. You can start the application from a server using the following command:\n\n```java -cp AmazonKinesisAggregators.jar-complete.jar -Dconfig-file-path=<configuration> -Dstream-name=<stream name> -Dapplication-name=<application name> -Dregion=<region name - us-east-1, eu-west-1, etc> com.amazonaws.services.kinesis.aggregators.consumer.AggregatorConsumer```\n\nIn addition to the configuration items outlined in the Elastic Beanstalk section, use the following configuration item:\n\n* **config-file-path** - The path to the configuration file.\n\nWe recommend that you run your servers in an Auto Scaling group to ensure fault tolerance if the host fails.\n\n### Configuration\n\nYou can use the configuration file to create one or more aggregations against the same stream. It is a JSON file that creates a set of aggregator objects managed by the framework. Create one aggregator for each distinct label that you want to aggregate on. Each aggregator can then have its own properties of time granularity, aggregator type, and so on.\n\nThe core structure of the configuration file is an array of aggregator objects. For example, the following configuration creates two aggregators:\n\n```[{aggregatorDef1}, {aggregatorDef2}]```\n\nNote that aggregatorDef*N* is an aggregator configuration. An aggregator configuration must include the following attributes:\n\n* **namespace** (String) - Enables you to create separate time series data stores. This namespace is used with the application name and environment to create the underlying data tables for the time series, as well as the namespace for custom CloudWatch metrics. 
Use something that's meaningful based upon the label and time granularity.\n* **labelItems** (array&lt;String&gt;) - Includes a list of the elements of the data stream to aggregate on. The data stored in the time series is aggregated by the unique values from the stream for these attributes, and by time. For instance, to aggregate data for searches made against a car website, you might have a label item set of [\"Make\",\"Model\",\"Year\"]. If you are using CSV data, then this same configuration might be positional based on the fields in the line, such as [0,3,5].\n* **labelAttributeAlias** (String) - Enables you to name the target database attribute for the label. This is particularly useful when you are using CSV or regex-extracted data, and would otherwise end up with a label attribute named the same as the label attribute index.\n* **type** (enum) - The type of aggregation to run. The available types are 'COUNT' and 'SUM'. A counting aggregator simply counts the instances of unique values in Label Items by time. Using the previous example, it would generate a count of searches by configured time period for each unique combination of Make, Model, and Year. Building on this, 'SUM' type aggregators also calculate summaries of other numeric values on the stream. For more information, see the configuration option **summaryItems**.\n* **timeHorizons** (array&lt;enum&gt;) - Because the data is captured as a time series, you must tell the aggregator which definition of time you require. To have the data on the stream aggregated by minute, specify 'MINUTE'. To put data into buckets of 5 minutes duration, specify MINUTES_GROUPED(5). You can specify multiple timeHorizon values, and the aggregator automatically maintains the time series data at that granularity. A common configuration might be [\"SECOND\",\"HOUR\",\"FOREVER\"], which gives per-second aggregates, a rollup by Hour, and a simple data set to view everything that ever occurred in a single value. 
The possible values are:\n  * SECOND\n  * MINUTE\n  * MINUTES\\_GROUPED(int minutePeriod) - Groups data into time buckets using a minute period. For 4 buckets per hour, use '15', or use '5' for buckets of aggregation that are 5 minutes long.\n  * HOUR\n  * DAY\n  * MONTH\n  * YEAR\n  * FOREVER - Rolls up everything that occurred in a single value '*'. \n* **dataExtractor** (enum) - Tells the aggregator how to parse and extract the Label Items from your stream. Currently, the following data formats are supported for external configurations using the configuration file:\n  * **CSV** - Character-separated UTF-8 data. The default delimiter is a comma. To override the delimiter, set the configuration option 'delimiter' to the character value to use as the field terminator. Also, note that all data extractors support multi-value events. This means that you can have many CSV 'lines' within a single event, which are extracted with a line terminator of \"\\n\". To override the line terminator on any data extractor that is text-based, set the configuration option 'lineTerminator' to the character to use as the line terminator. When this data extractor is used, indicate the Label Items using zero-index position values of the fields.\n  * **JSON** - UTF-8 encoded JSON data. This data can either reside in a JSON array on the event (for example [{object1},{object2},{object3}]) or can be a single object per 'line' (for example {object1}\\n {object2}). To control the object delimiter, use the configuration option 'lineTerminator'.\n  * **REGEX** - UTF-8 encoded strings of arbitrary data. With this configuration option, you must include the 'regularExpression' configuration option. This data extraction method also uses zero-indexed positional values for Label Items.\n  * **OBJECT** - Serialized objects using Jackson JSON binary data. With this configuration option, you must include the 'class' configuration option. 
Using this data extraction method, an event can include only 1 serialized object.\n* **dateItem** (String) - The attribute or field index that defines when the event occurred. This is used to generate the aggregate for the correct time period for the event. This can be formatted as a long value of epoch seconds, or a String value. If you provide a String value in the event, you must also set the configuration option 'dateFormat'. If it is omitted, then the timestamp of the event is set to the timestamp of the server instance when it processes the item.\n* **dateAttributeAlias** (String) - Similar to labelAttributeAlias, this enables you to set the name of the date attribute in the aggregated data table.\n* **dateFormat** (String) - The date format of the dateItem, using date format strings as specified at ```http://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html```.\n\nYou can also include the following options in the configuration:\n\n* **summaryItems** (array&lt;String&gt;) - If the aggregator 'type' is SUM, then the aggregator automatically performs a time series aggregation on the summary items configured. These summary items must be numeric values that you want aggregated for the indicated time period and values of Label Items. For example, if your stream includes call data records, you might want to sum the duration of all calls made by mobile network by hour. Along with configuration of the Label Items and time, you would include a summary item of 'callDuration'. As with the configuration of the Label Items, summary items are zero-index positional values for CSV and regex data extractions, attribute names for JSON, and method names for OBJECT. For more information, see the Summary Items Mini-Language section.\n* **filterRegex** (String) - Filters the stream data using a filtering regular expression. If provided, only the data that conforms to the regular expression is passed in for subsequent parsing. 
Note this step is applied on the raw underlying stream data as String values, and is not available for object serialization.\n* **failOnDataExtraction** (boolean) - By default, the aggregator fails if it can't understand the data on the stream, to ensure that all events are properly accounted for. If you have a data stream that contains internally inconsistent data, and you want to perform a simple aggregation whenever you can successfully parse the data stream, set this value to 'false'. Alternatively, consider writing a **filterRegex** expression that extracts only the data that fits the configuration of Label, Date, and Summary Items.\n* **tableName** (String) - Sets the name of the underlying time series data table in the data store.\n* **environment** (String) - Runs an aggregator with a specified environment type. This enables you to separate the underlying data stores used for the time series data into production and test, for example.\n* **readIOPS** (int) - Use with the default DynamoDB IDataStore to set up the number of read I/O operations per second (IOPS) you want on the time series data store.\n* **writeIOPS** (int) - Use with the default DynamoDB IDataStore to set up how many write IOPS you want on the time series data store.\n* **IDataStore** (String) - Configures alternative backing data stores other than DynamoDB. If you have written your own data store implementation, specify the full class name, including the package, to have this data store used. You can also specify the internal alternate data store 'com.amazonaws.services.kinesis.aggregators.datastore.DevNullDataStore', which does NOT store the time series data, and is useful only to consume the time series from CloudWatch.\n* **emitMetrics** (boolean) - Emits the time series aggregated data as a custom CloudWatch domain of metrics. 
Set this value to 'true' to create a custom CloudWatch metric for the application name and namespace of the aggregator, with dimensions on the label and summary items.\n\n### Summary Items Mini-Language\n\nYou can configure summary items and the type of summary using a miniature specification language, and navigate complex document structures in JSON data. You can apply the following type of summary transformations:\n\n* **SUM** - Applies the default summary if you do not specify a summary type. This sums up all values seen for label and time values.\n* **MIN** - Calculates the minimum value observed for the time period and label values.\n* **MAX** - Calculates the maximum value observed for the time period and label values.\n* **FIRST** - Stores the first observed value for the time period and label values.\n* **LAST** - Is equal to the latest value for the time period and label values.\n\nSummary items can have aliases applied, as in SQL, to control the name of the generated attribute in the data store you write to. 
You simply add the name of the item you require to the definition of the summary item, including functions.\n\nYou can also navigate an entity structure in JSON-formatted stream data using dot notation; for example, given the following object, you can access the calculated duration using a summary item of 'timeValues.durations.calculated':\n\n```\n{\n  \"name\": \"Object To Be Aggregated\",\n  \"timeValues\": {\n    \"durations\": {\n      \"calculated\": 60,\n      \"recorded\": 58\n    },\n    \"endTime\": \"01/01/1970 01:00:00\",\n    \"startTime\": \"01/01/1970 00:00:00\"\n  }\n}\n```\n\nThese concepts can be combined into a mini-specification:\n\nExample 1 - Calculate the min, max, and sum of value 7 in a CSV stream, giving them friendly names - ```[\"min(7) min-purchase-price\",\"max(7) max-purchase-price\",\"sum(7) total-sales\"]```\n\nExample 2 - Calculate the sum and maximum value of the calculated duration in the JSON stream -  ```[\"sum(timeValues.durations.calculated)\",\"max(timeValues.durations.calculated)\"]```\n\n### Sample Configurations\n\n* **JSON** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/json-aggregator.json\n* **CSV** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/csv-aggregator.json\n* **Regular Expression** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/regex-aggregator.json\n* **Object Serialized Data** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/object-aggregator.json\n\n### Aggregator Data Structure\n\nThe data structure for aggregated data is arranged as a hash/range table in DynamoDB on the Label attributes and Date attribute at the configured granularity of time. 
Every table also includes the following:\n\n* **eventCount** - The number of events consumed during the period.\n* **lastWriteSeq** - The last sequence value from the Amazon Kinesis stream that generated an update to the time period and aggregate label.\n* **lastWriteTime** - The time on the consumer application when the update was made to the aggregate data.\n* **scatterPrefix** - A random number between 0 and 99 used to ensure that there are no write bottlenecks on global secondary indexes for the time period and last write sequence.\n\nOf course, the table also includes any summary values that were added to the aggregator configuration. The names of these summary attributes in DynamoDB follow the pattern &lt;attribute&gt;-&lt;summary type&gt;, or use the alias provided.\n\n* For JSON streams, the attribute is the attribute name configured.\n* For object-serialized streams, the attribute is the summary method converted to a user-friendly name. For example, 'getComputedValue' is written to the data store as 'computedValue'.\n* For CSV and String data parsed using regular expressions, the attribute value is the position in the stream, indexed from 0.\n* The summary type is one of the following values: MIN, MAX, SUM, FIRST, or LAST.\n\n#### Indexes\n\nAll aggregator data stores have global secondary indexes (logically) on the date value and on lastWriteSeq. To ensure adequate write performance, these indexes are structured as hash/range on the scatterPrefix (a random number between 0 and 99) and the value being indexed.\n\n#### Web-based Query API\n\nThe Amazon Kinesis Aggregators web application also provides several query API operations, which return data in the JSON format. When deployed, you can make an HTTP request to a variety of endpoints to retrieve different types of data. Currently, there is no security offered for the Web API operations, so you must ensure that they are only accessible from within your VPC using security group rules or similar. 
Do NOT make these endpoints publicly accessible.\n\n##### Viewing the Running Configuration\n\nYou can view the configuration of your aggregators at the URL ```<web application>/configuration```, which returns an object such as:\n\n```\n{\n  \"application-name\": \"EnergyRealTimeDataConsumer\",\n  \"config-file-url\": \"s3://mybucket/kinesis/sensor-consumer-regex.json\",\n  \"environment\": null,\n  \"failures-tolerated\": null,\n  \"max-records\": \"2500\",\n  \"position-in-stream\": \"LATEST\",\n  \"region\": \"eu-west-1\",\n  \"stream-name\": \"EnergyPipelineSensors\",\n  \"version\": \".9.2.7.4\"\n}\n```\n\n##### Date-based Queries\n\nUse the Date query to find data that has been aggregated on the basis of the stream timestamp value. For example, use this interface to periodically retrieve all new data that has been processed, or to pull data for specific time ranges for comparative analysis. The URL is:\n\n```\n<web application>/dateQuery?params\n```\n\nParameters:\n\n* **namespace** - The namespace for the aggregator configuration.\n* **operator** - The condition to query for, from the DynamoDB ComparisonOperator enum: EQ, GT, GE, and so on. Note that BETWEEN is not yet supported.\n* **granularity** - The granularity of time required, from the TimeHorizon enum: SECOND, MINUTE, HOUR, and so on.\n* **date-value** - The date value to query relative to, in yyyy-MM-dd+hh:mm:ss format (for example, 2014-09-01+18:00:00).\n\nThis returns all data from the aggregated table for the date period specified.\n\nYou can also use the internal Java API:\n\n```\npublic List<Map<String, AttributeValue>> queryByDate(Date dateValue, TimeHorizon h,\nComparisonOperator comp, int threads) throws Exception\n```\n\nThis method queries by the Date, TimeHorizon, and ComparisonOperator values you select. 
For example, to find all hourly aggregates after 3pm, use:\n\n```\ndateValue=Date('2014-01-01 15:00:00'), TimeHorizon.HOUR, ComparisonOperator.GT\n```\n\nThe Threads parameter is the number of threads used to do the query. This is due to the index being organized on hash/range of scatterPrefix/DateValue.\n\n##### Query for Label/Date Values\n\nTo query the application to find the unique set of labels and date values that have been aggregated, use the following URL:\n\n```<web application>/keyQuery?params```\n\nParameters:\n\n* **namespace** - The namespace for the aggregator configuration.\n* **scope** - Use 'HashKey' to get just the unique aggregate label values or 'HashAndRangeKey' to get both the label and date values.\n\nThis returns a unique list of all keys from the aggregated table.\n\nYou can also use the internal Java API:\n\n```\npublic Map<String, AttributeValue> queryValue(String label, Date dateValue, TimeHorizon h)\nthrows Exception\n```\n\nThis method takes the label you are interested in, as well as a date for the date value. If you have multiple TimeHorizon values configured on the aggregator, it generates the correct dateValue to query the underlying table with. You are likely to use this interface to query across aggregator data stores looking for related time-based values.\n\n## Integrating Aggregators into Existing Java Applications\n\nIn addition to running aggregators as stand-alone Amazon Kinesis applications, you can integrate them into existing Amazon Kinesis applications. You can:\n\n* Run the managed consumer from an existing control environment\n* Inject a set of aggregators into a managed IRecordProcessorFactory\n* Use an existing IRecordProcessor to send data to one or more aggregators\n\n### Managed IRecordProcessorFactory\n\nTo build your Amazon Kinesis worker and configure it explicitly, you can still use aggregators to create IRecordProcessorFactory. 
In this case, simply create a new instance of com.amazonaws.services.kinesis.aggregators.processor.AggregatorProcessorFactory with the configured aggregators.\n\n### Integration with Existing IRecordProcessors\n\nIf you have an existing worker application and you simply want to add the aggregation capability, you can directly integrate with one or more aggregators. To do this, simply construct the aggregators using a configuration file, or using a pure Java configuration. Then, to inject new data into the aggregator, simply call:\n\n```void aggregate(List<record> records)```\n\nThis causes the time series calculations to be done based upon the configuration of the aggregators. Then, when your worker normally calls checkpoint(), also call:\n\n```void checkpoint()```\n\nThis flushes the in-memory time series state to the backing data store. You must ensure that the aggregators are initialized correctly against the shard for the worker by calling this method in the existing KCL Application IRecordProcessor initialize() method:\n\n```void initialize(String shardId)```\n\nYou must also ensure that if the shutdown() method is invoked on your Amazon Kinesis application, you call:\n\n```void shutdown(boolean flushState)```\nIf the shutdown reason specified in the shutdown method for IRecordProcessor is ShutdownReason.ZOMBIE, set flushState to 'false' to allow the data to be re-aggregated by another worker. However, if the value is ShutdownReason.TERMINATE, you should flush the aggregator state on termination.\n\n### Configuring Aggregators in Existing Applications\n\nThere are a variety of ways to configure aggregators when you are integrating into existing applications. 
You might use a factory to create one or more aggregators from a simple set of arguments, or you can configure each aggregator directly and manage it as part of an aggregator group.\n\n#### Aggregator Factories\n\nThere are a variety of aggregator factories available in the com.amazonaws.services.kinesis.aggregators.factory package, which generally map to the configuration types found in the configuration file. In fact, you can use configuration files to configure aggregators from Java using the following:\n\n```\nExternallyConfiguredAggregatorFactory.buildFromConfig(  \nString streamName,  \nString applicationName,  \nKinesisClientLibConfiguration config,  \nString configFile)  \n```  \nYou can also take advantage of aggregators that are specific to the type of data to be aggregated:\n\n##### JSON Data\n\n```\nJsonAggregatorFactory.newInstance(String streamName  \n, String appName  \n, KinesisClientLibConfiguration config  \n, String namespace  \n, TimeHorizon timeHorizon  \n, AggregatorType aggregatorType  \n, List<String> labelAttributes  \n, String dateAttribute  \n, String dateFormat \n, List<String> summaryAttributes)  \n```\n##### CSV Data\n\n```\nCsvAggregatorFactory.newInstance(String streamName  \n, String appName  \n, KinesisClientLibConfiguration config  \n, String namespace  \n, TimeHorizon timeHorizon  \n, AggregatorType aggregatorType  \n, String delimiter  \n, List<Integer> labelIndicies  \n, int dateIndex  \n, String dateFormat \n, List<Object> summaryIndicies)  \n```\n##### String Data parsed with Regular Expressions\n\n```\nRegexAggregatorFactory.newInstance(String streamName  \n, String appName  \n, KinesisClientLibConfiguration config  \n, String namespace  \n, List<TimeHorizon> timeHorizons  \n, AggregatorType aggregatorType  \n, String regularExpression  \n, List<Integer> labelIndicies  \n, int dateIndex  \n, String dateFormat  \n, List<Object> summaryIndicies)  \n```\n##### Object Serialized Data\n\nYou can generate aggregators for 
object-serialized data using annotations:\n\n```\nObjectAggregatorFactory.newInstance(String streamName  \n, String appName  \n, KinesisClientLibConfiguration config  \n, Class clazz)  \n```\nNote that 'clazz' is a class that has been configured using annotations found in the com.amazonaws.services.kinesis.aggregators.annotations package. This factory method throws an error if the class is not annotated.\n\nAlternatively, you can configure the aggregator directly:\n\n```\nObjectAggregatorFactory.newInstance(String streamName  \n, String appName  \n, KinesisClientLibConfiguration config  \n, String namespace  \n, List<TimeHorizon> timeHorizons  \n, AggregatorType aggregatorType  \n, Class clazz  \n, List<String> labelMethods  \n, String dateMethod  \n, List<String> summaryMethods)\n```\n\n#### Direct Configuration\n\nIf you want even more control over the configuration of a given set of aggregators, then you can configure them directly. To effectively do this, you must understand how aggregators work. Aggregators are built around several subsystems that their factory methods configure automatically. When you build aggregators directly, you must construct an aggregator from its constituent subsystems. For more information, see the 'Extending Aggregators' section of this document.\n\nTo configure an aggregator directly, you must configure two of the subsystems: the aggregator and the IDataExtractor that extracts the data from the stream.\n\n##### IDataExtractor\n\nWhen you create an aggregator directly, you must specify the IDataExtractor to get data out of the stream for aggregation. There are IDataExtractors in the com.amazonaws.services.kinesis.aggregators.io package. Each of these map to the supported data formats, and provide relevant configuration options, including label, Date, and summary items. IDataExtractors use fluent builders for all optional configurations. 
For example, creating a JsonDataExtractor looks like this:\n\n```\nnew JsonDataExtractor(labelAttributes)  \n.withDateValueAttribute(dateAttribute)  \n.withSummaryAttributes(summaryAttributes)  \n.withDateFormat(dateFormat);  \n```\n\n##### Aggregator\n\nYou then create the aggregator with the options that are specific to it, including KinesisClientLibConfiguration, required TimeHorizon values, and options for emitting metrics. For example, using the example JsonDataExtractor, you might configure the aggregator as follows:\n\n```\nreturn new StreamAggregator(streamName, appName, namespace, config, dataExtractor)  \n.withTimeHorizon(timeHorizons)  \n.withAggregatorType(aggregatorType)  \n.withCloudWatchMetrics(true);  \n```\n\n## Extending Aggregators\n\nYou might want to extend aggregators for a variety of reasons. The use cases that we know of today that will require extension include supporting data on a stream that is compressed, encrypted, and uses an object serialization format other than Jackson/JSON, or implementing large objects. We designed aggregators with extensibility in mind. You can extend the framework at the following integration points.\n\n### Data Format & Handling\n\nThe ability to support CSV, JSON, arbitrary string data and object serialization is provided by the IDataExtractor and IKinesisSerializer interfaces, residing at com.amazonaws.services.kinesis.aggregators.io and io.Serializer.\n\n#### IKinesisSerializer\n\nThis interface interoperates between the internal data format used by IDataExtractors and the byte arrays used on the stream. You implement IKinesisSerializer to support compressed stream data or if your data is encrypted, for example. 
The implementation would conform to the following interface, which is identical to the Amazon Kinesis Connector ITransformer class:\n\n```\n/**  \n* Transforms data from a Record (byte array) to the data  \n* model class (T) for processing in the application and from the data model  \n* class to the output type (U) for the emitter.  \n* \n* @param <T> the data type stored in the record  \n*/\npublic interface IKinesisSerializer<T, U> {  \n/**\n* Transform the record into an object of its original class.  \n* \n* @param record raw record from the stream  \n* @return data using its original class  \n* @throws IOException if it could not convert the record to a T  \n*/\npublic T toClass(InputEvent event) throws IOException;  \n\n/**\n* Transform the record from its original class to a byte array.  \n* \n* @param record data as its original class  \n* @return a data byte array  \n*/\npublic U fromClass(T record) throws IOException;  \n}  \n```\n#### IDataExtractor\n\nIDataExtractors take the deserialized data and extract the relevant Label, Date, and Summary items. They also typically do any filtering that is exposed by the IDataExtractor. Implement a new IDataExtractor if the type of data returned by a custom IKinesisSerializer implementation is not compatible with the existing IDataExtractors in the io package. This new IDataExtractor would conform to:\n\n```\n/**\n* Enables pluggable data extractors for different types of\n* stream data. Aggregators use IDataExtractor to interoperate between the\n* stream data format and the internal format required for aggregation.\n* IDataExtractors likely use IKinesisSerializers to read and write to and from\n* the stream\n*/\npublic interface IDataExtractor {  \n/**  \n* Gets the name of the label value to be extracted.  \n*   \n* @return  \n*/  \npublic String getAggregateLabelName();  \n\n/**  \n* Gets the name of the date value to be extracted.  
\n*   \n* @return  \n*/  \npublic String getDateValueName();  \n\n/**\n* Extracts one or more aggregatable items from a Amazon Kinesis record.  \n*  \n* @param event The Amazon Kinesis record from which we want to extract data.  \n* @return A list of ExtractedData elements that have been resolved from  \n*         the input data.  \n* @throws SerializationException  \n*/\npublic List<AggregateData> getData(InputEvent event) throws SerialisationException;\n\n/**\n* Sets the type of aggregator that contains this IDataExtractor. Used to\n* boost efficiency in that the extractor will not extract summary items for\n* COUNT-based aggregator integration.\n* \n* @param type\n*/\npublic void setAggregatorType(AggregatorType type);\n\n/**\n* Validates that the extractor is well formed.\n* \n* @throws Exception\n*/\npublic void validate() throws Exception;\n\n/**\n* Gets the summary configuration that is driving data extraction against the\n* data stream.\n* \n* @return\n*/\npublic SummaryConfiguration getSummaryConfig();\n\npublic IDataExtractor copy() throws Exception;\n}\n```\n\nAlso note that an IDataExtractor returns multiple aggregatable objects from the stream. If you had a requirement to support M:N Kinesis Events to Aggregatable Events, an IDataExtractor could do the job using local state.\n\nNote that the IDataExtractor is STATEFUL for the life of an aggregator running on a shard, and contains the configuration of the data that is to be extracted. Because a new IDataExtractor is generated when a new aggregator is initialized on a shard, you must ensure that it is thread-safe and implement the copy() interface correctly to ensure that multiple instances can operate within a single JVM.\n\n### Data Store\n\nThe Amazon Kinesis Aggregators framework backs its data onto DynamoDB, and takes advantage of powerful DynamoDB features such as hash/range keys, atomic increment, and conditional updates. 
It also implements a defensive flush mechanism, which means that at any provisioned I/O rate, the aggregator can flush its state to DynamoDB without timing out.\n\nTo extend aggregators with support for an alternate backing store, such as a relational database or Redis, implement com.amazonaws.services.kinesis.aggregators.datastore.IDataStore. This implementation must meet the following service levels:\n\n* Flushes all internal state to the data store in 5 minutes or less (this is due to the Amazon Kinesis worker timeout)\n* Supports a composite primary key for all label values and date value\n* Performs an atomic, transactional increment operation\n* Conditionally updates a discrete value in the table\n\nThe implementation of a new IDataStore must conform to the following:\n\n```\n/**\n* Enables the in-memory cached aggregates \n* to be saved to a persistent store\n*/\npublic interface IDataStore {\n/**\n* Writes a set of Update key/value pairs to the backing store\n* \n* @param data The input dataset to be updated\n* @return A data structure that maps a set of\n*         AggregateAttributeModifications to the values that were\n*         affected on the underlying data store, by UpdateKey\n* @throws Exception\n*/\npublic Map<UpdateKey, Map<String, AggregateAttributeModification>> write(\nMap<UpdateKey, UpdateValue> data) throws Exception;\n\n/**\n* Method called on creation of the IDataStore\n* \n* @throws Exception\n*/\npublic void initialize() throws Exception;\n\n/**\n* Method that is periodically invoked to allow the IDataStore to\n* refresh tolerated limits for how often write() should be called\n* \n* @return\n* @throws Exception\n*/\npublic long refreshForceCheckpointThresholds() throws Exception;\n\n/**\n* Sets the region for the IDataStore\n* \n* @param region\n*/\npublic void setRegion(Region region);\n}\n```\n\n### Metrics Service\n\nBy default, Amazon Kinesis Aggregators integrates with CloudWatch for the purpose of metrics dashboards and alerts. 
However, you might want to push metrics to platforms such as Ganglia or New Relic. In these cases, you would provide an implementation of the com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter. This implementation would conform to the following:\n\n```\n/**\n* Provides classes that can write to metrics services. \n* Receives the output of the IDataStore modifications, and applies the data to\n* the metrics service.\n*/\npublic interface IMetricsEmitter {\n/**\n* Emits a new set of metrics to the metrics service\n* \n* @param metricData Input Data to be instrumented\n* @throws Exception\n*/\npublic void emit(Map<UpdateKey, Map<String, AggregateAttributeModification>> metricData)\nthrows Exception;\n\n/**\n* Sets the region of the metrics service\n* \n* @param region\n*/\npublic void setRegion(Region region);\n}\n```\n\n----\n\nCopyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.\n\nLicensed under the Amazon Software License (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at\n\nhttp://aws.amazon.com/asl/"
  },
  {
    "path": "assembly.xml",
    "content": "<assembly\n\txmlns=\"http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0\"\n\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\txsi:schemaLocation=\"http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd\">\n\t<id>complete</id>\n\t<formats>\n\t\t<format>jar</format>\n\t</formats>\n\t<includeBaseDirectory>false</includeBaseDirectory>\n\t<dependencySets>\n\t\t<dependencySet>\n\t\t\t<outputDirectory>/</outputDirectory>\n\t\t\t<unpack>true</unpack>\n\t\t\t<scope>runtime</scope>\n\t\t</dependencySet>\n\t</dependencySets>\n</assembly>"
  },
  {
    "path": "pom.xml",
    "content": "\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\txsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n\t<modelVersion>4.0.0</modelVersion>\n\t<groupId>com.amazonaws</groupId>\n\t<artifactId>amazon-kinesis-aggregators</artifactId>\n\t<version>.9.2.9</version>\n\t<properties>\n\t\t<sdk-version>1.11.745</sdk-version>\n\t</properties>\n\t<build>\n\t\t<defaultGoal>clean source:jar install assembly:assembly war:war</defaultGoal>\n\t\t<sourceDirectory>src/main/java</sourceDirectory>\n\t\t<resources>\n\t\t\t<resource>\n\t\t\t\t<directory>src/main/java</directory>\n\t\t\t\t<includes>\n\t\t\t\t\t<include>**/*.properties</include>\n\t\t\t\t</includes>\n\t\t\t</resource>\n\t\t\t<resource>\n\t\t\t\t<directory>sample/java</directory>\n\t\t\t</resource>\n\t\t\t<resource>\n\t\t\t\t<directory>sample/resources</directory>\n\t\t\t\t<includes>\n\t\t\t\t\t<include>**/*.properties</include>\n\t\t\t\t</includes>\n\t\t\t</resource>\n\t\t</resources>\n\t\t<testSourceDirectory>tst</testSourceDirectory>\n\t\t<testResources>\n\t\t\t<testResource>\n\t\t\t\t<directory>tst</directory>\n\t\t\t\t<filtering>false</filtering>\n\t\t\t\t<includes>\n\t\t\t\t\t<include>**/*.java</include>\n\t\t\t\t\t<include>**/*.properties</include>\n\t\t\t\t\t<include>**/*.json</include>\n\t\t\t\t</includes>\n\t\t\t</testResource>\n\t\t</testResources>\n\t\t<pluginManagement>\n\t\t\t<plugins>\n\t\t\t\t<plugin>\n\t\t\t\t\t<artifactId>maven-compiler-plugin</artifactId>\n\t\t\t\t\t<version>3.1</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<source>1.7</source>\n\t\t\t\t\t\t<target>1.7</target>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t\t<plugin>\n\t\t\t\t\t<artifactId>maven-war-plugin</artifactId>\n\t\t\t\t\t<version>2.3</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<warSourceDirectory>src/main/WebContent</warSourceDirectory>\n\t\t\t\t\t\t<failOnMissingWebXml>true</failOn
MissingWebXml>\n\t\t\t\t\t\t<warName>AmazonKinesisAggregators</warName>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t\t<!-- delete this plugin to remove the AWS sample application from the \n\t\t\t\t\taggregator core build -->\n\t\t\t\t<plugin>\n\t\t\t\t\t<groupId>org.codehaus.mojo</groupId>\n\t\t\t\t\t<artifactId>build-helper-maven-plugin</artifactId>\n\t\t\t\t\t<version>1.8</version>\n\t\t\t\t\t<executions>\n\t\t\t\t\t\t<execution>\n\t\t\t\t\t\t\t<id>add-extra-source</id>\n\t\t\t\t\t\t\t<phase>generate-sources</phase>\n\t\t\t\t\t\t\t<goals>\n\t\t\t\t\t\t\t\t<goal>add-source</goal>\n\t\t\t\t\t\t\t</goals>\n\t\t\t\t\t\t\t<configuration>\n\t\t\t\t\t\t\t\t<sources>\n\t\t\t\t\t\t\t\t\t<source>sample</source>\n\t\t\t\t\t\t\t\t</sources>\n\t\t\t\t\t\t\t</configuration>\n\t\t\t\t\t\t</execution>\n\t\t\t\t\t</executions>\n\t\t\t\t</plugin>\n\t\t\t\t<plugin>\n\t\t\t\t\t<artifactId>maven-assembly-plugin</artifactId>\n\t\t\t\t\t<version>2.1</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<finalName>AmazonKinesisAggregators.jar</finalName>\n\t\t\t\t\t\t<descriptors>\n\t\t\t\t\t\t\t<descriptor>assembly.xml</descriptor>\n\t\t\t\t\t\t</descriptors>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t\t<plugin>\n\t\t\t\t\t<groupId>com.mycila.maven-license-plugin</groupId>\n\t\t\t\t\t<artifactId>maven-license-plugin</artifactId>\n\t\t\t\t\t<version>1.8.0</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<header>com/amazonaws/services/kinesis/aggregators/license.txt</header>\n\t\t\t\t\t\t<properties>\n\t\t\t\t\t\t\t<owner>Ian 
Meyers</owner>\n\t\t\t\t\t\t\t<year>2014</year>\n\t\t\t\t\t\t\t<email>meyersi@amazon.co.uk</email>\n\t\t\t\t\t\t</properties>\n\t\t\t\t\t\t<excludes>\n\t\t\t\t\t\t\t<exclude>**/README</exclude>\n\t\t\t\t\t\t\t<exclude>**/license.txt</exclude>\n\t\t\t\t\t\t\t<exclude>src/test/resources/**</exclude>\n\t\t\t\t\t\t\t<exclude>src/main/resources/**</exclude>\n\t\t\t\t\t\t</excludes>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t\t<plugin>\n\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t<artifactId>maven-surefire-plugin</artifactId>\n\t\t\t\t\t<version>2.17</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<!-- <parallel>classes</parallel> <threadCount>5</threadCount> -->\n\t\t\t\t\t\t<reuseForks>false</reuseForks>\n\t\t\t\t\t\t<forkCount>1</forkCount>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t\t<plugin>\n\t\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t\t<artifactId>maven-javadoc-plugin</artifactId>\n\t\t\t\t\t<version>2.9.1</version>\n\t\t\t\t\t<configuration>\n\t\t\t\t\t\t<show>private</show>\n\t\t\t\t\t\t<nohelp>true</nohelp>\n\t\t\t\t\t</configuration>\n\t\t\t\t</plugin>\n\t\t\t</plugins>\n\t\t</pluginManagement>\n\t</build>\n\t<dependencies>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>aws-java-sdk-core</artifactId>\n\t\t\t<version>${sdk-version}</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>aws-java-sdk-dynamodb</artifactId>\n\t\t\t<version>${sdk-version}</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>aws-java-sdk-kinesis</artifactId>\n\t\t\t<version>${sdk-version}</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>aws-java-sdk-cloudwatch</artifactId>\n\t\t\t<version>${sdk-version}</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>aws-java-sdk-cloudwatchmetr
ics</artifactId>\n\t\t\t<version>${sdk-version}</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t<artifactId>amazon-kinesis-client</artifactId>\n\t\t\t<version>1.7.0</version>\n\t\t\t<exclusions>\n\t\t\t\t<exclusion>\n\t\t\t\t\t<artifactId>aws-java-sdk</artifactId>\n\t\t\t\t\t<groupId>com.amazonaws</groupId>\n\t\t\t\t</exclusion>\n\t\t\t</exclusions>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>commons-logging</groupId>\n\t\t\t<artifactId>commons-logging</artifactId>\n\t\t\t<version>1.1.1</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>commons-httpclient</groupId>\n\t\t\t<artifactId>commons-httpclient</artifactId>\n\t\t\t<version>3.1</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>commons-collections</groupId>\n\t\t\t<artifactId>commons-collections</artifactId>\n\t\t\t<version>20040616</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>joda-time</groupId>\n\t\t\t<artifactId>joda-time</artifactId>\n\t\t\t<version>2.2</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>javax.servlet</groupId>\n\t\t\t<artifactId>javax.servlet-api</artifactId>\n\t\t\t<version>3.0.1</version>\n\t\t\t<scope>provided</scope>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>commons-io</groupId>\n\t\t\t<artifactId>commons-io</artifactId>\n\t\t\t<version>[2.7,)</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.fasterxml.jackson.core</groupId>\n\t\t\t<artifactId>jackson-core</artifactId>\n\t\t\t<version>[2.9.10.1,)</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>com.fasterxml.jackson.core</groupId>\n\t\t\t<artifactId>jackson-databind</artifactId>\n\t\t\t<version>[2.9.10.1,)</version>\n\t\t</dependency>\n\t</dependencies>\n\t<organization>\n\t\t<name>Amazon Web Services UK Ltd</name>\n\t</organization>\n</project>"
  },
  {
    "path": "sample/bin/run-producer.sh",
    "content": "#!/bin/bash\n#\n# Amazon Kinesis Aggregators\n#\n# Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n#\n# Licensed under the Amazon Software License (the \"License\").\n# You may not use this file except in compliance with the License.\n# A copy of the License is located at\n#\n#  http://aws.amazon.com/asl/\n#\n# or in the \"license\" file accompanying this file. This file is distributed\n# on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n# express or implied. See the License for the specific language governing\n# permissions and limitations under the License.\n#\n\n\n# Number of messages for this producer to create\nnum_messages=$1\n\n# Format should be one of 'json','csv', or 'string'\nformat=$2\n\n# Stream to write messages to\nstream=$3\n\n# AWS Region Name to use, such as 'us-east-1' or 'eu-west-1'. US East is Default\nregion=$4\n\njava -cp ../../target/AmazonKinesisAggregators.jar-complete.jar producer.SensorReadingProducer $num_messages $format $stream $region"
  },
  {
    "path": "sample/java/model/SensorReading.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage model;\n\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\npublic class SensorReading {\n\tprivate static ObjectMapper mapper = new ObjectMapper();\n\n\tpublic enum OutputFormat {\n\t\tjson, csv, string;\n\t}\n\n\tprivate OutputFormat outputAs = OutputFormat.json;\n\n\tprivate String id;\n\tprivate long captureTs;\n\tprivate String segment;\n\tprivate double lat;\n\tprivate double lng;\n\tprivate double pressure;\n\tprivate double temperature;\n\tprivate double flowRate;\n\tprivate double corrosionIndex;\n\tprivate double segmentIncline;\n\n\tprivate SensorReading() {\n\t}\n\n\tpublic SensorReading(String id, String segment, long captureTs, double lat,\n\t\t\tdouble lng, double pressure, double temperature, double flowRate,\n\t\t\tdouble corrosionIndex, double segmentIncline) {\n\t\tthis.id = id;\n\t\tthis.segment = segment;\n\t\tthis.captureTs = captureTs;\n\t\tthis.lat = lat;\n\t\tthis.lng = lng;\n\t\tthis.pressure = pressure;\n\t\tthis.temperature = temperature;\n\t\tthis.flowRate = flowRate;\n\t\tthis.corrosionIndex = corrosionIndex;\n\t\tthis.segmentIncline = segmentIncline;\n\t}\n\n\tpublic String getId() {\n\t\treturn this.id;\n\t}\n\n\tpublic String getSegment() {\n\t\treturn this.segment;\n\t}\n\n\tpublic long getCaptureTs() {\n\t\treturn this.captureTs;\n\t}\n\n\tpublic double getLat() {\n\t\treturn 
this.lat;\n\t}\n\n\tpublic double getLng() {\n\t\treturn this.lng;\n\t}\n\n\tpublic double getPressure() {\n\t\treturn this.pressure;\n\t}\n\n\tpublic double getTemp() {\n\t\treturn this.temperature;\n\t}\n\n\tpublic double getFlowRate() {\n\t\treturn this.flowRate;\n\t}\n\n\tpublic double getCorrosionIndex() {\n\t\treturn this.corrosionIndex;\n\t}\n\n\tpublic double getSegmentIncline() {\n\t\treturn this.segmentIncline;\n\t}\n\n\tpublic SensorReading withOutputFormat(OutputFormat format) {\n\t\tthis.outputAs = format;\n\t\treturn this;\n\t}\n\n\tpublic String asJson() throws Exception {\n\t\treturn mapper.writeValueAsString(this);\n\t}\n\n\tpublic String asString() throws Exception {\n\t\treturn String.format(\"%s (%s) ts-%s %sx%s %s at %s T:%s c:%10f deg%10f\",\n\t\t\t\tthis.id, this.segment, this.captureTs, this.lat, this.lng,\n\t\t\t\tthis.pressure, this.flowRate, this.temperature,\n\t\t\t\tthis.corrosionIndex, this.segmentIncline);\n\t}\n\n\tpublic String asCSV() throws Exception {\n\t\treturn String.format(\"%s|%s|%s|%s|%s|%s|%s|%s|%10f|%10f\", this.id,\n\t\t\t\tthis.segment, this.captureTs, this.lat, this.lng,\n\t\t\t\tthis.pressure, this.temperature, this.flowRate,\n\t\t\t\tthis.corrosionIndex, this.segmentIncline);\n\t}\n\n\t@Override\n\tpublic String toString() {\n\t\ttry {\n\t\t\tswitch (this.outputAs) {\n\t\t\tcase string:\n\t\t\t\treturn this.asString();\n\t\t\tcase csv:\n\t\t\t\treturn this.asCSV();\n\t\t\tdefault:\n\t\t\t\treturn this.asJson();\n\t\t\t}\n\t\t} catch (Exception e) {\n\t\t\te.printStackTrace();\n\t\t\treturn null;\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "sample/java/model/SensorState.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage model;\n\npublic class SensorState {\n\tprivate String segment;\n\tprivate double lat;\n\tprivate double lng;\n\tprivate double pressure;\n\tprivate double flowRate;\n\tprivate double temp;\n\tprivate double corrosion;\n\tprivate double incline;\n\n\tpublic SensorState(String segment, double lat, double lng, double pressure,\n\t\t\tdouble flowRate, double temp, double corrosion, double incline) {\n\t\tthis.segment = segment;\n\t\tthis.lat = lat;\n\t\tthis.lng = lng;\n\t\tthis.pressure = pressure;\n\t\tthis.flowRate = flowRate;\n\t\tthis.temp = temp;\n\t\tthis.corrosion = corrosion;\n\t\tthis.incline = incline;\n\t}\n\n\tpublic String getSegment() {\n\t\treturn segment;\n\t}\n\n\tpublic double getLat() {\n\t\treturn lat;\n\t}\n\n\tpublic double getLng() {\n\t\treturn lng;\n\t}\n\n\tpublic double getPressure() {\n\t\treturn pressure;\n\t}\n\n\tpublic double getFlowRate() {\n\t\treturn flowRate;\n\t}\n\n\tpublic double getTemp() {\n\t\treturn temp;\n\t}\n\n\tpublic double getCorrosion() {\n\t\treturn corrosion;\n\t}\n\n\tpublic double getIncline() {\n\t\treturn incline;\n\t}\n}\n"
  },
  {
    "path": "sample/java/producer/SensorReadingProducer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage producer;\n\nimport java.nio.ByteBuffer;\nimport java.security.SecureRandom;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Random;\n\nimport model.SensorReading;\nimport model.SensorReading.OutputFormat;\nimport model.SensorState;\n\nimport com.amazonaws.auth.DefaultAWSCredentialsProviderChain;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.regions.Regions;\nimport com.amazonaws.services.kinesis.AmazonKinesis;\nimport com.amazonaws.services.kinesis.AmazonKinesisClient;\nimport com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException;\nimport com.amazonaws.services.kinesis.model.PutRecordRequest;\n\npublic class SensorReadingProducer {\n\tprivate final Random rand = new SecureRandom();\n\n\tprivate final int ID_SPACE_SIZE = 1000;\n\tprivate final int NUM_SEGMENTS = 40;\n\tprivate final double PRESSURE_BASE = 108D;\n\tprivate final double PRESSURE_VOLATILITY = 3D;\n\tprivate final double FLOW_BASE = 1D;\n\tprivate final double FLOW_VOLATILITY = 1D;\n\tprivate final double TEMP_BASE = 16D;\n\tprivate final double TEMP_VOLATILITY = .2D;\n\tprivate final double INCLINE_BASE = 0D;\n\tprivate final double CORROSION_BASE = .0000234D;\n\tprivate final int BATCH_SIZE = 20;\n\tprivate final int BACKOFF = 5;\n\tprivate int 
sensorsGenerated = 0;\n\n\tprivate Map<String, SensorState> sensorCache = new HashMap<>();\n\n\tfinal double londonLat = 51.50722D;\n\tfinal double londonLng = -0.12750D;\n\tfinal double aberdeenLat = 57.1436900D;\n\tfinal double aberdeenLng = -2.0981400D;\n\tfinal double lineATan = Math.atan((aberdeenLng - londonLng)\n\t\t\t/ (aberdeenLat - londonLat));\n\tfinal double lineLength = Math.sqrt((Math.pow(aberdeenLng - londonLng, 2))\n\t\t\t+ (Math.pow(aberdeenLat - londonLat, 2)));\n\tfinal double lineIncrement = lineLength / NUM_SEGMENTS;\n\n\tpublic SensorReadingProducer() {\n\t}\n\n\tpublic double[] getLinePoint() {\n\t\t// random distance\n\t\tdouble dist = Math.random() * lineLength;\n\n\t\t// calculate which segment the point is in\n\t\tdouble seg = Math.floor(dist / lineIncrement) + 1;\n\n\t\t// derive lat/lng\n\t\tdouble lat = (dist / Math.cos(lineATan)) + londonLat;\n\t\tdouble lng = (dist * Math.sin(lineATan)) + londonLng;\n\n\t\treturn new double[] { lat, lng, seg };\n\t}\n\n\tpublic SensorReading nextSensorReading(final OutputFormat format) {\n\t\treturn nextSensorReading(format, rand.nextInt(ID_SPACE_SIZE));\n\t}\n\n\tpublic SensorReading nextSensorReading(final OutputFormat format,\n\t\t\tint position) {\n\t\tString id = Integer.toHexString(position);\n\n\t\tSensorState sensorState = sensorCache.get(id);\n\t\tif (sensorState == null) {\n\t\t\tsensorsGenerated++;\n\n\t\t\tSystem.out.println(String.format(\"Generating Sensor %s\",\n\t\t\t\t\tsensorsGenerated));\n\n\t\t\tdouble[] location = getLinePoint();\n\n\t\t\tString segment = Integer.toHexString(new Double(location[2])\n\t\t\t\t\t.intValue());\n\n\t\t\tdouble pressure = PRESSURE_BASE\n\t\t\t\t\t+ (PRESSURE_VOLATILITY * rand.nextDouble());\n\t\t\tdouble flow = FLOW_BASE + (FLOW_VOLATILITY * rand.nextDouble());\n\t\t\tdouble temp = TEMP_BASE + (TEMP_VOLATILITY * rand.nextDouble());\n\t\t\tdouble corrosion = CORROSION_BASE + (rand.nextDouble() / 1000);\n\t\t\tdouble incline = INCLINE_BASE + 
(rand.nextDouble() / 1_000_000);\n\n\t\t\tsensorState = new SensorState(segment, location[0], location[1],\n\t\t\t\t\tpressure, flow, temp, corrosion, incline);\n\n\t\t\tsensorCache.put(id, sensorState);\n\t\t}\n\n\t\tdouble pressure = sensorState.getPressure()\n\t\t\t\t+ (PRESSURE_VOLATILITY * rand.nextDouble());\n\t\tdouble temp = sensorState.getTemp()\n\t\t\t\t+ (TEMP_VOLATILITY * rand.nextDouble());\n\t\tdouble flow = sensorState.getFlowRate()\n\t\t\t\t+ (FLOW_VOLATILITY * rand.nextDouble());\n\t\tdouble corrosion = sensorState.getCorrosion()\n\t\t\t\t+ (rand.nextDouble() / 1000);\n\t\tdouble incline = sensorState.getIncline()\n\t\t\t\t+ (rand.nextDouble() / 1_000_000);\n\n\t\tSensorReading reading = new SensorReading(id, sensorState.getSegment(),\n\t\t\t\tSystem.currentTimeMillis(), sensorState.getLat(),\n\t\t\t\tsensorState.getLng(), pressure, temp, flow, corrosion, incline);\n\t\treading.withOutputFormat(format);\n\t\treturn reading;\n\t}\n\n\tprivate void run(final int events, final OutputFormat format,\n\t\t\tfinal String streamName, final String region) throws Exception {\n\t\tAmazonKinesis kinesisClient = new AmazonKinesisClient(\n\t\t\t\tnew DefaultAWSCredentialsProviderChain());\n\t\tkinesisClient.setRegion(Region.getRegion(Regions.fromName(region)));\n\t\tint count = 0;\n\t\tSensorReading r = null;\n\t\tdo {\n\t\t\tr = nextSensorReading(format);\n\n\t\t\ttry {\n\t\t\t\tPutRecordRequest req = new PutRecordRequest()\n\t\t\t\t\t\t.withPartitionKey(\"\" + rand.nextLong())\n\t\t\t\t\t\t.withStreamName(streamName)\n\t\t\t\t\t\t.withData(ByteBuffer.wrap(r.toString().getBytes()));\n\t\t\t\tkinesisClient.putRecord(req);\n\t\t\t} catch (ProvisionedThroughputExceededException e) {\n\t\t\t\tThread.sleep(BACKOFF);\n\t\t\t}\n\n\t\t\tSystem.out.println(r);\n\t\t\tcount++;\n\t\t} while (count < events);\n\t}\n\n\tpublic static void main(String[] args) throws Exception {\n\t\tInteger i = Integer.parseInt(args[0]);\n\t\tOutputFormat format = 
OutputFormat.valueOf(args[1]);\n\t\tString streamName = args[2];\n\t\tString region = args[3];\n\n\t\tnew SensorReadingProducer().run(i, format, streamName, region);\n\t}\n}\n"
  },
  {
    "path": "sample/resources/BySegment-CSV.json",
    "content": "[{\"dataExtractor\": \"CSV\",\n    \"dateFormat\": \"\",\n    \"dateItem\": 2,\n    \"dateAttributeAlias\":\"sensorTS\",\n    \"labelItems\": [\n      1\n    ],\n    \"labelAttributeAlias\":\"segment\",\n    \"namespace\": \"BySegment-CSV\",\n    \"delimiter\": \"|\",\n    \"summaryItems\": [\n      \"max(5) max-pressure\",\n      \"max(6) max-flow\",\n      \"max(9) max-corrosion\",\n      \"min(3) lat\",\n      \"min(4) lng\"\n    ],\n    \"timeHorizons\": [\n      \"SECOND\"\n    ],\n    \"type\": \"SUM\",\n    \"emitMetrics\":\"true\",\n    \"writeIOPS\":100,\n    \"readIOPS\":25\n  }\n]"
  },
  {
    "path": "sample/resources/BySegment-Json.json",
    "content": "[{\"dataExtractor\": \"JSON\",\n    \"dateFormat\": \"\",\n    \"dateAttribute\": \"captureTs\",\n    \"labelItems\": [\n      \"segment\"\n    ],\n    \"namespace\": \"BySegment-Json\",\n    \"summaryItems\": [\n      \"max(pressure)\",\n      \"max(flowRate)\",\n      \"max(corrosionIndex)\",\n      \"min(lat) lat\",\n      \"min(lng) lng\"\n    ],\n    \"timeHorizons\": [\n      \"SECOND\"\n    ],\n    \"type\": \"SUM\",\n    \"emitMetrics\":\"true\",\n    \"writeIOPS\":100,\n    \"readIOPS\":25\n  }\n]"
  },
  {
    "path": "sample/resources/BySegment-Regex.json",
    "content": "[{\"dataExtractor\": \"REGEX\",\n    \"dateFormat\": \"\",\n    \"dateItem\": 2,\n    \"dateAttributeAlias\":\"sensorTS\",\n    \"labelItems\": [\n      1\n    ],\n    \"labelAttributeAlias\":\"segment\",\n    \"namespace\": \"BySegment-Regex\",\n    \"regularExpression\": \"^(.*) \\\\((.*)\\\\) ts-(\\\\d+) (\\\\d+\\\\.\\\\d+)x(\\\\-\\\\d+\\\\.\\\\d+) (\\\\d+.\\\\d+) at (\\\\d+.\\\\d+) T:(\\\\d+\\\\.\\\\d+) c:\\\\ +(\\\\d+\\\\.\\\\d+) deg\\\\ +(\\\\d+\\\\.\\\\d+)$\",\n    \"summaryItems\": [\n      \"max(5) max-pressure\",\n      \"max(6) max-flow\",\n      \"max(9) max-corrosion\",\n      \"min(3) lat\",\n      \"min(4) lng\"\n    ],\n    \"timeHorizons\": [\n      \"SECOND\"\n    ],\n    \"type\": \"SUM\",\n    \"emitMetrics\":\"true\",\n    \"writeIOPS\":100,\n    \"readIOPS\":25\n  }\n]"
  },
  {
    "path": "src/.gitkeep",
    "content": "Feel free to delete this file as soon as actual Java code is added to this\ndirectory.\n"
  },
  {
    "path": "src/log4j.properties",
    "content": "# Root logger option\nlog4j.rootLogger=INFO, stdout\n \n# Direct log messages to stdout\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.Target=System.out\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n"
  },
  {
    "path": "src/main/WebContent/.ebextensions/as.config",
    "content": "option_settings:\n  - namespace: aws:autoscaling:asg\n    option_name: MinSize\n    value: 2\n  - namespace: aws:autoscaling:trigger\n    option_name: MeasureName\n    value: CPUUtilization\n  - namespace: aws:autoscaling:trigger\n    option_name: LowerThreshold\n    value: 40\n  - namespace: aws:autoscaling:trigger\n    option_name: UpperThreshold\n    value: 90"
  },
  {
    "path": "src/main/WebContent/META-INF/MANIFEST.MF",
    "content": "Manifest-Version: 1.0\r\nClass-Path: \r\n\r\n"
  },
  {
    "path": "src/main/WebContent/WEB-INF/web.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<web-app xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\txmlns=\"http://java.sun.com/xml/ns/javaee\"\n\txsi:schemaLocation=\"http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd\"\n\tid=\"WebApp_ID\" version=\"3.0\">\n\t<display-name>KinesisAggregatorsBeanstalkApplication</display-name>\n\t<welcome-file-list>\n\t\t<welcome-file>index.html</welcome-file>\n\t</welcome-file-list>\n\t<listener>\n\t\t<listener-class>com.amazonaws.services.kinesis.aggregators.app.AggregatorsBeanstalkApp</listener-class>\n\t</listener>\n\t<servlet>\n\t\t<servlet-name>DateQuery</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.DateQueryServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>DateQuery</servlet-name>\n\t\t<url-pattern>/dateQuery</url-pattern>\n\t</servlet-mapping>\n\n\t<servlet>\n\t\t<servlet-name>KeyQuery</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.ListAggregateKeysServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>KeyQuery</servlet-name>\n\t\t<url-pattern>/keyQuery</url-pattern>\n\t</servlet-mapping>\n\n\n\t<servlet>\n\t\t<servlet-name>LabelQuery</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.QueryByLabelServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>LabelQuery</servlet-name>\n\t\t<url-pattern>/labelQuery</url-pattern>\n\t</servlet-mapping>\n\n\t<servlet>\n\t\t<servlet-name>ConfigParams</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.FetchConfigurationServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>ConfigParams</servlet-name>\n\t\t<url-pattern>/configParams</url-pattern>\n\t</servlet-mapping>\n\n\t<servlet>\n\t\t<servlet-name>Configuration</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.ShowConfi
gurationServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>Configuration</servlet-name>\n\t\t<url-pattern>/configuration</url-pattern>\n\t</servlet-mapping>\n\n\t<servlet>\n\t\t<servlet-name>ConfigFile</servlet-name>\n\t\t<servlet-class>com.amazonaws.services.kinesis.aggregators.app.ShowConfigFileServlet</servlet-class>\n\t</servlet>\n\t<servlet-mapping>\n\t\t<servlet-name>ConfigFile</servlet-name>\n\t\t<url-pattern>/configFile</url-pattern>\n\t</servlet-mapping>\n</web-app>"
  },
  {
    "path": "src/main/WebContent/index.html",
    "content": "<!--\n\n    Amazon Kinesis Aggregators\n\n    Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n\n    Licensed under the Amazon Software License (the \"License\").\n    You may not use this file except in compliance with the License.\n    A copy of the License is located at\n\n     http://aws.amazon.com/asl/\n\n    or in the \"license\" file accompanying this file. This file is distributed\n    on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n    express or implied. See the License for the specific language governing\n    permissions and limitations under the License.\n\n-->\n<html>\n<head>\n<title>Kinesis Aggregators Managed Application in Elastic Beanstalk</title>\n</head>\n<body>\nOK - Kinesis Aggregators Managed Application hosted in Elastic Beanstalk Online\n</body>\n</html>"
  },
  {
    "path": "src/main/WebContent/styles/styles.css",
    "content": "/*************************************\nGENERAL\n*************************************/\nbody {\n\tmargin: 0;\n\tpadding: 0;\n\tfont: 12px/1.4em \"Lucida Grande\", Verdana, sans-serif;\n\tcolor: #333;\n\toverflow-y: scroll;\n\ttext-rendering: optimizeLegibility;\n\tbackground-color: #d5e9ed;\n}\n\nh2 {\n\tfont-size: 1.3em;\n\tline-height: 1.5em;\n\tfont-weight: bold;\n\tmargin: 20px 0 0 0;\n\tpadding: 0;\n\tborder-bottom: 3px solid #eee;\n\n\t/* icon setup */\n\tpadding: 0.2em 1em 0.2em 30px;\n\tbackground-position: 0 50%;\n\tbackground-repeat: no-repeat;\n}\n\n\n/*************************************\nSECTIONS\n*************************************/\ndiv#content {\n\tmargin: 30px auto;\n\tpadding: 0 30px 15px 30px;\n\tbackground-color: #fff;\n\twidth: 940px;\n\n\t/* box-shadow */\n\t-moz-box-shadow: 0 5px 10px #aaa;\n\t-webkit-box-shadow: 0 5px 10px #aaa;\n\tbox-shadow: 0 5px 10px #aaa;\n\n\t/* bottom corners */\n\t-webkit-border-bottom-right-radius: 7px;\n\t-webkit-border-bottom-left-radius: 7px;\n\t-moz-border-radius-bottomright: 7px;\n\t-moz-border-radius-bottomleft: 7px;\n\tborder-bottom-right-radius: 7px;\n\tborder-bottom-left-radius: 7px;\n}\n\n/*div#content div.section {}*/\n\ndiv#content div.section ul {\n\tmargin: 0;\n\tpadding: 1em 0 0 2em;\n\toverflow: hidden;\n}\n\ndiv#content div.section ul li {\n\tlist-style-type: square;\n\twhite-space: nowrap;\n\tline-height: 1.5em;\n}\n\n/* Section titles */\ndiv#content div.section.s3 h2 {\n\tbackground-image: url(../images/drive.png);\n}\n\ndiv#content div.section.ec2 h2 {\n\tbackground-image: url(../images/server.png);\n}\n\ndiv#content div.section.sdb h2 {\n\tbackground-image: url(../images/database.png);\n}\n\n\n/*************************************\nCONTAINERS\n*************************************/\n.container {\n\tzoom: 1;\n}\n\n.container:after {\n    content: \".\";\n    display: block;\n    height: 0;\n    clear: both;\n    visibility: 
hidden;\n}\n\n\n/*************************************\nGRIDS\n*************************************/\n.grid { float: left; margin-right: 20px; }\n.gridlast { margin-right: 0; }\n.grid5 { width: 300px; }\n.grid15 { width: 940px; }\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/AggregateData.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.Date;\nimport java.util.Map;\n\npublic class AggregateData {\n    private String uniqueId;\n\n    private LabelSet labels;\n\n    private Date date;\n\n    private Map<String, Double> summaries;\n\n    public AggregateData(String uniqueId, LabelSet labels, Date date, Map<String, Double> summaries) {\n        this.uniqueId = uniqueId;\n        this.labels = labels;\n        this.date = date;\n        this.summaries = summaries;\n    }\n\n    public String getUniqueId() {\n        return this.uniqueId;\n    }\n\n    public String getLabel() {\n        return this.labels.valuesAsString();\n    }\n\n    public LabelSet getLabels() {\n        return this.labels;\n    }\n\n    public Date getDate() {\n        return this.date;\n    }\n\n    public Map<String, Double> getSummaries() {\n        return this.summaries;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorGroup.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.ArrayList;\nimport java.util.List;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.services.kinesis.model.Record;\n\n/**\n * Class which provides a simple automation around a number of aggregators.\n * Register any number of aggregators with the container, and then call all of\n * the registered aggregators aggregate and checkpoint methods through this\n * simple proxy\n */\npublic class AggregatorGroup implements IStreamAggregator {\n    List<StreamAggregator> aggregators = new ArrayList<>();\n\n    public AggregatorGroup() {\n    }\n\n    public AggregatorGroup(AggregatorGroup template) throws Exception {\n        // create a new aggregator group from all of the aggregators this one\n        // encapsulates, by instantiating new aggregators with their copy\n        // constructors\n        for (StreamAggregator agg : template.aggregators) {\n            this.registerAggregator(new StreamAggregator(agg));\n        }\n    }\n\n    public void registerAggregator(StreamAggregator agg) {\n        this.aggregators.add(agg);\n    }\n\n    public List<StreamAggregator> getAggregators() {\n        return this.aggregators;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public void aggregate(List<Record> records) throws 
Exception {\n        for (IStreamAggregator agg : aggregators) {\n            agg.aggregate(records);\n        }\n    }\n\n    public void aggregateEvents(List<InputEvent> events) throws Exception {\n        for (IStreamAggregator agg : aggregators) {\n            agg.aggregateEvents(events);\n        }\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public void checkpoint() throws Exception {\n        for (IStreamAggregator agg : aggregators) {\n            agg.checkpoint();\n        }\n    }\n\n    @Override\n    public void initialize(String shardId) throws Exception {\n        for (IStreamAggregator agg : aggregators) {\n            agg.initialize(shardId);\n        }\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public void shutdown(boolean flushState) throws Exception {\n        for (IStreamAggregator agg : aggregators) {\n            agg.shutdown(flushState);\n        }\n    }\n\n    /**\n     * N/A - use getTableNames()\n     */\n    @Override\n    public String getTableName() {\n        return null;\n    }\n\n    public List<String> getTableNames() {\n        List<String> out = new ArrayList<>(this.aggregators.size());\n\n        for (IStreamAggregator i : this.aggregators) {\n            out.add(i.getTableName());\n        }\n\n        return out;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorType.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\n/**\n * Types of Aggregators supported by the Kinesis Aggregator Framework.\n */\npublic enum AggregatorType {\n    /**\n     * Count Aggregators maintain only an Event Count observed for the indicated\n     * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon}\n     */\n    COUNT,\n    /**\n     * Sum Aggregators maintain an Event Count, plus a set of summary values for\n     * data indicated on the stream as being a summary value. Summary Values can\n     * be any of\n     * {@link com.amazonaws.services.kinesis.aggregators.SummaryCalculation}\n     */\n    SUM;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorsConstants.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\npublic class AggregatorsConstants {\n    public static final String CONFIG_URL_PARAM = \"config-file-url\";\n\n    public static final String CONFIG_PATH_PARAM = \"config-file-path\";\n\n    public static final String STREAM_NAME_PARAM = \"stream-name\";\n\n    public static final String NAMESPACE_PARAM = \"namespace\";\n\n    public static final String APP_NAME_PARAM = \"application-name\";\n\n    public static final String REGION_PARAM = \"region\";\n\n    public static final String STREAM_POSITION_PARAM = \"position-in-stream\";\n\n    public static final String MAX_RECORDS_PARAM = \"max-records\";\n\n    public static final String ENVIRONMENT_PARAM = \"environment\";\n\n    public static final String FAILURES_TOLERATED_PARAM = \"failures-tolerated\";\n\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/EnvironmentType.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\npublic enum EnvironmentType {\n    DEV, TEST, INT, PERF, PROD;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/IStreamAggregator.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.List;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.services.kinesis.model.Record;\n\n/**\n * Stream aggregators give end users the ability to dynamically aggregate\n * Kinesis data in Dynamo DB. All a consumer must do is create an Aggregator and\n * then call aggregate within the processRecords method of an IRecordProcessor.\n * Please note all writes made within the aggregate context are durable.\n * \n * @author meyersi\n */\npublic interface IStreamAggregator {\n    /**\n     * Aggregate a set of records received from the Kinesis Client Library.\n     * \n     * @param records The set of Records received from a processRecords\n     *        invocation\n     * @throws Exception\n     */\n    public void aggregate(List<Record> records) throws Exception;\n\n    public void aggregateEvents(List<InputEvent> events) throws Exception;\n\n    /**\n     * Commit all aggregated data to the backing store.\n     */\n    public void checkpoint() throws Exception;\n\n    /**\n     * Initialise the Aggregator on a shard. 
Should be called by\n     * IRecordProcessor.initialize().\n     */\n    public void initialize(String shardId) throws Exception;\n\n    /**\n     * Terminate a running Aggregator, which will mark the process as offline\n     * in the {@link InventoryModel} table.\n     */\n    public void shutdown(boolean flushState) throws Exception;\n\n    /** Get the underlying data store name for the aggregator. */\n    public String getTableName();\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/InputEvent.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport com.amazonaws.services.kinesis.model.Record;\n\npublic class InputEvent {\n    private String sequenceNumber;\n\n    private String partitionKey;\n\n    private byte[] data;\n\n    public InputEvent(Record record) {\n        this.sequenceNumber = record.getSequenceNumber();\n        this.partitionKey = record.getPartitionKey();\n        this.data = record.getData().array();\n    }\n\n    public InputEvent withSequence(String sequence) {\n        this.sequenceNumber = sequence;\n        return this;\n    }\n\n    public InputEvent withPartitionKey(String partitionKey) {\n        this.partitionKey = partitionKey;\n        return this;\n    }\n\n    public String getSequenceNumber() {\n        return sequenceNumber;\n    }\n\n    public String getPartitionKey() {\n        return partitionKey;\n    }\n\n    public byte[] getData() {\n        return data;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/InventoryModel.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.ArrayList;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;\nimport com.amazonaws.services.dynamodbv2.model.AttributeAction;\nimport com.amazonaws.services.dynamodbv2.model.AttributeDefinition;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValueUpdate;\nimport com.amazonaws.services.dynamodbv2.model.DeleteItemRequest;\nimport com.amazonaws.services.dynamodbv2.model.GetItemResult;\nimport com.amazonaws.services.dynamodbv2.model.KeySchemaElement;\nimport com.amazonaws.services.dynamodbv2.model.KeyType;\nimport com.amazonaws.services.dynamodbv2.model.UpdateItemRequest;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils;\n\n/**\n * Class used to provide configuration and setup for the Worker Inventory table\n * in Dynamo DB.\n */\n@SuppressWarnings(\"serial\")\npublic final class InventoryModel {\n    private boolean online = false;\n\n    /**\n     * Name of the Table in Dynamo DB.\n     */\n    public static final String TABLE_NAME = 
\"KinesisAggregatorWorkerState\";\n\n    /**\n     * Column name used to store the Kinesis Stream Name for an Aggregator.\n     */\n    public static final String AGGREGATOR = \"aggregator\";\n\n    /**\n     * Column name used to store the Shard ID for an Aggregator.\n     */\n    public static final String SHARD_ID = \"shardId\";\n\n    /**\n     * Column name used to store the last time an Aggregator updated.\n     */\n    public static final String LAST_WRITE_TIME = \"lastWriteTime\";\n\n    /**\n     * Column name used to store the lowest sequence value updated in the last\n     * flush of an Aggregator.\n     */\n    public static final String LAST_LOW_SEQ = \"lastLowSeq\";\n\n    /**\n     * Column name used to store the highest sequence value updated in the last\n     * flush of an Aggregator.\n     */\n    public static final String LAST_HIGH_SEQ = \"lastHighSeq\";\n\n    /**\n     * Column name used to store the status of the running or stopped\n     * Aggregator.\n     */\n    public static final String STATUS = \"status\";\n\n    /**\n     * Amount of read IOPS to provision for the Inventory table.\n     */\n    public static final long READ_CAPACITY = 10L;\n\n    /**\n     * Amount of write IOPS to provision for the Inventory table.\n     */\n    public static final long WRITE_CAPACITY = 10L;\n\n    /**\n     * Available states for an Aggregator to be in.\n     */\n    public static enum STATE {\n        STARTING, RUNNING, STOPPED, SERIALISATION_ERROR, UNKNOWN_ERROR;\n    }\n\n    private AmazonDynamoDB dynamoClient;\n\n    public InventoryModel(AmazonDynamoDB dynamoClient) throws Exception {\n        this.dynamoClient = dynamoClient;\n        init();\n    }\n\n    public InventoryModel(AWSCredentialsProvider credentials) throws Exception {\n        this(new AmazonDynamoDBClient(credentials));\n    }\n\n    protected void init() throws Exception {\n        List<AttributeDefinition> attributes = new ArrayList<AttributeDefinition>() {\n            {\n    
            add(new AttributeDefinition().withAttributeName(InventoryModel.AGGREGATOR).withAttributeType(\n                        \"S\"));\n                add(new AttributeDefinition().withAttributeName(InventoryModel.SHARD_ID).withAttributeType(\n                        \"S\"));\n            }\n        };\n\n        List<KeySchemaElement> key = new ArrayList<KeySchemaElement>() {\n            {\n                add(new KeySchemaElement().withAttributeName(InventoryModel.AGGREGATOR).withKeyType(\n                        KeyType.HASH));\n                add(new KeySchemaElement().withAttributeName(InventoryModel.SHARD_ID).withKeyType(\n                        KeyType.RANGE));\n            }\n        };\n\n        DynamoUtils.initTable(dynamoClient, InventoryModel.TABLE_NAME,\n                InventoryModel.READ_CAPACITY, InventoryModel.WRITE_CAPACITY, attributes, key, null);\n\n        online = true;\n    }\n\n    private Map<String, AttributeValue> getKey(final String streamName,\n            final String applicationName, final String namespace, final String shardId) {\n        return new HashMap<String, AttributeValue>() {\n            {\n                put(InventoryModel.AGGREGATOR, new AttributeValue().withS(String.format(\"%s.%s.%s\",\n                        streamName, applicationName, namespace)));\n                put(InventoryModel.SHARD_ID, new AttributeValue().withS(shardId));\n            }\n        };\n    }\n\n    public void removeState(final String streamName, final String applicationName,\n            final String namespace, final String shardId) throws Exception {\n        DeleteItemRequest req = new DeleteItemRequest().withTableName(TABLE_NAME).withKey(\n                getKey(streamName, applicationName, namespace, shardId));\n        dynamoClient.deleteItem(req);\n    }\n\n    /**\n     * Update the Inventory table with the state of an Aggregator.\n     * \n     * @param streamName The Kinesis Stream being aggregated.\n     * @param 
applicationName The application name running the aggregator.\n     * @param workerId The worker ID which encapsulates an instance of an\n     *        Aggregator.\n     * @param lastLowSeq The lowest sequence number observed in all records\n     *        which were flushed prior to this update.\n     * @param lastHighSeq The highest sequence number for all records flushed in\n     *        this update.\n     * @param lastWriteTime The write time of the data to Dynamo DB.\n     * @param status The {@link STATE} of the Aggregator.\n     * @throws Exception\n     */\n    public void update(final String streamName, final String applicationName,\n            final String namespace, final String shardId, final String lastLowSeq,\n            final String lastHighSeq, final long lastWriteTime, final STATE status)\n            throws Exception {\n        // create the last write time value\n        final String lastUpdateDateLabel = StreamAggregator.dateFormatter.format(new Date(\n                lastWriteTime));\n        // generate the item update\n        Map<String, AttributeValueUpdate> inventoryUpdate = new HashMap<String, AttributeValueUpdate>() {\n            {\n                put(InventoryModel.LAST_WRITE_TIME,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withS(lastUpdateDateLabel)));\n                if (lastLowSeq != null)\n                    put(InventoryModel.LAST_LOW_SEQ,\n                            new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                    new AttributeValue().withS(lastLowSeq)));\n                if (lastHighSeq != null)\n                    put(InventoryModel.LAST_HIGH_SEQ,\n                            new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                    new AttributeValue().withS(lastHighSeq)));\n                if (status != null)\n 
                   put(InventoryModel.STATUS,\n                            new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                    new AttributeValue().withS(status.name())));\n            }\n        };\n        DynamoUtils.updateWithRetries(\n                dynamoClient,\n                new UpdateItemRequest().withTableName(InventoryModel.TABLE_NAME).withKey(\n                        getKey(streamName, applicationName, namespace, shardId)).withAttributeUpdates(\n                        inventoryUpdate));\n    }\n\n    /**\n     * Method which returns the update information for an Aggregator process.\n     * \n     * @param streamName The Stream name which is being aggregated.\n     * @param applicationName The application which is hosting the aggregator.\n     * @param workerId The worker ID which is running an aggregator instance.\n     * @return Tuple of Last Write Time (String), Last Low Sequence, and Last\n     *         High Sequence\n     */\n    public InventoryStatus getLastUpdate(final String streamName, final String applicationName,\n            final String namespace, final String shardId) {\n        GetItemResult response = dynamoClient.getItem(InventoryModel.TABLE_NAME,\n                getKey(streamName, applicationName, namespace, shardId));\n        if (response.getItem() != null) {\n            Map<String, AttributeValue> item = response.getItem();\n            AttributeValue lastTime, lowSeq, highSeq = null;\n            lastTime = item.get(InventoryModel.LAST_WRITE_TIME);\n            lowSeq = item.get(InventoryModel.LAST_LOW_SEQ);\n            highSeq = item.get(InventoryModel.LAST_HIGH_SEQ);\n\n            return new InventoryStatus(lastTime == null ? null : lastTime.getS(),\n                    lowSeq == null ? null : lowSeq.getS(), highSeq == null ? null : highSeq.getS());\n        } else {\n            return null;\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/InventoryStatus.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\npublic class InventoryStatus {\n    private String lastTime, lowSeq, highSeq;\n\n    public InventoryStatus(String lastTime, String lowSeq, String highSeq) {\n        super();\n        this.lastTime = lastTime;\n        this.lowSeq = lowSeq;\n        this.highSeq = highSeq;\n    }\n\n    public String getLastTime() {\n        return lastTime;\n    }\n\n    public String getLowSeq() {\n        return lowSeq;\n    }\n\n    public String getHighSeq() {\n        return highSeq;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/LabelSet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.LinkedHashMap;\nimport java.util.List;\n\n/**\n * Object which encapsulates all label values to be tracked and managed within\n * the Aggregator framework. Adheres to the properties of strict ordering by\n * insert sequence, equality on the basis of values as well as hash on the basis\n * of values, and a name property synthesized from the keyset, and a String\n * value synthesized from the value set\n */\npublic class LabelSet extends LinkedHashMap<String, String> {\n    private final String setDelimiter = \".\";\n\n    private String alias = null;\n\n    public LabelSet() {\n        super();\n    }\n\n    public static LabelSet fromIntegerKeys(List<Integer> keys) {\n        LabelSet labels = new LabelSet();\n        for (Integer i : keys) {\n            labels.put(\"\" + i, null);\n        }\n        return labels;\n    }\n\n    public static LabelSet fromStringKeys(List<String> keys) {\n        LabelSet labels = new LabelSet();\n        for (String s : keys) {\n            labels.put(s, null);\n        }\n        return labels;\n    }\n\n    @Override\n    public String put(String key, String value) {\n        // wrap general map put with internal pre-processing of names\n        return super.put(StreamAggregatorUtils.methodToColumn(key), value);\n    }\n\n    
public String valuesAsString() {\n        StringBuffer sb = new StringBuffer();\n        for (String s : this.values()) {\n            sb.append(s + setDelimiter);\n        }\n\n        return sb.substring(0, sb.length() - 1);\n    }\n\n    public String getName() {\n        if (this.alias == null) {\n            StringBuffer sb = new StringBuffer();\n            for (String s : this.keySet()) {\n                sb.append(StreamAggregatorUtils.methodToColumn(s) + setDelimiter);\n            }\n\n            return sb.substring(0, sb.length() - 1);\n        } else {\n            return this.alias;\n        }\n    }\n\n    public LabelSet withAlias(String alias) {\n        this.alias = alias;\n\n        return this;\n    }\n\n    @Override\n    public boolean equals(Object o) {\n        if (o == null)\n            return false;\n\n        if (!(o instanceof LabelSet))\n            return false;\n\n        LabelSet other = (LabelSet) o;\n        boolean matched = false;\n\n        // match on keys\n        for (String s : this.keySet()) {\n            matched = false;\n            for (String k : other.keySet()) {\n                if (k.equals(s)) {\n                    matched = true;\n                    break;\n                }\n            }\n            if (!matched)\n                return false;\n        }\n\n        // must match on values\n        for (String t : this.values()) {\n            matched = false;\n            for (String v : other.values()) {\n                if (t.equals(v)) {\n                    matched = true;\n                    break;\n                }\n            }\n            if (!matched)\n                return false;\n        }\n\n        return true;\n    }\n\n    @Override\n    public int hashCode() {\n        int res = 17;\n        for (String s : this.keySet()) {\n            res = 31 * res + (s == null ? 0 : s.hashCode());\n        }\n\n        for (String t : this.values()) {\n            res = 31 * res + (t == null ? 
0 : t.hashCode());\n        }\n        return res;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/StreamAggregator.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.math.BigInteger;\nimport java.text.SimpleDateFormat;\nimport java.util.ArrayList;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.ClientConfiguration;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.regions.Regions;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.ComparisonOperator;\nimport com.amazonaws.services.dynamodbv2.model.Condition;\nimport com.amazonaws.services.kinesis.AmazonKinesisClient;\nimport com.amazonaws.services.kinesis.aggregators.cache.AggregateCache;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoQueryEngine.QueryKeyScope;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport 
com.amazonaws.services.kinesis.aggregators.idempotency.DefaultIdempotencyCheck;\nimport com.amazonaws.services.kinesis.aggregators.idempotency.IIdempotencyCheck;\nimport com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter;\nimport com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.IDataExtractor;\nimport com.amazonaws.services.kinesis.model.Record;\n\n/**\n * StreamAggregator is the main implementation of the Kinesis Aggregators\n * framework. It provides the ability to create dynamic aggregations in Dynamo\n * DB for data being streamed through Kinesis. Objects are aggregated on the\n * basis of the unique values contained in the Aggregate Label by Date, storing\n * event counts. Additionally, by configuring a set of summary values on the\n * StreamAggregator with AggregatorType set to SUM, an additional data element\n * is aggregated tracking the total value observed for each element of the\n * stream.Data in DynamoDB is aggregated on the basis of the configured\n * TimeHorizon, from granularity of SECOND to FOREVER.\n * \n * @author meyersi\n */\npublic class StreamAggregator implements IStreamAggregator {\n\tpublic static final String AWSApplication = \"AmazonKinesisAggregators\";\n\n\tpublic static final String version = \".9.2.7.3\";\n\n\t/**\n\t * The default column name for the aggregated value, if none is provided.\n\t */\n\tpublic static final String DEFAULT_AGGREGATE_COLUMN = \"aggregatedValue\";\n\n\t/**\n\t * The default attribute name for the date value of an aggregate, if none is\n\t * provided.\n\t */\n\tpublic static final String DEFAULT_DATE_VALUE = \"dateValue\";\n\n\t/**\n\t * The default attribute name for the count of events observed for an\n\t * aggregate value and date, if none is provided.\n\t */\n\tpublic static final String EVENT_COUNT = 
\"eventCount\";\n\n\t/**\n\t * The attribute name for the time horizon marker\n\t */\n\tpublic static final String TIME_HORIZON_ATTR = \"timeHorizonType\";\n\n\t/**\n\t * The attribute name used for the last write sequence value in the table.\n\t */\n\tpublic static final String LAST_WRITE_SEQ = \"lastWriteSeq\";\n\n\t/**\n\t * The attribute name used for the timestamp of the update of the aggregate.\n\t */\n\tpublic static final String LAST_WRITE_TIME = \"lastWriteTime\";\n\n\t/**\n\t * The attribute used to refer to the partition key\n\t */\n\tpublic static final String REF_PARTITION_KEY = \"__partition_key\";\n\n\t/**\n\t * The attribute used to refer to the event sequence number\n\t */\n\tpublic static final String REF_SEQUENCE = \"__sequence\";\n\n\tpublic static final SimpleDateFormat dateFormatter = new SimpleDateFormat(\n\t\t\t\"yyyy-MM-dd HH:mm:ss\");\n\n\tprotected String namespace;\n\n\tprivate KinesisClientLibConfiguration config;\n\n\tprivate String environment;\n\n\tprivate AmazonDynamoDB dynamoClient;\n\n\tprivate AmazonKinesisClient kinesisClient;\n\n\tprivate InventoryModel inventory;\n\n\tprotected String tableName;\n\n\tprotected boolean withTimeHierarchy = false;\n\n\tprotected List<TimeHorizon> timeHorizons = new ArrayList<>();\n\n\tprotected AggregatorType aggregatorType = AggregatorType.COUNT;\n\n\tprotected long readCapacity;\n\n\tprotected long writeCapacity;\n\n\tprotected final String streamName;\n\n\tprotected final String applicationName;\n\n\tprotected String shardId = null;\n\n\tprivate boolean isFirstShardWorker = false;\n\n\tprivate final Log LOG = LogFactory.getLog(StreamAggregator.class);\n\n\tprivate Region region = null;\n\n\tprotected AggregateCache cache;\n\n\tprotected boolean online = false;\n\n\tprotected String lowSeq;\n\n\tprotected BigInteger highSeq = null;\n\n\tprotected long start;\n\n\tprivate IDataExtractor dataExtractor;\n\n\tprivate IDataStore dataStore;\n\n\tprivate IIdempotencyCheck idempotencyCheck = new 
DefaultIdempotencyCheck();\n\n\tprivate IMetricsEmitter metricsEmitter;\n\n\tprivate boolean raiseExceptionOnDataExtractionErrors = true;\n\n\tprivate int ignoredRecordsBelowHWM = 0;\n\n\tprivate boolean publishMetrics = false;\n\n\t/**\n\t * Copy Constructor\n\t * \n\t * @param template\n\t */\n\tpublic StreamAggregator(StreamAggregator template) throws Exception {\n\t\tthis.streamName = template.streamName;\n\t\tthis.applicationName = template.applicationName;\n\t\tthis.namespace = template.namespace;\n\t\tthis.config = template.config;\n\t\tthis.dataExtractor = template.dataExtractor.copy();\n\t\tthis.withDataStore(template.getDataStore());\n\t\tthis.withAggregatorType(template.aggregatorType);\n\t\tthis.withRaiseExceptionOnDataExtractionErrors(template.raiseExceptionOnDataExtractionErrors);\n\t\tthis.withStorageCapacity(template.readCapacity, template.writeCapacity);\n\t\tthis.withTableName(template.tableName);\n\t\tthis.withTimeHorizon(template.timeHorizons);\n\t\tthis.withIdempotencyCheck(template.idempotencyCheck);\n\t\tif (template.publishMetrics) {\n\t\t\tthis.publishMetrics = true;\n\t\t\tthis.metricsEmitter = template.metricsEmitter;\n\t\t}\n\t}\n\n\tpublic StreamAggregator(String streamName, String applicationName,\n\t\t\tString namespace, KinesisClientLibConfiguration config,\n\t\t\tIDataExtractor dataExtractor) {\n\t\tthis.streamName = streamName;\n\t\tthis.applicationName = applicationName;\n\t\tthis.namespace = namespace;\n\t\tthis.config = config;\n\t\tthis.dataExtractor = dataExtractor;\n\t}\n\n\tpublic void checkpoint() throws Exception {\n\t\tcache.flush();\n\t\tlowSeq = null;\n\n\t\t// update the worker inventory showing progress to the last sequence\n\t\t// value\n\t\tinventory.update(this.streamName, this.applicationName, this.namespace,\n\t\t\t\tthis.shardId, this.lowSeq, this.highSeq.toString(),\n\t\t\t\tSystem.currentTimeMillis(), InventoryModel.STATE.RUNNING);\n\n\t\t// warn and reset if there were any ignored records\n\t\tif 
(ignoredRecordsBelowHWM > 0) {\n\t\t\tlogWarn(String\n\t\t\t\t\t.format(\"Processed %s records which were ignored due to being below the current processing HWM\",\n\t\t\t\t\t\t\tignoredRecordsBelowHWM));\n\t\t\tignoredRecordsBelowHWM = 0;\n\t\t}\n\n\t\tLOG.debug(\"Aggregator Checkpoint for Shard \" + this.shardId\n\t\t\t\t+ \" Complete\");\n\t}\n\n\t/*\n\t * builder methods\n\t */\n\tpublic StreamAggregator withStorageCapacity(Long readCapacity,\n\t\t\tLong writeCapacity) {\n\t\tif (readCapacity != null)\n\t\t\tthis.readCapacity = readCapacity;\n\t\tif (writeCapacity != null)\n\t\t\tthis.writeCapacity = writeCapacity;\n\n\t\treturn this;\n\t}\n\n\tprivate void logInfo(String message) {\n\t\tLOG.info(\"[\" + this.shardId + \"] \" + message);\n\t}\n\n\tprivate void logWarn(String message) {\n\t\tLOG.warn(\"[\" + this.shardId + \"] \" + message);\n\t}\n\n\tprivate void logWarn(String message, Exception e) {\n\t\tLOG.warn(\"[\" + this.shardId + \"] \" + message);\n\t\tLOG.error(e);\n\t}\n\n\tpublic void initialize(String shardId) throws Exception {\n\t\t// Set System properties to allow entity expansion of unlimited items in\n\t\t// response documents from AWS API\n\t\t//\n\t\t// see https://blogs.oracle.com/joew/entry/jdk_7u45_aws_issue_123 for\n\t\t// more information\n\t\tSystem.setProperty(\"entityExpansionLimit\", \"0\");\n\t\tSystem.setProperty(\"jdk.xml.entityExpansionLimit\", \"0\");\n\n\t\tthis.shardId = shardId;\n\n\t\t// establish we are running on the lowest shard on the basis of hash\n\t\t// range\n\t\tAmazonKinesisClient kinesisClient = new AmazonKinesisClient(\n\t\t\t\tthis.config.getKinesisCredentialsProvider());\n\t\tif (this.config.getRegionName() != null) {\n\t\t\tregion = Region.getRegion(Regions.fromName(this.config\n\t\t\t\t\t.getRegionName()));\n\t\t\tkinesisClient.setRegion(region);\n\t\t}\n\n\t\ttry {\n\t\t\tif (this.shardId.equals(StreamAggregatorUtils.getFirstShardName(\n\t\t\t\t\tkinesisClient, this.config.getStreamName()))) 
{\n\t\t\t\tthis.isFirstShardWorker = true;\n\t\t\t\tlogInfo(\"Aggregator taking Primary Thread Responsibility\");\n\t\t\t}\n\t\t} catch (Exception e) {\n\t\t\tlogWarn(\"Unable to establish if Worker Thread is Primary\");\n\t\t}\n\n\t\tvalidateConfig();\n\n\t\t// set the default aggregator type\n\t\tif (this.aggregatorType == null) {\n\t\t\tthis.aggregatorType = AggregatorType.COUNT;\n\t\t}\n\n\t\tif (this.dataExtractor == null)\n\t\t\tthrow new InvalidConfigurationException(\n\t\t\t\t\t\"Unable to create Aggregator Instance without a configured IDataStore\");\n\n\t\t// set the aggregator type on the data extractor\n\t\tthis.dataExtractor.setAggregatorType(this.aggregatorType);\n\t\tthis.dataExtractor.validate();\n\n\t\t// create connections to dynamo and kinesis\n\t\tClientConfiguration clientConfig = new ClientConfiguration()\n\t\t\t\t.withSocketTimeout(60000);\n\t\tthis.dynamoClient = new AmazonDynamoDBAsyncClient(\n\t\t\t\tthis.config.getDynamoDBCredentialsProvider(), clientConfig);\n\t\tif (region != null)\n\t\t\tthis.dynamoClient.setRegion(region);\n\n\t\tthis.kinesisClient = new AmazonKinesisClient(\n\t\t\t\tthis.config.getKinesisCredentialsProvider());\n\t\tif (region != null)\n\t\t\tthis.kinesisClient.setRegion(region);\n\n\t\tinventory = new InventoryModel(this.dynamoClient);\n\n\t\t// get the latest sequence number checkpointed for this named aggregator\n\t\t// on this shard\n\t\tInventoryStatus lastUpdate = inventory.getLastUpdate(this.streamName,\n\t\t\t\tthis.applicationName, this.namespace, this.shardId);\n\t\tif (lastUpdate != null && lastUpdate.getHighSeq() != null) {\n\t\t\t// set the current high sequence to the last high sequence\n\t\t\tthis.highSeq = new BigInteger(lastUpdate.getHighSeq());\n\t\t}\n\n\t\t// log that we are now starting up\n\t\tinventory.update(this.streamName, this.applicationName, this.namespace,\n\t\t\t\tthis.shardId, null, null, System.currentTimeMillis(),\n\t\t\t\tInventoryModel.STATE.STARTING);\n\n\t\t// set the table name 
we will use for aggregated values\n\t\tif (this.tableName == null) {\n\t\t\tthis.tableName = StreamAggregatorUtils.getTableName(\n\t\t\t\t\tconfig.getApplicationName(), this.getNamespace());\n\t\t}\n\n\t\tif (this.environment != null && !this.environment.equals(\"\"))\n\t\t\tthis.tableName = String.format(\"%s.%s\", this.environment,\n\t\t\t\t\tthis.tableName);\n\n\t\t// resolve the basic data being aggregated\n\t\tString labelColumn = StreamAggregatorUtils.methodToColumn(dataExtractor\n\t\t\t\t.getAggregateLabelName());\n\t\tString dateColumn = dataExtractor.getDateValueName() == null ? DEFAULT_DATE_VALUE\n\t\t\t\t: dataExtractor.getDateValueName();\n\n\t\t// configure the default dynamo data store\n\t\tif (this.dataStore == null) {\n\t\t\tthis.dataStore = new DynamoDataStore(this.dynamoClient,\n\t\t\t\t\tthis.kinesisClient, this.aggregatorType, this.streamName,\n\t\t\t\t\tthis.tableName, labelColumn, dateColumn)\n\t\t\t\t\t.withStorageCapacity(this.readCapacity, this.writeCapacity);\n\t\t\tthis.dataStore.setRegion(region);\n\t\t}\n\t\tthis.dataStore.initialise();\n\n\t\t// configure the cache so it can do its work\n\t\tcache = new AggregateCache(this.shardId)\n\t\t\t\t.withCredentials(this.config.getKinesisCredentialsProvider())\n\t\t\t\t.withAggregateType(this.aggregatorType)\n\t\t\t\t.withTableName(this.tableName).withLabelColumn(labelColumn)\n\t\t\t\t.withDateColumn(dateColumn).withDataStore(this.dataStore);\n\n\t\t// create a cloudwatch client for the cache to publish against if needed\n\t\tif (this.publishMetrics && this.metricsEmitter == null) {\n\t\t\tthis.metricsEmitter = new CloudWatchMetricsEmitter(this.tableName,\n\t\t\t\t\tthis.config.getCloudWatchCredentialsProvider());\n\t\t}\n\n\t\tif (this.metricsEmitter != null) {\n\t\t\tif (this.config.getRegionName() != null)\n\t\t\t\tthis.metricsEmitter.setRegion(region);\n\t\t}\n\t\t// add the metrics publisher to the cache if we are bound to the lowest\n\t\t// shard\n\t\tif (this.metricsEmitter != null) 
{\n\t\t\tcache.withMetricsEmitter(this.metricsEmitter);\n\t\t}\n\t\tcache.initialise();\n\n\t\t// set the user agent\n\t\tStringBuilder userAgent = new StringBuilder(\n\t\t\t\tClientConfiguration.DEFAULT_USER_AGENT);\n\t\tuserAgent.append(\" \");\n\t\tuserAgent.append(this.AWSApplication);\n\t\tuserAgent.append(\"/\");\n\t\tuserAgent.append(this.version);\n\t\tthis.config.getKinesisClientConfiguration().setUserAgent(\n\t\t\t\tuserAgent.toString());\n\n\t\t// log startup state\n\t\tStringBuffer sb = new StringBuffer();\n\t\tfor (TimeHorizon t : timeHorizons) {\n\t\t\tsb.append(String.format(\"%s,\", t.name()));\n\t\t}\n\t\tsb.deleteCharAt(sb.length() - 1);\n\n\t\tlogInfo(String\n\t\t\t\t.format(\"Amazon Kinesis Stream Aggregator Online\\nStream: %s\\nApplication: %s\\nNamespace: %s\\nWorker: %s\\nGranularity: %s\\nContent Extracted With: %s\",\n\t\t\t\t\t\tstreamName, applicationName, this.namespace,\n\t\t\t\t\t\tthis.config.getWorkerIdentifier(), sb.toString(),\n\t\t\t\t\t\tdataExtractor.getClass().getName()));\n\t\tif (this.highSeq != null)\n\t\t\tlogInfo(String.format(\"Processing Data from Seq: %s\", this.highSeq));\n\t\tonline = true;\n\t}\n\n\tprivate void validateConfig() throws Exception {\n\t\t// this would only be null if the containing worker IRecordProcessor has\n\t\t// not called initialise()\n\t\tif (this.shardId == null) {\n\t\t\tthrow new Exception(\n\t\t\t\t\t\"Aggregator Not Online - Call Initialise to establish System State on Shard\");\n\t\t}\n\n\t\t// default to Hourly granularity if the customer has not configured it\n\t\tif (this.timeHorizons == null) {\n\t\t\twithTimeHorizon(TimeHorizon.HOUR);\n\t\t}\n\t}\n\n\t/**\n\t * Add a single\n\t * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} to the\n\t * configuration of the Aggregator\n\t * \n\t * @param horizon\n\t *            TimeHorizon value to be used for aggregated data\n\t * @return\n\t */\n\tpublic StreamAggregator withTimeHorizon(TimeHorizon horizon) {\n\t\tif 
(this.timeHorizons == null)\n\t\t\tthis.timeHorizons = new ArrayList<>();\n\n\t\tthis.timeHorizons.add(horizon);\n\n\t\treturn this;\n\t}\n\n\t/**\n\t * Add a set of\n\t * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} values to\n\t * the configuration of the Aggregator\n\t * \n\t * @param horizon\n\t *            TimeHorizon value to be used for aggregated data\n\t * @return\n\t */\n\tpublic StreamAggregator withTimeHorizon(List<TimeHorizon> horizons) {\n\t\tif (this.timeHorizons == null) {\n\t\t\tthis.timeHorizons = horizons;\n\t\t} else {\n\t\t\tthis.timeHorizons.addAll(horizons);\n\t\t}\n\n\t\treturn this;\n\t}\n\n\t/**\n\t * Add a set of\n\t * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} values to\n\t * the configuration of the Aggregator\n\t * \n\t * @param horizon\n\t *            TimeHorizon value to be used for aggregated data\n\t * @return\n\t */\n\tpublic StreamAggregator withTimeHorizon(TimeHorizon... horizons) {\n\t\tif (this.timeHorizons == null)\n\t\t\tthis.timeHorizons = new ArrayList<>();\n\n\t\tfor (TimeHorizon t : horizons) {\n\t\t\tthis.timeHorizons.add(t);\n\t\t}\n\n\t\treturn this;\n\t}\n\n\t/**\n\t * Set the name of the data store in Dynamo DB for the Aggregated Data\n\t * \n\t * @param tableName\n\t *            The table name to use for data storage\n\t * @return\n\t */\n\tpublic StreamAggregator withTableName(String tableName) {\n\t\tthis.tableName = tableName;\n\t\treturn this;\n\t}\n\n\t/**\n\t * Select an explicit\n\t * {@link com.amazonaws.services.kinesis.aggregators.AggregatorType} for the\n\t * Aggregator. Default is COUNT\n\t * \n\t * @param t\n\t *            The Aggregator Type to use\n\t * @return\n\t */\n\tpublic StreamAggregator withAggregatorType(AggregatorType t) {\n\t\tif (t != null) {\n\t\t\tthis.aggregatorType = t;\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * Override the default behaviour of an Aggregator to fail when the data\n\t * stream cannot be deserialised. 
When setting this value to 'true', then\n\t * the Aggregator stream will be able to deal with bad data that cannot be\n\t * aggregated, and will simply continue working\n\t * \n\t * @param bool\n\t *            Boolean indicating whether to fail when bad data is received\n\t *            on the stream and cannot be deserialised\n\t * @return\n\t */\n\tpublic StreamAggregator withRaiseExceptionOnDataExtractionErrors(\n\t\t\tboolean bool) {\n\t\tthis.raiseExceptionOnDataExtractionErrors = bool;\n\t\treturn this;\n\t}\n\n\t/**\n\t * Should we publish CloudWatch metrics for all captured data?\n\t * \n\t * @param bool\n\t * @return\n\t */\n\tpublic StreamAggregator withCloudWatchMetrics() {\n\t\tthis.publishMetrics = true;\n\t\treturn this;\n\t}\n\n\t/**\n\t * Allow configuring a non-Default data store\n\t * \n\t * @param dataStore\n\t * @return\n\t */\n\tpublic StreamAggregator withDataStore(IDataStore dataStore) {\n\t\tif (dataStore != null) {\n\t\t\tthis.dataStore = dataStore;\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * Allow configuring a non-Default metrics emitter\n\t * \n\t * @param metricsEmitter\n\t * @return\n\t */\n\tpublic StreamAggregator withMetricsEmitter(IMetricsEmitter metricsEmitter) {\n\t\tif (metricsEmitter != null) {\n\t\t\tthis.metricsEmitter = metricsEmitter;\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * Allow configuring a non-Default idempotency check\n\t * \n\t * @param idempotencyCheck\n\t * @return\n\t */\n\tpublic StreamAggregator withIdempotencyCheck(\n\t\t\tIIdempotencyCheck idempotencyCheck) {\n\t\tif (idempotencyCheck != null) {\n\t\t\tthis.idempotencyCheck = idempotencyCheck;\n\t\t}\n\t\treturn this;\n\t}\n\n\tpublic StreamAggregator withEnvironment(EnvironmentType environment) {\n\t\tthis.environment = environment.name();\n\t\treturn this;\n\t}\n\n\tpublic StreamAggregator withEnvironment(String environment) {\n\t\tthis.environment = environment;\n\t\treturn this;\n\t}\n\n\t/* Simple property accessors */\n\tpublic String 
getNamespace() {\n\t\treturn this.namespace;\n\t}\n\n\tpublic IDataExtractor getDataExtractor() {\n\t\treturn this.dataExtractor;\n\t}\n\n\tpublic IDataStore getDataStore() {\n\t\treturn this.dataStore;\n\t}\n\n\tpublic String getTableName() {\n\t\treturn this.tableName;\n\t}\n\n\tpublic String getLabelAttribute() {\n\t\treturn this.dataExtractor.getAggregateLabelName();\n\t}\n\n\tpublic String getDateAttribute() {\n\t\treturn this.dataExtractor.getDateValueName();\n\t}\n\n\tpublic AggregatorType getAggregatorType() {\n\t\treturn this.aggregatorType;\n\t}\n\n\tpublic long getReadCapacity() {\n\t\treturn this.readCapacity;\n\t}\n\n\tpublic long getWriteCapacity() {\n\t\treturn this.writeCapacity;\n\t}\n\n\tpublic List<TimeHorizon> getTimeHorizon() {\n\t\treturn this.timeHorizons;\n\t}\n\n\t/**\n\t * Shut down an aggregator and mark its state as Stopped in the Inventory\n\t * Table\n\t * \n\t * @param flushState\n\t *            Should the aggregator clear its pending updates prior to\n\t *            shutting down\n\t * @param withState\n\t *            Final status for the aggregator\n\t * @throws Exception\n\t */\n\tpublic void shutdown() throws Exception {\n\t\tshutdown(true);\n\t}\n\n\tpublic void shutdown(boolean flushState) throws Exception {\n\t\tshutdown(flushState, null);\n\t}\n\n\tpublic void shutdown(boolean flushState, InventoryModel.STATE withState)\n\t\t\tthrows Exception {\n\t\tif (flushState)\n\t\t\tcheckpoint();\n\n\t\tif (inventory != null)\n\t\t\tinventory.update(this.streamName, this.applicationName,\n\t\t\t\t\tthis.namespace, this.shardId, null, null, System\n\t\t\t\t\t\t\t.currentTimeMillis(),\n\t\t\t\t\twithState == null ? 
InventoryModel.STATE.STOPPED\n\t\t\t\t\t\t\t: withState);\n\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\tpublic void aggregate(List<Record> records) throws Exception {\n\t\tList<InputEvent> events = new ArrayList<>();\n\n\t\tfor (Record r : records) {\n\t\t\tevents.add(new InputEvent(r));\n\t\t}\n\n\t\taggregateEvents(events);\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\tpublic void aggregateEvents(List<InputEvent> events) throws Exception {\n\t\tstart = System.currentTimeMillis();\n\t\tint aggregatedEventCount = 0;\n\t\tint aggregatedElementCount = 0;\n\n\t\tif (!online) {\n\t\t\tthrow new Exception(\"Aggregator Not Initialised\");\n\t\t}\n\n\t\tBigInteger thisSequence;\n\t\tList<AggregateData> extractedItems = null;\n\t\tDate eventDate = null;\n\n\t\ttry {\n\t\t\tfor (InputEvent event : events) {\n\t\t\t\t// reset extracted items\n\t\t\t\textractedItems = null;\n\n\t\t\t\tif (event.getSequenceNumber() != null) {\n\t\t\t\t\tthisSequence = new BigInteger(event.getSequenceNumber());\n\t\t\t\t\t// ignore any records which are going backward with regard\n\t\t\t\t\t// to\n\t\t\t\t\t// the current hwm\n\t\t\t\t\tif (highSeq != null\n\t\t\t\t\t\t\t&& highSeq.compareTo(thisSequence) != -1) {\n\t\t\t\t\t\tignoredRecordsBelowHWM++;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// set the low sequence if this is the first record received\n\t\t\t\t// after a flush\n\t\t\t\tif (lowSeq == null)\n\t\t\t\t\tlowSeq = event.getSequenceNumber();\n\n\t\t\t\t// high sequence is always the latest value\n\t\t\t\thighSeq = new BigInteger(event.getSequenceNumber());\n\n\t\t\t\t// extract the data from the input event\n\t\t\t\ttry {\n\t\t\t\t\textractedItems = dataExtractor.getData(event);\n\t\t\t\t} catch (SerializationException se) {\n\t\t\t\t\t// customer may have elected to suppress serialisation\n\t\t\t\t\t// errors if the stream is expected have heterogenous data\n\t\t\t\t\t// on it\n\t\t\t\t\tif (this.raiseExceptionOnDataExtractionErrors) {\n\t\t\t\t\t\tthrow 
se;\n\t\t\t\t\t} else {\n\t\t\t\t\t\tlogWarn(String.format(\n\t\t\t\t\t\t\t\t\"Serialisation Exception Sequence %s Partition Key %s\",\n\t\t\t\t\t\t\t\tevent.getSequenceNumber(),\n\t\t\t\t\t\t\t\tevent.getPartitionKey()), se);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// data extractor may have returned multiple data elements, or\n\t\t\t\t// be empty if there were serialisation problems which are\n\t\t\t\t// suppressed\n\t\t\t\tif (extractedItems != null) {\n\t\t\t\t\taggregatedEventCount++;\n\n\t\t\t\t\tfor (AggregateData data : extractedItems) {\n\t\t\t\t\t\t// run the idempotency check\n\t\t\t\t\t\tif (!this.idempotencyCheck.doProcess(\n\t\t\t\t\t\t\t\tevent.getPartitionKey(),\n\t\t\t\t\t\t\t\tevent.getSequenceNumber(), data,\n\t\t\t\t\t\t\t\tevent.getData())) {\n\t\t\t\t\t\t\tlogInfo(String\n\t\t\t\t\t\t\t\t\t.format(\"Ignoring Event %s as it failed Idempotency Check\",\n\t\t\t\t\t\t\t\t\t\t\tevent.getPartitionKey()));\n\t\t\t\t\t\t\tcontinue;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\taggregatedElementCount++;\n\n\t\t\t\t\t\t// if the data extractor didn't have a date value to\n\t\t\t\t\t\t// extract, then use the current time\n\t\t\t\t\t\teventDate = data.getDate();\n\t\t\t\t\t\tif (eventDate == null) {\n\t\t\t\t\t\t\teventDate = new Date(System.currentTimeMillis());\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// generate the local updates, one per time horizon that\n\t\t\t\t\t\t// is requested\n\t\t\t\t\t\tfor (TimeHorizon h : timeHorizons) {\n\t\t\t\t\t\t\t// atomically update the aggregate table with event\n\t\t\t\t\t\t\t// count or count + summaries\n\t\t\t\t\t\t\tcache.update(\n\t\t\t\t\t\t\t\t\taggregatorType,\n\t\t\t\t\t\t\t\t\tdata.getLabels(),\n\t\t\t\t\t\t\t\t\t(timeHorizons.size() > 1 ? 
h\n\t\t\t\t\t\t\t\t\t\t\t.getItemWithMultiValueFormat(eventDate)\n\t\t\t\t\t\t\t\t\t\t\t: h.getValue(eventDate)), h, event\n\t\t\t\t\t\t\t\t\t\t\t.getSequenceNumber(), 1, data\n\t\t\t\t\t\t\t\t\t\t\t.getSummaries(), dataExtractor\n\t\t\t\t\t\t\t\t\t\t\t.getSummaryConfig());\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tlogInfo(String\n\t\t\t\t\t.format(\"Aggregation Complete - %s Records and %s Elements in %s ms\",\n\t\t\t\t\t\t\taggregatedEventCount, aggregatedElementCount,\n\t\t\t\t\t\t\t(System.currentTimeMillis() - start)));\n\t\t} catch (SerializationException se) {\n\t\t\tshutdown(true, InventoryModel.STATE.SERIALISATION_ERROR);\n\t\t\tLOG.error(se);\n\t\t\tthrow se;\n\t\t} catch (Exception e) {\n\t\t\tshutdown(true, InventoryModel.STATE.UNKNOWN_ERROR);\n\t\t\tLOG.error(e);\n\t\t\tthrow e;\n\t\t}\n\t}\n\n\t/**\n\t * Return the stored value for a label and date value at the configured time\n\t * granularity\n\t * \n\t * @param label\n\t *            The Aggregated Label Value to get data for\n\t * @param dateValue\n\t *            The Date Value to obtain data from\n\t * @param h\n\t *            The Time Horizon to query\n\t * @return\n\t */\n\tpublic List<Map<String, AttributeValue>> queryValue(String label,\n\t\t\tDate dateValue, ComparisonOperator comp) throws Exception {\n\t\tif (!(this.dataStore instanceof DynamoDataStore)) {\n\t\t\tthrow new Exception(\n\t\t\t\t\t\"Unable to Query by Date unless Data Store is Dynamo DB\");\n\t\t}\n\n\t\tif (comp != null && comp.equals(ComparisonOperator.BETWEEN)) {\n\t\t\tthrow new InvalidConfigurationException(\n\t\t\t\t\t\"Between Operator Not Supported\");\n\t\t}\n\n\t\treturn ((DynamoDataStore) this.dataStore).queryEngine().queryByKey(\n\t\t\t\tlabel, dateValue, comp);\n\t}\n\n\t/**\n\t * Query all data in the data store for a given range of date values and\n\t * time horizon\n\t * \n\t * @param dateValue\n\t *            The date to search relative to\n\t * @param h\n\t *            The Time Horizon to 
limit search to\n\t * @param comp\n\t *            The Comparison Operator to be applied to the dateValue, such\n\t *            as 'equal' EQ or 'greater than' GT\n\t * @return A list of data stored in Dynamo DB for the time range\n\t * @throws Exception\n\t */\n\tpublic List<Map<String, AttributeValue>> queryByDate(Date dateValue,\n\t\t\tTimeHorizon h, ComparisonOperator comp, int threads)\n\t\t\tthrows Exception {\n\t\tif (!(this.dataStore instanceof DynamoDataStore)) {\n\t\t\tthrow new Exception(\n\t\t\t\t\t\"Unable to Query by Date unless Data Store is Dynamo DB\");\n\t\t}\n\n\t\tif (comp.equals(ComparisonOperator.BETWEEN)) {\n\t\t\tthrow new InvalidConfigurationException(\n\t\t\t\t\t\"Between Operator Not Supported\");\n\t\t}\n\n\t\t// resolve the query date based on if we are managing multiple time\n\t\t// values or a single\n\t\tString queryDate = null;\n\t\tif (this.timeHorizons.size() > 1) {\n\t\t\tqueryDate = h.getItemWithMultiValueFormat(dateValue);\n\t\t} else {\n\t\t\tqueryDate = h.getValue(dateValue);\n\t\t}\n\n\t\t// setup the query condition on date\n\t\tMap<String, Condition> conditions = new HashMap<>();\n\t\tCondition dateCondition = new Condition().withComparisonOperator(comp)\n\t\t\t\t.withAttributeValueList(new AttributeValue().withS(queryDate));\n\t\tconditions.put(this.dataExtractor.getDateValueName(), dateCondition);\n\n\t\tList<Map<String, AttributeValue>> items = ((DynamoDataStore) this.dataStore)\n\t\t\t\t.queryEngine().parallelQueryDate(\n\t\t\t\t\t\tthis.dataExtractor.getDateValueName(), conditions,\n\t\t\t\t\t\tthreads);\n\n\t\treturn items;\n\t}\n\n\tpublic List<TableKeyStructure> parallelQueryKeys(QueryKeyScope scope,\n\t\t\tint threads) throws Exception {\n\t\tif (!(this.dataStore instanceof DynamoDataStore)) {\n\t\t\tthrow new Exception(\n\t\t\t\t\t\"Unable to Query Keys unless Data Store is Dynamo DB\");\n\t\t}\n\n\t\tlogInfo(String\n\t\t\t\t.format(\"Executing Unique Key Scan on %s with Scope %s using %s 
Threads\",\n\t\t\t\t\t\tthis.tableName, scope.toString(), threads));\n\t\treturn ((DynamoDataStore) this.dataStore).queryEngine()\n\t\t\t\t.parallelQueryKeys(scope, threads);\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/StreamAggregatorUtils.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.io.File;\nimport java.math.BigInteger;\nimport java.util.ArrayList;\nimport java.util.Collection;\nimport java.util.Collections;\nimport java.util.Comparator;\nimport java.util.HashMap;\nimport java.util.LinkedHashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.GetItemRequest;\nimport com.amazonaws.services.kinesis.AmazonKinesisClient;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils;\nimport com.amazonaws.services.kinesis.model.LimitExceededException;\nimport com.amazonaws.services.kinesis.model.ResourceNotFoundException;\nimport com.amazonaws.services.kinesis.model.Shard;\nimport com.amazonaws.services.kinesis.model.StreamDescription;\nimport com.fasterxml.jackson.databind.DeserializationFeature;\nimport com.fasterxml.jackson.databind.JsonNode;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n/**\n * Utility methods used across the Amazon Kinesis Aggregators framework.\n */\npublic class StreamAggregatorUtils 
{\n\tprivate static final Log LOG = LogFactory\n\t\t\t.getLog(StreamAggregatorUtils.class);\n\n\tprivate static final String rsTimeformat = \"yyyy-mm-dd hh:mi:ss\";\n\n\tprivate static final ObjectMapper mapper = new ObjectMapper();\n\n\tstatic {\n\t\tmapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY,\n\t\t\t\ttrue);\n\t\tmapper.configure(DeserializationFeature.WRAP_EXCEPTIONS, false);\n\t}\n\n\tprivate StreamAggregatorUtils() {\n\t}\n\n\t/**\n\t * Helper method which converts input values from Aggregator configurations\n\t * into names for attributes in Dynamo DB. In particular this supports\n\t * Object based Aggregators who will be configured using get methods. For\n\t * example, this methods will turn 'getValue' into 'value' and 'isSomething'\n\t * to 'isSomething'.\n\t * \n\t * @param methodName\n\t *            The name of the method to be converted into an Attribute Name.\n\t * @return A string value to be used as the corresponding attribute name.\n\t */\n\tpublic static String methodToColumn(String methodName) {\n\t\tif (methodName.startsWith(\"get\")) {\n\t\t\treturn methodName.substring(3, 4).toLowerCase()\n\t\t\t\t\t+ methodName.substring(4);\n\t\t} else {\n\t\t\treturn methodName.substring(0, 1).toLowerCase()\n\t\t\t\t\t+ methodName.substring(1);\n\t\t}\n\t}\n\n\t/**\n\t * Returns a statement which can be used to create an External Table in Hive\n\t * which wraps the Aggregator Table indicated, using the required name in\n\t * Hive.\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param hiveTableName\n\t *            The table name to generate for the Hive Table.\n\t * @param dynamoTable\n\t *            The name of the aggregator table in Dynamo DB.\n\t * @return A CREATE EXTERNAL TABLE statement to be used in Hive\n\t * @throws Exception\n\t */\n\tpublic static String getDynamoHiveWrapper(AmazonDynamoDB dynamoClient,\n\t\t\tString hiveTableName, String dynamoTable) throws 
Exception {\n\t\tLOG.info(\"Generating Hive Integration Statement\");\n\n\t\tStringBuffer sb = new StringBuffer();\n\t\tsb.append(String.format(\"CREATE EXTERNAL TABLE %s(\", hiveTableName));\n\n\t\t// add the hive table spec\n\t\tList<String> tableDefinition = DynamoUtils.getDictionaryEntry(\n\t\t\t\tdynamoClient, dynamoTable);\n\t\tfor (String s : tableDefinition) {\n\t\t\tsb.append(String.format(\"%s string,\", s));\n\t\t}\n\t\tsb.replace(sb.length() - 1, sb.length(), \"\");\n\n\t\tsb.append(String\n\t\t\t\t.format(\") STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\\\"dynamodb.table.name\\\" = \\\"%s\\\", \\\"dynamodb.column.mapping\\\" = \\\"\",\n\t\t\t\t\t\tdynamoTable));\n\t\tfor (String s : tableDefinition) {\n\t\t\tsb.append(String.format(\"%s:%s,\", s, s));\n\t\t}\n\t\tsb.replace(sb.length() - 1, sb.length(), \"\");\n\t\tsb.append(\"))\");\n\n\t\treturn sb.toString();\n\t}\n\n\t/**\n\t * Helper method to generate a Redshift CREATE TABLE command which matches\n\t * the structure of the aggregate table, and a COPY command which will load\n\t * the table data from an Aggregator table into a Redshift Table. 
* @param\n\t * dynamoClient Dynamo DB Client to use for connection to Dynamo DB.\n\t * \n\t * @param redshiftTableName\n\t *            The table name to use in Redshift.\n\t * @param dynamoTable\n\t *            The Aggregator table name in Dynamo DB.\n\t * @return A String which contains the create table and copy commands to be\n\t *         issued against redshift.\n\t */\n\tpublic static String getRedshiftCopyCommand(\n\t\t\tfinal AmazonDynamoDB dynamoClient, String redshiftTableName,\n\t\t\tString dynamoTable) throws Exception {\n\t\tLOG.info(\"Generating Redshift Copy Command\");\n\t\tStringBuffer sb = new StringBuffer();\n\n\t\t// generate the create table statement\n\t\tsb.append(String.format(\"CREATE TABLE %S(\\n\", redshiftTableName));\n\t\tint i = 0;\n\t\tList<String> tableStructure = DynamoUtils.getDictionaryEntry(\n\t\t\t\tdynamoClient, dynamoTable);\n\t\tString columnSpec = null;\n\t\tString dataType = null;\n\t\tfor (String s : tableStructure) {\n\t\t\ti++;\n\n\t\t\tswitch (s) {\n\t\t\tcase StreamAggregator.LAST_WRITE_SEQ:\n\t\t\t\tdataType = \"BIGINT\";\n\t\t\t\tbreak;\n\t\t\tcase StreamAggregator.LAST_WRITE_TIME:\n\t\t\t\tdataType = \"TIMESTAMP\";\n\t\t\t\tbreak;\n\t\t\tcase StreamAggregator.EVENT_COUNT:\n\t\t\t\tdataType = \"INT\";\n\t\t\t\tbreak;\n\t\t\tdefault:\n\t\t\t\tif (s.contains(\"-SUM\") || s.contains(\"-MIN\")\n\t\t\t\t\t\t|| s.contains(\"-MAX\")) {\n\t\t\t\t\tdataType = \"INT\";\n\t\t\t\t} else {\n\t\t\t\t\tdataType = \"VARCHAR(1000)\";\n\t\t\t\t}\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\t;\n\n\t\t\tcolumnSpec = s + \" \" + dataType;\n\n\t\t\tif (i == tableStructure.size()) {\n\t\t\t\tsb.append(columnSpec);\n\t\t\t} else {\n\t\t\t\tsb.append(columnSpec + \",\");\n\t\t\t}\n\t\t}\n\t\tsb.append(\");\\n\\n\");\n\n\t\t// generate the copy command\n\t\tsb.append(String\n\t\t\t\t.format(\"copy %s from 'dynamodb://%s' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' readratio 50 timeformat 'yyyy-MM-dd 
hh:mi:ss';\",\n\t\t\t\t\t\tredshiftTableName, dynamoTable, rsTimeformat));\n\t\treturn sb.toString();\n\t}\n\n\t/**\n\t * Index name which should be used for the last write sequence GSI on a\n\t * table\n\t * \n\t * @param dynamoTable\n\t *            The table name in Dynamo DB.\n\t * @return The name for the global secondary index on the table for last\n\t *         write sequence.\n\t */\n\tpublic static final String getLastWriteSeqIndexName(String dynamoTable) {\n\t\treturn dynamoTable + \"-seq\";\n\t}\n\n\t/**\n\t * Index name which should be used for the last write sequence GSI on a\n\t * table\n\t * \n\t * @param dynamoTable\n\t *            The table name in Dynamo DB.\n\t * @return The name for the global secondary index on the table for last\n\t *         write sequence.\n\t */\n\tpublic static final String getDateDimensionIndexName(String dynamoTable,\n\t\t\tString dateAttribute) {\n\t\treturn String.format(\"%s-%s\", dynamoTable, dateAttribute);\n\t}\n\n\t/**\n\t * Method which will generate a correctly formatted primary key for a dynamo\n\t * table hosting aggregated data.\n\t * \n\t * @param updateKey\n\t *            An {@link UpdateKey} which should be pivoted into a key.\n\t * @return\n\t */\n\tpublic static Map<String, AttributeValue> getTableKey(UpdateKey updateKey) {\n\t\treturn getTableKey(updateKey.getAggregateColumnName(),\n\t\t\t\tupdateKey.getAggregatedValue(),\n\t\t\t\tupdateKey.getDateValueColumnName(), updateKey.getDateValue());\n\t}\n\n\t/**\n\t * Method which will generate a correctly formatted primary key for a dynamo\n\t * table hosting aggregated data.\n\t * \n\t * @param keyColumnName\n\t *            The attribute name in the table to be used as the first part\n\t *            of a hash key.\n\t * @param fieldValue\n\t *            The value of the hash key to query for.\n\t * @param dateColumnName\n\t *            The attribute name of the date column to be used as the range\n\t *            key.\n\t * @param dateValue\n\t *      
      The value of the range key value to query for.\n\t * @return\n\t */\n\tprotected static Map<String, AttributeValue> getTableKey(\n\t\t\tString keyColumnName, String fieldValue, String dateColumnName,\n\t\t\tString dateValue) {\n\t\tHashMap<String, AttributeValue> key = new HashMap<>();\n\t\tkey.put(keyColumnName, new AttributeValue().withS(fieldValue));\n\t\tkey.put(dateColumnName, new AttributeValue().withS(dateValue));\n\n\t\treturn key;\n\t}\n\n\tprotected static Map<String, AttributeValue> getValue(\n\t\t\tfinal AmazonDynamoDB dynamoClient, final String tableName,\n\t\t\tfinal UpdateKey key) {\n\t\tGetItemRequest req = new GetItemRequest().withTableName(tableName)\n\t\t\t\t.withKey(getTableKey(key));\n\t\treturn dynamoClient.getItem(req).getItem();\n\t}\n\n\tprotected static String getTableName(final String applicationName,\n\t\t\tfinal String namespace) {\n\t\treturn String.format(\"%s-%s\", applicationName, namespace);\n\t}\n\n\tpublic static JsonNode asJsonNode(String s) throws Exception {\n\t\treturn mapper.readTree(s);\n\t}\n\n\tpublic static JsonNode asJsonNode(File f) throws Exception {\n\t\treturn mapper.readTree(f);\n\t}\n\n\tpublic static JsonNode readJsonValue(JsonNode json, String atPath) {\n\t\tif (!atPath.contains(\".\")) {\n\t\t\treturn json.get(atPath);\n\t\t} else {\n\t\t\tString[] path = atPath.split(\"\\\\.\");\n\n\t\t\tJsonNode node = json.get(path[0]);\n\t\t\tfor (int i = 1; i < path.length; i++) {\n\t\t\t\tnode = node.path(path[i]);\n\t\t\t}\n\n\t\t\treturn node;\n\t\t}\n\t}\n\n\tpublic static String readValueAsString(JsonNode json, String atPath) {\n\t\tJsonNode node = readJsonValue(json, atPath);\n\n\t\treturn node == null ? 
null : node.asText();\n\t}\n\n\t/**\n\t * Get a list of all Open shards ordered by their start hash\n\t * \n\t * @param streamName\n\t * @return A Map of only Open Shards indexed by the Shard ID\n\t */\n\tpublic static Map<String, Shard> getOpenShards(\n\t\t\tAmazonKinesisClient kinesisClient, String streamName)\n\t\t\tthrows Exception {\n\t\tMap<String, Shard> shardMap = new LinkedHashMap<>();\n\t\tfinal int BACKOFF_MILLIS = 10;\n\t\tfinal int MAX_DESCRIBE_ATTEMPTS = 10;\n\t\tint describeAttempts = 0;\n\t\tStreamDescription stream = null;\n\t\ttry {\n\t\t\tdo {\n\t\t\t\ttry {\n\t\t\t\t\tstream = kinesisClient.describeStream(streamName)\n\t\t\t\t\t\t\t.getStreamDescription();\n\t\t\t\t} catch (LimitExceededException e) {\n\t\t\t\t\tThread.sleep(2 ^ describeAttempts * BACKOFF_MILLIS);\n\t\t\t\t\tdescribeAttempts++;\n\t\t\t\t}\n\t\t\t} while (stream == null && describeAttempts < MAX_DESCRIBE_ATTEMPTS);\n\t\t} catch (InterruptedException e) {\n\t\t\tLOG.error(e);\n\t\t\tthrow e;\n\t\t}\n\n\t\tif (stream == null) {\n\t\t\tthrow new Exception(String.format(\n\t\t\t\t\t\"Unable to describe Stream after %s attempts\",\n\t\t\t\t\tMAX_DESCRIBE_ATTEMPTS));\n\t\t}\n\t\tCollection<String> openShardNames = new ArrayList<String>();\n\n\t\t// load all the shards on the stream\n\t\tfor (Shard shard : stream.getShards()) {\n\t\t\topenShardNames.add(shard.getShardId());\n\t\t\tshardMap.put(shard.getShardId(), shard);\n\n\t\t\t// remove this shard's parents from the set of active shards -\n\t\t\t// we\n\t\t\t// can't do anything to them\n\t\t\tif (shard.getParentShardId() != null) {\n\t\t\t\topenShardNames.remove(shard.getParentShardId());\n\t\t\t}\n\t\t\tif (shard.getAdjacentParentShardId() != null) {\n\t\t\t\topenShardNames.remove(shard.getAdjacentParentShardId());\n\t\t\t}\n\t\t}\n\n\t\t// create a List of Open shards for sorting\n\t\tList<Shard> shards = new ArrayList<Shard>();\n\t\tfor (String s : openShardNames) {\n\t\t\tshards.add(shardMap.get(s));\n\t\t}\n\n\t\t// sort the 
list into lowest start hash order\n\t\tCollections.sort(shards, new Comparator<Shard>() {\n\t\t\tpublic int compare(Shard o1, Shard o2) {\n\t\t\t\treturn new BigInteger(o1.getHashKeyRange().getStartingHashKey())\n\t\t\t\t\t\t.compareTo(new BigInteger(o2.getHashKeyRange()\n\t\t\t\t\t\t\t\t.getStartingHashKey()));\n\t\t\t}\n\t\t});\n\n\t\t// rebuild the shard map into the correct order\n\t\tshardMap.clear();\n\t\tfor (Shard s : shards) {\n\t\t\tshardMap.put(s.getShardId(), s);\n\t\t}\n\n\t\treturn shardMap;\n\n\t}\n\n\tpublic static Shard getFirstShard(AmazonKinesisClient kinesisClient,\n\t\t\tString streamName) throws Exception {\n\t\treturn getOpenShards(kinesisClient, streamName).values().iterator()\n\t\t\t\t.next();\n\t}\n\n\tpublic static String getFirstShardName(AmazonKinesisClient kinesisClient,\n\t\t\tString streamName) throws Exception {\n\t\treturn getFirstShard(kinesisClient, streamName).getShardId();\n\t}\n\n\tpublic static int getShardCount(AmazonKinesisClient kinesisClient,\n\t\t\tString streamName) throws Exception {\n\t\treturn getOpenShards(kinesisClient, streamName).keySet().size();\n\t}\n\n\t/**\n\t * Strip the TimeHorizon abbreviation from a dateValueItem used in DynamoDB\n\t * with multi-value format\n\t */\n\tpublic static String extractDateFromMultivalue(TimeHorizon t, String date) {\n\t\tif (date.startsWith(t.getAbbrev())) {\n\t\t\treturn date.replaceAll(\"^\" + t.getAbbrev() + \"-\", \"\");\n\t\t} else {\n\t\t\treturn date;\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/TableKeyStructure.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.util.HashSet;\nimport java.util.Set;\n\npublic class TableKeyStructure {\n    private String labelAttributeName, labelAttributeValue, dateAttributeName;\n\n    private Set<String> dateValues;\n\n    public TableKeyStructure() {\n    }\n\n    public TableKeyStructure(String labelAttributeName, String labelAttributeValue,\n            String dateAttributeName) {\n        this.labelAttributeName = labelAttributeName;\n        this.labelAttributeValue = labelAttributeValue;\n        this.dateAttributeName = dateAttributeName;\n    }\n\n    public TableKeyStructure(String labelAttributeName, String labelAttributeValue,\n            String dateAttributeName, String dateAttributeValue) {\n        this.labelAttributeName = labelAttributeName;\n        this.labelAttributeValue = labelAttributeValue;\n        this.dateAttributeName = dateAttributeName;\n        this.dateValues = new HashSet<>();\n        this.dateValues.add(dateAttributeValue);\n    }\n\n    public TableKeyStructure withDateValue(String dateValue) {\n        if (this.dateValues == null) {\n            this.dateValues = new HashSet<>();\n        }\n        this.dateValues.add(dateValue);\n        return this;\n    }\n\n    public String getLabelAttributeName() {\n        return this.labelAttributeName;\n    
}\n\n    public String getLabelAttributeValue() {\n        return this.labelAttributeValue;\n    }\n\n    public String getDateAttributeName() {\n        return this.dateAttributeName;\n    }\n\n    public Set<String> getDateValues() {\n        return this.dateValues;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/TimeHorizon.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators;\n\nimport java.text.SimpleDateFormat;\nimport java.util.ArrayList;\nimport java.util.Calendar;\nimport java.util.Date;\nimport java.util.List;\n\npublic enum TimeHorizon {\n    SECOND(0, \"MM-dd HH:mm:ss\", \"s\"), MINUTE(1, \"MM-dd HH:mm:00\", \"m\"), MINUTES_GROUPED(1, null,\n            \"mb\") {\n        private Calendar calendar = Calendar.getInstance();\n\n        private int scope;\n\n        @Override\n        public int getGranularity() {\n            return this.scope;\n        }\n\n        @Override\n        public void setGranularity(int bucketSize) {\n            this.scope = bucketSize;\n        }\n\n        @Override\n        public String getValue(Date forDate) {\n            calendar.setTime(forDate);\n            int minutes = calendar.get(Calendar.MINUTE);\n            int bucket = new Double(Math.floor(minutes / scope) * scope).intValue();\n\n            return String.format(\"%s:%02d:00\",\n                    new SimpleDateFormat(\"yyyy-MM-dd HH\").format(forDate), bucket);\n        }\n    },\n    HOUR(2, \"MM-dd HH:00:00\", \"H\"), DAY(3, \"MM-dd 00:00:00\", \"d\"), WEEK(4, \"ww\", \"W\"), MONTH(5, \"MM-01 00:00:00\", \"M\"), YEAR(\n            6, \"01-01 00:00:00\", \"Y\"), FOREVER(999, \"\", \"*\") {\n        /**\n         * Override the getValue method, as 
TimeHorizon.FOREVER is for all\n         * values regardless of time period. We'll set the value to '*' as\n         * Dynamo wont allow an empty value\n         */\n        @Override\n        public String getValue(Date forDate) {\n            return \"*\";\n        }\n    };\n\n    private TimeHorizon(int placemark, String mask, String abbrev) {\n        this.placemark = placemark;\n        this.mask = mask;\n        this.abbrev = abbrev;\n    }\n\n    private int placemark;\n\n    private String mask;\n\n    private String abbrev;\n\n    private SimpleDateFormat getMask() {\n        return new SimpleDateFormat(\"yyyy-\" + this.mask);\n    }\n\n    public String getAbbrev() {\n        return this.abbrev;\n    }\n\n    public String getItemWithMultiValueFormat(Date dateValue) {\n        return getAbbrev() + \"-\" + getValue(dateValue);\n    }\n\n    public String getValue(Date forDate) {\n        return getMask().format(forDate);\n    }\n\n    /**\n     * Returns the full hierarchy of TimeHorizon values from this Horizon to\n     * FOREVER\n     * \n     * @return\n     */\n    public List<TimeHorizon> getFullHierarchy() {\n        return getHierarchyTo(TimeHorizon.FOREVER);\n    }\n\n    /**\n     * Get a list of all TimeHorizons in decreasing granularity, to the\n     * indicated Time Horizon. 
For example, if we requested\n     * TimeHorizon.MINUTE.getHierarchyTo(TimeHorizon.MONTH), we would receive a\n     * list of MINUTE, HOUR, DAY, MONTH\n     * \n     * @param t\n     * @return\n     */\n    public List<TimeHorizon> getHierarchyTo(TimeHorizon t) {\n        List<TimeHorizon> hierarchy = new ArrayList<>();\n\n        for (TimeHorizon h : TimeHorizon.values()) {\n            // don't include Minutes Group in automated hierarchies as they are\n            // a peer to Minutes\n            if (h.placemark >= this.placemark && h.placemark <= t.placemark\n                    && !h.equals(TimeHorizon.MINUTES_GROUPED)) {\n                hierarchy.add(h);\n            }\n        }\n\n        return hierarchy;\n    }\n\n    public int getGranularity() throws Exception {\n        throw new Exception(\"Not Implemented\");\n    }\n\n    public void setGranularity(int scope) throws Exception {\n        throw new Exception(\"Not Implemented\");\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Aggregate.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.annotations;\n\nimport java.lang.annotation.ElementType;\nimport java.lang.annotation.Retention;\nimport java.lang.annotation.RetentionPolicy;\nimport java.lang.annotation.Target;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore;\nimport com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter;\n\n/**\n * Annotations to indicate that a Class contains an Aggregator Configuration\n */\n@Target(ElementType.TYPE)\n@Retention(RetentionPolicy.RUNTIME)\npublic @interface Aggregate {\n    /**\n     * The type of Aggregator to create. 
Default is COUNT.\n     * \n     * @return\n     */\n    AggregatorType type() default AggregatorType.COUNT;\n\n    /** The list of Time Horizons to Aggregate on */\n    TimeHorizon[] timeHorizons() default TimeHorizon.HOUR;\n\n    /**\n     * The bucket size in minutes for each TimeHorizon.MINUTES_GROUPED entry in\n     * timeHorizons, given at the same index as that entry. The default of -1\n     * is rejected when MINUTES_GROUPED is used.\n     */\n    int[] timeGranularity() default -1;\n\n    /**\n     * The namespace for the Aggregation Data.\n     * \n     * @return\n     */\n    String namespace() default \"\";\n\n    /**\n     * Should the Aggregator fail on errors in reading data from the stream for\n     * Aggregation.\n     * \n     * @return\n     */\n    boolean failOnDataExtractionErrors() default true;\n\n    /**\n     * Should the aggregator publish instrumentation metrics? The default metrics\n     * emitter is CloudWatch\n     * \n     * @return\n     */\n    boolean emitMetrics() default false;\n\n    /**\n     * Configure an IDataStore other than the default Dynamo DB Datastore\n     * \n     * @return\n     */\n    Class dataStore() default DynamoDataStore.class;\n\n    /**\n     * Configure an IMetricsEmitter other than the default CloudWatch metrics\n     * service\n     */\n    Class metricsEmitter() default CloudWatchMetricsEmitter.class;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/AnnotationProcessor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.annotations;\n\nimport java.lang.annotation.Annotation;\nimport java.lang.reflect.Method;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.LinkedHashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.exception.ClassNotAnnotatedException;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryElement;\n\n/**\n * AnnotationProcess provides a helper mechanism to extract information from an\n * Annotationed Class which will be used to configure an Object Serialisation\n * based Aggregation. 
See\n * {@link com.amazonaws.services.kinesis.aggregators.factory.ObjectAggregatorFactory}\n * .\n */\npublic class AnnotationProcessor {\n    @SuppressWarnings(\"rawtypes\")\n    private Class clazz;\n\n    private LabelSet labelSet = new LabelSet();\n\n    private List<String> labelMethodNames = new ArrayList<>();\n\n    private Map<String, Method> labelMethodMap = new LinkedHashMap<>();\n\n    private String dateMethodName;\n\n    private Method dateMethod;\n\n    private Map<String, Method> summaryMethods = new HashMap<>();\n\n    private SummaryConfiguration summaryConfig = new SummaryConfiguration();\n\n    private AggregatorType type;\n\n    private List<TimeHorizon> timeHorizons;\n\n    private boolean timeHierarchy;\n\n    private String namespace;\n\n    private boolean failOnDataExtractionErrors = true;\n\n    private boolean emitMetrics = false;\n\n    private Class<IDataStore> dataStore;\n\n    private Class<IMetricsEmitter> metricsEmitter;\n\n    private AnnotationProcessor() {\n    }\n\n    /**\n     * Create a new Annotation Processor for an Annotated Class.\n     * \n     * @param clazz The Class to extract annotation information from.\n     * @throws Exception\n     */\n    public AnnotationProcessor(@SuppressWarnings(\"rawtypes\") Class clazz) throws Exception {\n        this.clazz = clazz;\n        boolean isAnnotated = false;\n\n        // get the class annotations\n        for (Annotation a : this.clazz.getAnnotations()) {\n            if (a.annotationType().equals(Aggregate.class)) {\n                isAnnotated = true;\n\n                Aggregate annotatedObject = (Aggregate) a;\n                this.namespace = annotatedObject.namespace();\n                if (this.namespace.contains(\" \"))\n                    throw new ClassNotAnnotatedException(\"Namespace may not contain spaces\");\n\n                this.type = annotatedObject.type();\n\n                // process time horizon annotations\n                int[] timeGranularities = 
annotatedObject.timeGranularity();\n                TimeHorizon[] horizons = annotatedObject.timeHorizons();\n                this.timeHorizons = new ArrayList<>();\n                int i = 0;\n                for (TimeHorizon h : horizons) {\n                    if (h.equals(TimeHorizon.MINUTES_GROUPED)) {\n                        try {\n                            // prevent use of the default time granularity\n                            if (timeGranularities[i] == -1) {\n                                throw new ArrayIndexOutOfBoundsException();\n                            }\n                            h.setGranularity(timeGranularities[i]);\n                        } catch (ArrayIndexOutOfBoundsException e) {\n                            throw new InvalidConfigurationException(\n                                    \"Unable to generate a MINUTES_GROUPED Time Horizon without configuration of timeGranularity\");\n                        }\n                    }\n                    this.timeHorizons.add(h);\n                    i++;\n                }\n\n                this.failOnDataExtractionErrors = annotatedObject.failOnDataExtractionErrors();\n\n                this.emitMetrics = annotatedObject.emitMetrics();\n\n                this.dataStore = annotatedObject.dataStore();\n\n                this.metricsEmitter = annotatedObject.metricsEmitter();\n            }\n        }\n\n        if (!isAnnotated)\n            throw new ClassNotAnnotatedException(\n                    \"Cannot get Aggregator Config from non-Annotated Class\");\n\n        // process the method annotations\n        if (isAnnotated) {\n            for (Method m : this.clazz.getDeclaredMethods()) {\n                // label method\n                if (m.getAnnotation(Label.class) != null) {\n                    this.labelMethodNames.add(m.getName());\n                    m.setAccessible(true);\n                    this.labelMethodMap.put(m.getName(), m);\n\n                    
this.labelSet.put(m.getName(), null);\n                }\n\n                // date method\n                if (m.getAnnotation(DateValue.class) != null) {\n                    this.dateMethodName = m.getName();\n                    m.setAccessible(true);\n                    this.dateMethod = m;\n                }\n\n                // summary methods\n                Annotation summary = m.getAnnotation(Summary.class);\n                if (summary != null) {\n                    m.setAccessible(true);\n                    this.summaryMethods.put(m.getName(), m);\n\n                    // process the summary configuration\n                    SummaryCalculation[] requestedCalcs = ((Summary) summary).type();\n\n                    if (requestedCalcs != null) {\n                        for (SummaryCalculation c : requestedCalcs) {\n                            this.summaryConfig.add(m.getName(), new SummaryElement(m.getName(), c));\n                        }\n                    } else {\n                        this.summaryConfig.add(m.getName(), new SummaryElement(m.getName(),\n                                SummaryCalculation.SUM));\n                    }\n                }\n            }\n        }\n    }\n\n    public List<String> getLabelMethodNames() {\n        return this.labelMethodNames;\n    }\n\n    public Map<String, Method> getLabelMethods() {\n        return this.labelMethodMap;\n    }\n\n    public String getDateMethodName() {\n        return this.dateMethodName;\n    }\n\n    public Method getDateMethod() {\n        return this.dateMethod;\n    }\n\n    public Map<String, Method> getSummaryMethods() {\n        return this.summaryMethods;\n    }\n\n    public SummaryConfiguration getSummaryConfig() {\n        return this.summaryConfig;\n    }\n\n    public AggregatorType getType() {\n        return this.type;\n    }\n\n    public List<TimeHorizon> getTimeHorizon() {\n        return this.timeHorizons;\n    }\n\n    public boolean hasTimeHierarchy() 
{\n        return this.timeHierarchy;\n    }\n\n    public boolean shouldFailOnDataExtractionErrors() {\n        return this.failOnDataExtractionErrors;\n    }\n\n    public boolean shouldEmitMetrics() {\n        return this.emitMetrics;\n    }\n\n    public Class<IMetricsEmitter> getMetricsEmitter() {\n        return this.metricsEmitter;\n    }\n\n    public Class<IDataStore> getDataStore() {\n        return this.dataStore;\n    }\n\n    public String getNamespace() {\n        return this.namespace;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/DateValue.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.annotations;\n\nimport java.lang.annotation.ElementType;\nimport java.lang.annotation.Retention;\nimport java.lang.annotation.RetentionPolicy;\nimport java.lang.annotation.Target;\n\n/**\n * Marker Annotation indicating that a method should be used as the date value\n * for Aggregation.\n */\n@Target(ElementType.METHOD)\n@Retention(RetentionPolicy.RUNTIME)\npublic @interface DateValue {\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Label.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.annotations;\n\nimport java.lang.annotation.ElementType;\nimport java.lang.annotation.Retention;\nimport java.lang.annotation.RetentionPolicy;\nimport java.lang.annotation.Target;\n\n/**\n * Marker Annotation indicating that this indicated method is the label to be\n * used for Aggregation.\n */\n@Target(ElementType.METHOD)\n@Retention(RetentionPolicy.RUNTIME)\npublic @interface Label {\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Summary.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.annotations;\n\nimport java.lang.annotation.ElementType;\nimport java.lang.annotation.Retention;\nimport java.lang.annotation.RetentionPolicy;\nimport java.lang.annotation.Target;\nimport java.util.Arrays;\n\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\n\n/**\n * Annotation which indicates that a method should be used as a summary\n * aggregation. If no type is indicated then it will be used as a\n * {@link com.amazonaws.services.kinesis.aggregators.SummaryCalculation.SUM}.\n */\n@Target(ElementType.METHOD)\n@Retention(RetentionPolicy.RUNTIME)\npublic @interface Summary {\n    /**\n     * The type of summary calculations to apply to the method.\n     * \n     * @return\n     */\n    public SummaryCalculation[] type() default SummaryCalculation.SUM;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/AbstractQueryServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServlet;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\npublic abstract class AbstractQueryServlet extends HttpServlet {\n    public void doGet(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        doAction(request, response);\n    }\n\n    public void doPost(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        doAction(request, response);\n    }\n\n    protected abstract void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException;\n\n    protected void doError(HttpServletResponse response, String message) throws ServletException {\n        try {\n            response.getWriter().print(message);\n            response.setStatus(400);\n        } catch (IOException e) {\n            throw new ServletException(e);\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/AggregatorsBeanstalkApp.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport javax.servlet.ServletContextEvent;\nimport javax.servlet.ServletContextListener;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.consumer.AggregatorConsumer;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;\n\npublic class AggregatorsBeanstalkApp implements ServletContextListener {\n    private static final Log LOG = LogFactory.getLog(AggregatorsBeanstalkApp.class);\n\n    protected static final String AGGREGATOR_GROUP_PARAM = \"aggregator-group\";\n\n    private AggregatorConsumer consumer;\n\n    private Thread t;\n\n    @Override\n    public void contextDestroyed(ServletContextEvent arg0) {\n        try {\n            consumer.shutdown();\n            t.interrupt();\n        } catch (Exception e) {\n            LOG.error(e);\n        }\n    }\n\n    @SuppressWarnings({ \"unchecked\" })\n    @Override\n    public void contextInitialized(ServletContextEvent contextEvent) {\n        String configPath = System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM);\n\n        if (configPath != null && 
!configPath.equals(\"\")) {\n            LOG.info(\"Starting Managed Beanstalk Aggregators Worker\");\n            String streamNameParam = System.getProperty(AggregatorsConstants.STREAM_NAME_PARAM);\n            String appNameParam = System.getProperty(AggregatorsConstants.APP_NAME_PARAM);\n            String regionNameParam = System.getProperty(AggregatorsConstants.REGION_PARAM);\n            String streamPosParam = System.getProperty(AggregatorsConstants.STREAM_POSITION_PARAM);\n            String maxRecordsParam = System.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM);\n            String environmentParam = System.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM);\n            String failuresToleratedParam = System.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM);\n\n            if (streamNameParam == null || streamNameParam.equals(\"\") || appNameParam == null\n                    || appNameParam.equals(\"\")) {\n                LOG.error(String.format(\n                        \"Unable to run Beanstalk Managed Aggregator Consumer without Configuration of Parameters %s and %s. 
Application is Idle.\",\n                        AggregatorsConstants.STREAM_NAME_PARAM, AggregatorsConstants.APP_NAME_PARAM));\n                return;\n            }\n\n            InitialPositionInStream initialPosition = null;\n            if (streamPosParam != null) {\n                try {\n                    initialPosition = InitialPositionInStream.valueOf(streamPosParam);\n                    LOG.info(String.format(\"Starting from %s Position in Stream\", streamPosParam));\n                } catch (Exception e) {\n                    LOG.error(String.format(\"%s is an invalid Initial Position in Stream\",\n                            streamPosParam));\n                    return;\n                }\n            }\n\n            try {\n                AggregatorConsumer consumer = new AggregatorConsumer(streamNameParam, appNameParam,\n                        configPath);\n\n                // add consumer parameters, if set from System Properties\n                if (regionNameParam != null && !regionNameParam.equals(\"\")) {\n                    consumer.withRegionName(regionNameParam);\n                }\n\n                if (initialPosition != null) {\n                    consumer.withInitialPositionInStream(initialPosition.name());\n                }\n\n                if (maxRecordsParam != null && !maxRecordsParam.equals(\"\")) {\n                    consumer.withMaxRecords(Integer.parseInt(maxRecordsParam));\n                }\n\n                if (environmentParam != null && !environmentParam.equals(\"\")) {\n                    consumer.withEnvironment(environmentParam);\n                }\n\n                if (failuresToleratedParam != null && !failuresToleratedParam.equals(\"\")) {\n                    consumer.withToleratedWorkerFailures(Integer.parseInt(failuresToleratedParam));\n                }\n\n                // configure the consumer so that the aggregators get\n                // instantiated\n                
consumer.configure();\n\n                AggregatorGroup aggGroup = consumer.getAggregators();\n\n                // put the aggregator group reference and configuration\n                // references into the application context\n                contextEvent.getServletContext().setAttribute(AGGREGATOR_GROUP_PARAM, aggGroup);\n                contextEvent.getServletContext().setAttribute(\n                        AggregatorsConstants.STREAM_NAME_PARAM, streamNameParam);\n\n                LOG.info(\"Registered Stream and Aggregator Group with Servlet Context\");\n\n                // start the consumer\n                final class ConsumerRunner implements Runnable {\n                    final AggregatorConsumer consumer;\n\n                    public ConsumerRunner(AggregatorConsumer consumer) {\n                        this.consumer = consumer;\n                    }\n\n                    @Override\n                    public void run() {\n                        try {\n                            consumer.run();\n                        } catch (Exception e) {\n                            e.printStackTrace();\n                            LOG.error(e);\n                        }\n                    }\n                }\n                t = new Thread(new ConsumerRunner(consumer));\n                t.start();\n            } catch (Exception e) {\n                LOG.error(e);\n            }\n        } else {\n            LOG.warn(String.format(\n                    \"No Aggregators Configuration File found in Beanstalk Configuration %s. Application is Idle\",\n                    AggregatorsConstants.CONFIG_URL_PARAM));\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/DateQueryServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.util.Date;\nimport java.util.List;\nimport java.util.Map;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.ComparisonOperator;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\n\npublic class DateQueryServlet extends AbstractQueryServlet {\n    public static final String NAMESPACE_PARAM = \"namespace\";\n\n    public static final String DATE_VALUE_PARAM = \"date-value\";\n\n    public static final String OPERATOR_PARAM = \"operator\";\n\n    public static final String GRANULARITY_PARAM = \"granularity\";\n\n    public static final int QUERY_THREADS = 10;\n\n    private void respondWith(HttpServletResponse response,\n            List<Map<String, AttributeValue>> queryResult) throws IOException {\n        response.setStatus(200);\n        // cors grant\n        
response.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n        PrintWriter w = response.getWriter();\n        w.println(\"[\");\n\n        int i = 0;\n\n        // write out the response values as json\n        if (queryResult != null) {\n            for (Map<String, AttributeValue> map : queryResult) {\n                i++;\n                int j = 0;\n                w.print(\"{\");\n                for (String s : map.keySet()) {\n                    j++;\n\n                    String toPrint = map.get(s).getS();\n\n                    if (toPrint == null) {\n                        toPrint = map.get(s).getN();\n                    }\n\n                    w.print(String.format(\"\\\"%s\\\":\\\"%s\\\"\", s, toPrint));\n\n                    if (j != map.keySet().size()) {\n                        w.println(\",\");\n                    }\n                }\n                w.print(\"}\");\n\n                if (i != queryResult.size()) {\n                    w.println(\",\");\n                }\n            }\n        }\n\n        w.print(\"]\");\n    }\n\n    public void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        String namespace = request.getParameter(NAMESPACE_PARAM);\n        String dateValue = request.getParameter(DATE_VALUE_PARAM);\n        String operator = request.getParameter(OPERATOR_PARAM);\n        String granularity = request.getParameter(GRANULARITY_PARAM);\n\n        // create the date item\n        Date d = null;\n        try {\n            d = StreamAggregator.dateFormatter.parse(dateValue);\n        } catch (Exception e) {\n            doError(response, String.format(\"Date Parameter must be in format %s\",\n                    StreamAggregator.dateFormatter.getDateFormatSymbols().toString()));\n            return;\n        }\n\n        // create the ComparisonOperator for Dynamo from the argument\n        ComparisonOperator c = null;\n        try {\n      
      c = ComparisonOperator.fromValue(operator);\n        } catch (Exception e) {\n            doError(response, String.format(\"%s is an invalid Comparison Operator\", operator));\n            return;\n        }\n\n        // create the Time Horizon value from the argument\n        TimeHorizon h = null;\n        try {\n            h = TimeHorizon.valueOf(granularity);\n        } catch (Exception e) {\n            doError(response, String.format(\"%s is an invalid Granularity\", granularity));\n            return;\n        }\n\n        String streamName = (String) request.getServletContext().getAttribute(\n                AggregatorsConstants.STREAM_NAME_PARAM);\n        AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute(\n                AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM);\n\n        if (aggGroup == null) {\n            doError(response, \"Aggregator Application Not Initialised\");\n            return;\n        }\n\n        // initialise the aggregator group onto shard 'none' for this operation\n        // - it may already be initialised\n        try {\n            aggGroup.initialize(\"none\");\n        } catch (Exception e) {\n            throw new ServletException(e);\n        }\n\n        // put the initialised aggregator group back into the context\n        request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM,\n                aggGroup);\n\n        // acquire the correct aggregator by namespace\n        for (StreamAggregator agg : aggGroup.getAggregators()) {\n            if (agg.getNamespace().equals(namespace)) {\n                // run the query\n                try {\n                    respondWith(response, agg.queryByDate(d, h, c, QUERY_THREADS));\n                    return;\n                } catch (Exception e) {\n                    throw new ServletException(e);\n                }\n            }\n        }\n\n        // shouldn't get here, so bail with a meaning 
error on namespace\n        doError(response,\n                String.format(\"Unable to acquire Aggregator with Namespace %s\", namespace));\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/FetchConfigurationServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.configuration.ConfigFileUtils;\n\npublic class FetchConfigurationServlet extends AbstractQueryServlet {\n    private void respondWith(HttpServletResponse response, Map<String, String> configItems)\n            throws IOException {\n        response.setStatus(200);\n        // cors grant\n        response.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n        PrintWriter w = response.getWriter();\n\n        int i = 0;\n\n        // write out the response values as json\n        w.println(\"{\");\n\n        int resultCount = 0;\n\n        for (String s : configItems.keySet()) {\n            resultCount++;\n\n            String value = configItems.get(s);\n\n            w.print(String.format(\"\\\"%s\\\":%s\", s,\n                    value == null ? 
\"null\" : String.format(\"\\\"%s\\\"\", value)));\n\n            if (resultCount != configItems.size()) {\n                w.println(\",\");\n            }\n        }\n        w.print(\"}\");\n    }\n\n    @Override\n    protected void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        try {\n            Map<String, String> config = new HashMap<>();\n\n            // required items\n            config.put(AggregatorsConstants.REGION_PARAM,\n                    System.getProperty(AggregatorsConstants.REGION_PARAM));\n            config.put(AggregatorsConstants.STREAM_NAME_PARAM,\n                    System.getProperty(AggregatorsConstants.STREAM_NAME_PARAM));\n            config.put(AggregatorsConstants.APP_NAME_PARAM,\n                    System.getProperty(AggregatorsConstants.APP_NAME_PARAM));\n            config.put(AggregatorsConstants.CONFIG_URL_PARAM,\n                    System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM));\n            config.put(\n                    \"fetch-config-url\",\n                    ConfigFileUtils.makeConfigFileURL(System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM)));\n\n            // optional items\n            config.put(AggregatorsConstants.ENVIRONMENT_PARAM,\n                    System.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM));\n            config.put(AggregatorsConstants.MAX_RECORDS_PARAM,\n                    System.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM));\n            config.put(AggregatorsConstants.FAILURES_TOLERATED_PARAM,\n                    System.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM));\n\n            respondWith(response, config);\n        } catch (Exception e) {\n            throw new ServletException(e);\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/ListAggregateKeysServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.util.List;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TableKeyStructure;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoQueryEngine.QueryKeyScope;\n\npublic class ListAggregateKeysServlet extends AbstractQueryServlet {\n    public static final String NAMESPACE_PARAM = \"namespace\";\n\n    public static final String SCOPE_PARAM = \"scope\";\n\n    public static final int QUERY_THREADS = 3;\n\n    private void respondWith(HttpServletResponse response, List<TableKeyStructure> queryResult)\n            throws IOException {\n        response.setStatus(200);\n        // cors grant\n        response.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n        PrintWriter w = response.getWriter();\n        w.println(\"{\");\n\n        int i = 0;\n\n        // write out the response values as json\n        if (queryResult != null) {\n            
int result = 0;\n            for (TableKeyStructure t : queryResult) {\n                if (result == 0) {\n                    w.println(String.format(\"\\\"labelName\\\":\\\"%s\\\",\", t.getLabelAttributeName()));\n                    w.println(String.format(\"\\\"dateName\\\":\\\"%s\\\",\", t.getDateAttributeName()));\n                    w.println(\"\\\"values\\\":[\");\n                }\n\n                // write the value as a struct\n                w.print(\"{\");\n                w.print(String.format(\"\\\"value\\\":\\\"%s\\\"\", t.getLabelAttributeValue()));\n\n                int dateItem = 0;\n                if (t.getDateValues() != null) {\n                    if (dateItem == 0) {\n                        w.print(\",\\n\\\"dates\\\":[\");\n                    }\n\n                    for (String s : t.getDateValues()) {\n                        // write the date value\n                        w.print(String.format(\"\\\"%s\\\"\", s));\n\n                        if (dateItem != t.getDateValues().size() - 1) {\n                            w.println(\",\");\n                        } else {\n                            w.print(\"]\");\n                        }\n                        dateItem++;\n                    }\n                }\n\n                w.print(\"}\");\n\n                if (result != queryResult.size() - 1) {\n                    w.println(\",\");\n                }\n\n                result++;\n            }\n        }\n\n        w.print(\"]}\");\n    }\n\n    @Override\n    protected void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        String namespace = request.getParameter(NAMESPACE_PARAM);\n        String scope = request.getParameter(SCOPE_PARAM);\n\n        // resolve the scope\n        QueryKeyScope queryScope = null;\n        try {\n            queryScope = QueryKeyScope.valueOf(scope);\n        } catch (Exception e) {\n            
doError(response, String.format(\"Invalid Query Scope %s\", scope));\n            return;\n        }\n\n        String streamName = (String) request.getServletContext().getAttribute(\n                AggregatorsConstants.STREAM_NAME_PARAM);\n        AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute(\n                AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM);\n\n        if (aggGroup == null) {\n            doError(response, \"Aggregator Application Not Initialised\");\n            return;\n        }\n\n        // initialise the aggregator group onto shard 'none' for this operation\n        // - it may already be initialised\n        try {\n            aggGroup.initialize(\"none\");\n        } catch (Exception e) {\n            throw new ServletException(e);\n        }\n\n        // put the initialised aggregator group back into the context\n        request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM,\n                aggGroup);\n\n        // acquire the correct aggregator by namespace\n        for (StreamAggregator agg : aggGroup.getAggregators()) {\n            if (agg.getNamespace().equals(namespace)) {\n                // run the query\n                try {\n                    respondWith(response, agg.parallelQueryKeys(queryScope, QUERY_THREADS));\n                    return;\n                } catch (Exception e) {\n                    throw new ServletException(e);\n                }\n            }\n        }\n\n        // shouldn't get here, so bail with a meaningful error on namespace\n        doError(response,\n                String.format(\"Unable to acquire Aggregator with Namespace %s\", namespace));\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/QueryByLabelServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.text.ParseException;\nimport java.util.Date;\nimport java.util.List;\nimport java.util.Map;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.ComparisonOperator;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\n\npublic class QueryByLabelServlet extends AbstractQueryServlet {\n    public static final String NAMESPACE_PARAM = \"namespace\";\n\n    public static final String LABEL_VALUE_PARAM = \"label-value\";\n\n    public static final String DATE_VALUE_PARAM = \"date-value\";\n\n    public static final String OPERATOR_PARAM = \"operator\";\n\n    private void respondWith(HttpServletResponse response,\n            List<Map<String, AttributeValue>> queryResult) throws IOException {\n        response.setStatus(200);\n        // cors grant\n        response.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n        PrintWriter w = 
response.getWriter();\n        w.println(\"[\");\n\n        int i = 0;\n\n        // write out the response values as json\n        if (queryResult != null) {\n            int resultCount = 0;\n\n            for (Map<String, AttributeValue> map : queryResult) {\n                resultCount++;\n                int mapCount = 0;\n                if (map != null) {\n                    w.println(\"{\");\n\n                    for (String s : map.keySet()) {\n                        mapCount++;\n\n                        if (map.get(s).getN() == null) {\n                            w.print(String.format(\"\\\"%s\\\":\\\"%s\\\"\", s, map.get(s).getS()));\n                        } else {\n                            w.print(String.format(\"\\\"%s\\\":%s\", s, map.get(s).getN()));\n                        }\n\n                        if (mapCount != map.size()) {\n                            w.println(\",\");\n                        }\n                    }\n                    w.print(\"}\");\n\n                    if (resultCount != queryResult.size()) {\n                        w.println(\",\");\n                    }\n                }\n            }\n        }\n\n        w.print(\"]\");\n    }\n\n    @Override\n    protected void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        String namespace = request.getParameter(NAMESPACE_PARAM);\n        String labelValue = request.getParameter(LABEL_VALUE_PARAM);\n        String dateValue = request.getParameter(DATE_VALUE_PARAM);\n        String operator = request.getParameter(OPERATOR_PARAM);\n\n        // have to provide namespace and label\n        if (namespace == null) {\n            doError(response, String.format(\"Argument '%s' must not be null\", NAMESPACE_PARAM));\n            return;\n        }\n        if (labelValue == null || labelValue.equals(\"\")) {\n            doError(response, String.format(\"Argument '%s' must not be null\", 
LABEL_VALUE_PARAM));\n            return;\n        }\n\n        // if date value is provided, the so too must operator and granularity\n        ComparisonOperator setOperator = null;\n        if (dateValue != null && operator == null) {\n            setOperator = ComparisonOperator.EQ;\n        }\n\n        if (operator != null) {\n            try {\n                setOperator = ComparisonOperator.fromValue(operator);\n            } catch (Exception e) {\n                doError(response, String.format(\"%s is an invalid Comparison Operator\", operator));\n                return;\n            }\n        }\n\n        String streamName = (String) request.getServletContext().getAttribute(\n                AggregatorsConstants.STREAM_NAME_PARAM);\n        AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute(\n                AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM);\n        if (aggGroup == null) {\n            doError(response, \"Aggregator Application Not Initialised\");\n            return;\n        } else {\n            // initialise the aggregator group onto shard 'none' for this\n            // operation\n            // - it may already be initialised\n            try {\n                aggGroup.initialize(\"none\");\n            } catch (Exception e) {\n                throw new ServletException(e);\n            }\n        }\n\n        Date dateValueAsDate = null;\n        if (dateValue != null) {\n            try {\n                dateValueAsDate = StreamAggregator.dateFormatter.parse(dateValue);\n            } catch (ParseException e1) {\n                throw new ServletException(e1);\n            }\n        }\n\n        // put the initialised aggregator group back into the context\n        request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM,\n                aggGroup);\n\n        // acquire the correct aggregator by namespace\n        for (StreamAggregator agg : 
aggGroup.getAggregators()) {\n            if (agg.getNamespace().equals(namespace)) {\n                // run the query\n                try {\n                    respondWith(response, agg.queryValue(labelValue, dateValueAsDate, setOperator));\n                    return;\n                } catch (Exception e) {\n                    throw new ServletException(e);\n                }\n            }\n        }\n\n        // shouldn't get here, so bail with a meaningful error on namespace\n        doError(response,\n                String.format(\"Unable to acquire Aggregator with Namespace %s\", namespace));\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/ShowConfigFileServlet.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.net.URL;\nimport java.util.Date;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.HttpMethod;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.configuration.ConfigFileUtils;\nimport com.amazonaws.services.s3.AmazonS3;\nimport com.amazonaws.services.s3.AmazonS3Client;\nimport com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;\n\npublic class ShowConfigFileServlet extends AbstractQueryServlet {\n    private static final Log LOG = LogFactory.getLog(ShowConfigFileServlet.class);\n\n    @Override\n    protected void doAction(HttpServletRequest request, HttpServletResponse response)\n            throws ServletException, IOException {\n        try {\n            String configUrl = System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM);\n            String url = null;\n            if (configUrl == null) {\n                response.setStatus(404);\n            } else {\n                url = ConfigFileUtils.makeConfigFileURL(configUrl);\n                
LOG.info(String.format(\"Sending Redirect for Config File to S3 Temporary URL %s\",\n                        url));\n\n                response.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n                response.sendRedirect(url);\n            }\n        } catch (Exception e) {\n            throw new ServletException(e);\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/app/ShowConfigurationServlet.java",
    "content": "package com.amazonaws.services.kinesis.aggregators.app;\n\nimport java.io.IOException;\nimport java.io.PrintWriter;\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport javax.servlet.ServletException;\nimport javax.servlet.http.HttpServletRequest;\nimport javax.servlet.http.HttpServletResponse;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\npublic class ShowConfigurationServlet extends AbstractQueryServlet {\n\n\t@Override\n\tprotected void doAction(HttpServletRequest request,\n\t\t\tHttpServletResponse response) throws ServletException, IOException {\n\t\tresponse.setStatus(200);\n\t\t// cors grant\n\t\tresponse.setHeader(\"Access-Control-Allow-Origin\", \"*\");\n\n\t\tObjectMapper mapper = new ObjectMapper();\n\t\tMap<String, String> configMap = new HashMap<>();\n\n\t\tconfigMap.put(\"version\", StreamAggregator.version);\n\t\tconfigMap.put(AggregatorsConstants.STREAM_NAME_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.STREAM_NAME_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.APP_NAME_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.APP_NAME_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.REGION_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.REGION_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.STREAM_POSITION_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.STREAM_POSITION_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.MAX_RECORDS_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.ENVIRONMENT_PARAM,\n\t\t\t\tSystem.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.FAILURES_TOLERATED_PARAM, System\n\t\t\t\t.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM));\n\t\tconfigMap.put(AggregatorsConstants.CONFIG_URL_PARAM, 
System\n\t\t\t\t.getProperty(AggregatorsConstants.CONFIG_URL_PARAM));\n\n\t\tPrintWriter w = response.getWriter();\n\t\tw.println(mapper.writeValueAsString(configMap));\n\t}\n\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/cache/AggregateCache.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.cache;\n\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.EnvironmentType;\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryElement;\n\n/**\n * AggregateCache encapsulates the in-flight copy of aggregated data, which is\n * flushed to Dynamo DB when the aggregator checkpoints.\n */\n/*\n * Flush and Update methods are not thread safe so are marked as synchronised.\n * 
Intended utilisation in an inherently multi-threaded environment will be with\n * multiple instances of an Aggregator, which will by definition generate\n * multiple instances of the cache, so this synchronisation should not be an\n * issue in practice\n */\npublic class AggregateCache {\n\tprivate String shardId;\n\n\tprivate String environment;\n\n\tprivate String streamName, tableName, labelName, dateName;\n\n\tprivate AWSCredentialsProvider credentials;\n\n\tprivate AggregatorType aggregatorType = AggregatorType.COUNT;\n\n\tprivate Map<UpdateKey, UpdateValue> pendingUpdates;\n\n\tprivate long reportUpdatesPendingCount = -1;\n\n\tprivate long warnUpdatesPendingCount = -1;\n\n\tprivate long forceCheckpointOnPendingUpdateCount = -1;\n\n\tprivate final int updateForceCheckpointFrequency = 3;\n\n\tprivate int forcedCount = 0;\n\n\tprivate final Log LOG = LogFactory.getLog(AggregateCache.class);\n\n\tprivate boolean online = false;\n\n\tprivate IMetricsEmitter metricsEmitter;\n\n\tprivate IDataStore dataStore = null;\n\n\tprivate Region region;\n\n\tpublic AggregateCache(String shardId) {\n\t\tthis.shardId = shardId;\n\t}\n\n\tprivate void logInfo(String message) {\n\t\tLOG.info(\"[\" + this.shardId + \"] \" + message);\n\t}\n\n\tprivate void logWarn(String message) {\n\t\tLOG.warn(\"[\" + this.shardId + \"] \" + message);\n\t}\n\n\t/**\n\t * Configure the Aggregate Cache with its underlying data store.\n\t * \n\t * @throws Exception\n\t */\n\tpublic void initialise() throws Exception {\n\t\tif (pendingUpdates == null) {\n\t\t\tpendingUpdates = new HashMap<>();\n\t\t}\n\n\t\t// configure the default dynamo data store\n\t\tif (this.dataStore == null) {\n\t\t\tthis.dataStore = new DynamoDataStore(this.credentials,\n\t\t\t\t\tthis.aggregatorType, this.streamName, this.tableName,\n\t\t\t\t\tthis.labelName, 
dateName).withStorageCapacity(\n\t\t\t\t\tDynamoDataStore.DEFAULT_READ_CAPACITY,\n\t\t\t\t\tDynamoDataStore.DEFAULT_WRITE_CAPACITY);\n\t\t\tthis.dataStore.setRegion(region);\n\t\t}\n\t\tthis.dataStore.initialise();\n\n\t\t// set the checkpointing thresholds based on the current io throughputs\n\t\tsetCheckpointForcingThresholds();\n\n\t\tLOG.info(\"Aggregator Cache Online\\nIDataStore: \"\n\t\t\t\t+ this.getDataStore().getClass().getName()\n\t\t\t\t+ \"\\n\"\n\t\t\t\t+ \"IMetricsEmitter: \"\n\t\t\t\t+ (this.metricsEmitter == null ? \"Null\" : this.metricsEmitter\n\t\t\t\t\t\t.getClass().getName()));\n\n\t\tthis.online = true;\n\t}\n\n\tprotected long getReportUpdatesPendingCount() {\n\t\treturn reportUpdatesPendingCount;\n\t}\n\n\tprotected long getWarnUpdatesPendingCount() {\n\t\treturn warnUpdatesPendingCount;\n\t}\n\n\tprotected long getForceCheckpointOnPendingUpdateCount() {\n\t\treturn forceCheckpointOnPendingUpdateCount;\n\t}\n\n\t/* builder methods */\n\tpublic AggregateCache withEnvironment(EnvironmentType environment) {\n\t\tthis.environment = environment.name();\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withEnvironment(String environment) {\n\t\tthis.environment = environment;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withTableName(String tableName) {\n\t\tthis.tableName = tableName;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withStreamName(String streamName) {\n\t\tthis.streamName = streamName;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withRegion(Region region) {\n\t\tthis.region = region;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withLabelColumn(String labelColumn) {\n\t\tthis.labelName = labelColumn;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withDateColumn(String dateColumn) {\n\t\tthis.dateName = dateColumn;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withCredentials(AWSCredentialsProvider credentials) {\n\t\tthis.credentials = credentials;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache 
withAggregateType(AggregatorType type) {\n\t\tthis.aggregatorType = type;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withMetricsEmitter(IMetricsEmitter metricsEmitter) {\n\t\tthis.metricsEmitter = metricsEmitter;\n\t\treturn this;\n\t}\n\n\tpublic AggregateCache withDataStore(IDataStore dataStore) {\n\t\tthis.dataStore = dataStore;\n\n\t\treturn this;\n\t}\n\n\tprotected void setCheckpointForcingThresholds() throws Exception {\n\t\t// set the force checkpoint level @ 4 minutes of write capacity, warning\n\t\t// at half that, and info an half the warning threshold\n\t\tif (this.dataStore.refreshForceCheckpointThresholds() > 0) {\n\t\t\tthis.forceCheckpointOnPendingUpdateCount = this.dataStore\n\t\t\t\t\t.refreshForceCheckpointThresholds();\n\t\t\tthis.warnUpdatesPendingCount = (long) Math\n\t\t\t\t\t.ceil(this.forceCheckpointOnPendingUpdateCount / 2);\n\t\t\tthis.reportUpdatesPendingCount = (long) Math\n\t\t\t\t\t.ceil(this.warnUpdatesPendingCount / 2);\n\t\t}\n\t}\n\n\t/**\n\t * Mechanism to update the pending update set with new summary values, based\n\t * upon new events being consumed and calculated with the indicated\n\t * calculation.\n\t * \n\t * @param aggregatorType\n\t *            The type of Aggregator that the cache is being used with\n\t * @param fieldLabel\n\t *            The label value on which data will be aggregated\n\t * @param dateValue\n\t *            The date value on which data will be aggregated\n\t * @param seq\n\t *            The sequence number of the underlying Kinesis record which\n\t *            generated the update\n\t * @param countIncrement\n\t *            The increment of count for the item\n\t * @param summedIncrements\n\t *            The set of summary values to be added to the aggregate\n\t * @param calculationConfig\n\t *            The configuration of what types of summaries should be applied\n\t *            to the summed fields\n\t * @throws Exception\n\t */\n\t/*\n\t * This method is synchronised to prevent 
any issues where the consumer has\n\t * not implemented the aggregator=>worker mapping in a threadsafe manner.\n\t * Using the internal IRecordProcessor and IRecordProcessorFactory, we\n\t * generate new instances of the aggregator per shard worker thread.\n\t * However, a customer may allocate a single aggregator to multiple workers,\n\t * and while this will be slower, at least the data in the backing store\n\t * will be correct\n\t */\n\tpublic synchronized void update(final AggregatorType aggregatorType,\n\t\t\tfinal LabelSet fieldLabel, final String dateValue,\n\t\t\tfinal TimeHorizon timeHorizon, final String seq,\n\t\t\tfinal Integer countIncrement,\n\t\t\tfinal Map<String, Double> summedIncrements,\n\t\t\tSummaryConfiguration calculationConfig) throws Exception {\n\t\t// lazy validate the configuration\n\t\tif (!online)\n\t\t\tinitialise();\n\n\t\t// get the payload for the current label value to be updated\n\t\tUpdateKey key = new UpdateKey(fieldLabel, this.dateName, dateValue,\n\t\t\t\ttimeHorizon);\n\t\tUpdateValue payload = pendingUpdates.get(key);\n\t\tif (payload == null) {\n\t\t\tpayload = new UpdateValue();\n\t\t}\n\n\t\t// always update the count\n\t\tpayload.incrementCount(countIncrement);\n\n\t\t// process summary updates based on the summary configuration\n\t\tif (aggregatorType.equals(AggregatorType.SUM)) {\n\t\t\t// process all the requested calculations\n\t\t\tfor (String s : calculationConfig.getItemSet()) {\n\t\t\t\tfor (SummaryElement e : calculationConfig\n\t\t\t\t\t\t.getRequestedCalculations(s)) {\n\t\t\t\t\t// be tolerant that not every summary item may be present on\n\t\t\t\t\t// every extracted item\n\t\t\t\t\tif (summedIncrements.containsKey(s)) {\n\t\t\t\t\t\tpayload.updateSummary(e.getAttributeAlias(),\n\t\t\t\t\t\t\t\tsummedIncrements.get(e.getStreamDataElement()),\n\t\t\t\t\t\t\t\te);\n\t\t\t\t\t} else {\n\t\t\t\t\t\tlogWarn(String\n\t\t\t\t\t\t\t\t.format(\"Summary Item '%s' not found in Extracted Data - 
Ignoring\",\n\t\t\t\t\t\t\t\t\t\ts));\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t// update the last write sequence and time\n\t\tpayload.lastWrite(seq, System.currentTimeMillis());\n\n\t\t// write the updates back\n\t\tpendingUpdates.put(key, payload);\n\n\t\t// put some nags into the log to remind an implementer to checkpoint\n\t\t// periodically\n\t\tif (reportUpdatesPendingCount > 0) {\n\t\t\tif (pendingUpdates.size() % reportUpdatesPendingCount == 0) {\n\t\t\t\tlogInfo(String.format(\"%s Pending Aggregates to be flushed\",\n\t\t\t\t\t\tpendingUpdates.size()));\n\t\t\t}\n\t\t}\n\n\t\tif (warnUpdatesPendingCount > 0) {\n\t\t\tif (pendingUpdates.size() > warnUpdatesPendingCount) {\n\t\t\t\tlogWarn(String.format(\n\t\t\t\t\t\t\"Warning - %s Pending Aggregates - Checkpoint NOW\",\n\t\t\t\t\t\tpendingUpdates.size()));\n\t\t\t}\n\t\t}\n\n\t\t// checkpoint manually at the force threshold to prevent the aggregator\n\t\t// falling over\n\t\tif (forceCheckpointOnPendingUpdateCount > 0) {\n\t\t\tif (pendingUpdates.size() > forceCheckpointOnPendingUpdateCount) {\n\t\t\t\tlogWarn(String\n\t\t\t\t\t\t.format(\"Forcing checkpoint at %s Aggregates to avoid KCL Worker Disconnect - please ensure you have checkpointed the enclosing IRecordProcessor\",\n\t\t\t\t\t\t\t\tpendingUpdates.size()));\n\t\t\t\tflush();\n\n\t\t\t\tforcedCount++;\n\n\t\t\t\tif (forcedCount % updateForceCheckpointFrequency == 0) {\n\t\t\t\t\t// allow the system to refresh the force checkpoint\n\t\t\t\t\t// thresholds\n\t\t\t\t\t// periodically\n\t\t\t\t\tsetCheckpointForcingThresholds();\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\tpublic UpdateValue get(UpdateKey key) {\n\t\treturn pendingUpdates.get(key);\n\t}\n\n\tprotected IDataStore getDataStore() {\n\t\treturn this.dataStore;\n\t}\n\n\t/**\n\t * Flush the state of all pending in memory updates to Dynamo DB.\n\t * \n\t * @throws Exception\n\t */\n\t/*\n\t * See comments on aggregate() as to why this method is synchronised\n\t */\n\tpublic synchronized void 
flush() throws Exception {\n\t\tlong startTime = System.currentTimeMillis();\n\t\tMap<UpdateKey, Map<String, AggregateAttributeModification>> dataModifications = this.dataStore\n\t\t\t\t.write(pendingUpdates);\n\t\tlogInfo(String.format(\"Cache Flushed %s modifications in %sms\",\n\t\t\t\tthis.pendingUpdates.size(),\n\t\t\t\t(System.currentTimeMillis() - startTime)));\n\n\t\t// publish the cloudwatch metrics\n\t\tif (this.metricsEmitter != null)\n\t\t\ttry {\n\t\t\t\tstartTime = System.currentTimeMillis();\n\t\t\t\tthis.metricsEmitter.emit(dataModifications);\n\t\t\t\tlogInfo(String\n\t\t\t\t\t\t.format(\"Instrumentation Dispatched to Metrics Service in %sms\",\n\t\t\t\t\t\t\t\t(System.currentTimeMillis() - startTime)));\n\t\t\t} catch (Exception e) {\n\t\t\t\t// log the error but do not fail\n\t\t\t\tLOG.error(\"Metrics Emitter Exception - Aggregate Cache will NOT terminate\");\n\t\t\t\tLOG.error(e);\n\t\t\t}\n\n\t\tpendingUpdates = new HashMap<>();\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/cache/UpdateKey.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.cache;\n\nimport java.text.ParseException;\nimport java.util.Calendar;\nimport java.util.Date;\n\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\n\n/**\n * Class which is used by the object Aggregator as the key to the in-memory\n * version of the AggregateTable.\n */\npublic class UpdateKey {\n\tprivate LabelSet labelValues;\n\n\tprivate String dateAttribute;\n\n\tprivate String dateValue;\n\n\tprivate TimeHorizon timeHorizon;\n\n\tprivate Calendar cal = Calendar.getInstance();\n\n\tpublic UpdateKey(LabelSet labelValues, String dateAttribute,\n\t\t\tString dateValue, TimeHorizon timeHorizon) {\n\t\tthis.labelValues = labelValues;\n\t\tthis.dateAttribute = dateAttribute;\n\t\tthis.dateValue = dateValue;\n\t\tthis.timeHorizon = timeHorizon;\n\t}\n\n\tpublic String getAggregateColumnName() {\n\t\treturn this.labelValues.getName();\n\t}\n\n\tpublic String getDateValueColumnName() {\n\t\treturn this.dateAttribute;\n\t}\n\n\tpublic String getAggregatedValue() {\n\t\treturn this.labelValues.valuesAsString();\n\t}\n\n\tpublic String getDateValue() {\n\t\treturn 
this.dateValue;\n\t}\n\n\tpublic Date getDateValueAsDate() throws ParseException {\n\t\t// instrument the FOREVER metric at current time\n\t\tif (this.getTimeHorizon().equals(TimeHorizon.FOREVER)) {\n\t\t\tcal.setTimeInMillis(System.currentTimeMillis());\n\t\t\treturn cal.getTime();\n\t\t} else {\n\t\t\treturn StreamAggregator.dateFormatter.parse(StreamAggregatorUtils\n\t\t\t\t\t.extractDateFromMultivalue(this.getTimeHorizon(),\n\t\t\t\t\t\t\tthis.getDateValue()));\n\t\t}\n\t}\n\n\tpublic TimeHorizon getTimeHorizon() {\n\t\treturn this.timeHorizon;\n\t}\n\n\t@Override\n\tpublic boolean equals(Object o) {\n\t\tif (o == null)\n\t\t\treturn false;\n\n\t\tif (!(o instanceof UpdateKey))\n\t\t\treturn false;\n\n\t\tUpdateKey other = (UpdateKey) o;\n\t\tif (this.labelValues.equals(other.labelValues)\n\t\t\t\t&& this.dateValue.equals(other.dateValue)) {\n\t\t\treturn true;\n\t\t} else {\n\t\t\treturn false;\n\t\t}\n\t}\n\n\t@Override\n\tpublic int hashCode() {\n\t\tint res = 17;\n\t\tres = 31 * res\n\t\t\t\t+ (this.labelValues == null ? 0 : this.labelValues.hashCode());\n\t\tres = 31 * res\n\t\t\t\t+ (this.dateValue == null ? 0 : this.dateValue.hashCode());\n\t\treturn res;\n\t}\n\n\t@Override\n\tpublic String toString() {\n\t\treturn String\n\t\t\t\t.format(\"Update Key - Date Value: %s, Date Column: %s, Label Values: %s\",\n\t\t\t\t\t\tthis.dateValue, this.dateAttribute, this.labelValues);\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/cache/UpdateValue.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.cache;\n\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryElement;\n\n/**\n * Class which is used as the payload container for data which is cached in the\n * Aggregator prior to checkpointing.\n */\npublic class UpdateValue {\n    private double aggregateCount;\n\n    /*\n     * The pending update summaries are comprised of the value to be applied to\n     * the attribute, the calculation that was applied to get that value, and\n     * the original value from the stream used to extract the data\n     */\n    private Map<String, AggregateAttributeModification> summaryValues;\n\n    private String lastWriteSeq;\n\n    private long lastWriteTime;\n\n    public UpdateValue() {\n        this.aggregateCount = 0;\n        this.summaryValues = new HashMap<>();\n    }\n\n    public void incrementCount(int count) {\n        this.aggregateCount += count;\n    }\n\n    public void updateSummary(String label, double withValue, SummaryElement element) {\n        // apply the calculation to 
the old and new values in the update\n        // payload\n        AggregateAttributeModification current = this.summaryValues.get(element.getAttributeAlias());\n        Double currentValue = current == null ? null : current.getFinalValue();\n\n        // apply the calculation using the apply method\n        Double newValue = element.getCalculation().apply(currentValue, withValue);\n\n        // build the summary value to be tracked in memory\n        AggregateAttributeModification update = new AggregateAttributeModification(\n                element.getAttributeAlias(), label, currentValue, withValue, newValue,\n                element.getCalculation(), 0);\n\n        // update the in memory version of the update payload for the label\n        this.summaryValues.put(element.getAttributeAlias(), update);\n    }\n\n    public void lastWrite(String lastSeq, long lastTime) {\n        this.lastWriteSeq = lastSeq;\n        this.lastWriteTime = lastTime;\n    }\n\n    public double getAggregateCount() {\n        return aggregateCount;\n    }\n\n    public double getSummaryValue(String label) {\n        return getSummary(label).getFinalValue();\n    }\n\n    public AggregateAttributeModification getSummary(String label) {\n        return this.summaryValues.get(label);\n    }\n\n    public AggregateAttributeModification getValueByOriginal(String attributeName,\n            SummaryCalculation calculation) {\n        return this.summaryValues.get(SummaryElement.makeStoreAttributeName(attributeName,\n                calculation));\n    }\n\n    public Map<String, AggregateAttributeModification> getSummaryValues() {\n        return this.summaryValues;\n    }\n\n    public String getLastWriteSeq() {\n        return lastWriteSeq;\n    }\n\n    public long getLastWriteTime() {\n        return lastWriteTime;\n    }\n\n    @Override\n    public String toString() {\n        String summary = \"\";\n        if (this.summaryValues != null && this.summaryValues.size() > 0) {\n            
summary = \",\";\n            for (String s : this.summaryValues.keySet()) {\n                summary = summary + summaryValues.get(s).toString() + \",\";\n            }\n            summary = summary.substring(0, summary.length() - 1);\n        }\n        return String.format(\n                \"Update Value - Aggregate Count: %s, Last Write Seq: %s, Last Write Time: %s%s\",\n                this.aggregateCount, this.lastWriteSeq,\n                StreamAggregator.dateFormatter.format(this.lastWriteTime), summary);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/cli/AggregatorsCli.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.cli;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider;\nimport com.amazonaws.auth.EnvironmentVariableCredentialsProvider;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.regions.Regions;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils;\n\npublic class AggregatorsCli {\n    public static final String DELETE_TO_HWM = \"delete-to-hwm\";\n\n    public static final String GET_REDSHIFT_COPY = \"get-redshift-copy-command\";\n\n    public static final String GET_HIVE_WRAPPER = \"get-hive-wrapper-statement\";\n\n    public static final String GET_TABLE_STRUCTURE = \"get-dynamo-table-structure\";\n\n    private static void validateAction(String actionRequested) throws Exception {\n        if (!actionRequested.equals(DELETE_TO_HWM) && !actionRequested.equals(GET_REDSHIFT_COPY)\n                && !actionRequested.equals(GET_HIVE_WRAPPER)\n                
&& !actionRequested.equals(GET_TABLE_STRUCTURE))\n            throw new Exception(String.format(\"Invalid Action %s\", actionRequested));\n    }\n\n    public static void main(String[] args) throws Exception {\n        String applicationName = System.getProperty(AggregatorsConstants.APP_NAME_PARAM);\n        String namespace = System.getProperty(AggregatorsConstants.NAMESPACE_PARAM);\n        String action = System.getProperty(\"action\");\n        String regionName = System.getProperty(AggregatorsConstants.REGION_PARAM);\n        Region region = null;\n        if (regionName != null && !regionName.equals(\"\")) {\n            region = Region.getRegion(Regions.fromName(regionName));\n        }\n\n        validateAction(action);\n\n        final AWSCredentialsProvider credentialsProvider;\n\n        final String accessKey = System.getenv(\"AWS_ACCESS_KEY_ID\");\n        if (accessKey == null) {\n            credentialsProvider = new ClasspathPropertiesFileCredentialsProvider();\n        } else {\n            credentialsProvider = new EnvironmentVariableCredentialsProvider();\n        }\n\n        String aggregatorTableName;\n        final AmazonDynamoDB dynamoClient = new AmazonDynamoDBClient(credentialsProvider);\n        if (region != null)\n            dynamoClient.setRegion(region);\n\n        switch (action) {\n            case DELETE_TO_HWM:\n                String hwm = System.getProperty(\"last-sequence-number\");\n                aggregatorTableName = System.getProperty(\"from-aggregator-table\");\n\n                DynamoUtils.cleanupAggTable(credentialsProvider, region, aggregatorTableName, hwm);\n                break;\n            case GET_REDSHIFT_COPY:\n                // get the redshift target table name\n                String redshiftTableName = System.getProperty(\"to-redshift-table\");\n                aggregatorTableName = System.getProperty(\"from-aggregator-table\");\n\n                
System.out.println(StreamAggregatorUtils.getRedshiftCopyCommand(dynamoClient,\n                        redshiftTableName, aggregatorTableName));\n                break;\n            case GET_HIVE_WRAPPER:\n                AggregatorType aggType = AggregatorType.valueOf(System.getProperty(\"aggregator-type\"));\n                String hiveTableName = System.getProperty(\"hive-table-name\");\n                aggregatorTableName = System.getProperty(\"from-aggregator-table\");\n\n                System.out.println(StreamAggregatorUtils.getDynamoHiveWrapper(dynamoClient,\n                        hiveTableName, aggregatorTableName));\n                break;\n            case GET_TABLE_STRUCTURE:\n                aggregatorTableName = System.getProperty(\"from-aggregator-table\");\n                System.out.println(DynamoUtils.getDynamoTableStructure(dynamoClient,\n                        aggregatorTableName));\n                break;\n            default:\n                break;\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/ConfigFileUtils.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.configuration;\n\nimport java.net.URL;\nimport java.util.Date;\n\nimport com.amazonaws.HttpMethod;\nimport com.amazonaws.services.s3.AmazonS3;\nimport com.amazonaws.services.s3.AmazonS3Client;\nimport com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;\n\npublic class ConfigFileUtils {\n    public static final String makeConfigFileURL(String configUrl) throws Exception {\n        String url = null;\n\n        if (configUrl.startsWith(\"http\")) {\n            url = configUrl;\n        } else if (configUrl.startsWith(\"s3\")) {\n            AmazonS3 s3Client = new AmazonS3Client();\n            String bucket = configUrl.split(\"/\")[2];\n            String prefix = configUrl.substring(configUrl.indexOf(bucket) + bucket.length() + 1);\n\n            // generate a presigned url that expires 1 hour from now\n            Date expiration = new Date();\n            long msec = expiration.getTime();\n            msec += 1000 * 60 * 60; // 1 hour.\n            expiration.setTime(msec);\n\n            GeneratePresignedUrlRequest generatePresignedUrlRequest = new GeneratePresignedUrlRequest(\n                    bucket, prefix);\n            generatePresignedUrlRequest.setMethod(HttpMethod.GET);\n            generatePresignedUrlRequest.setExpiration(expiration);\n\n            URL s3url = 
s3Client.generatePresignedUrl(generatePresignedUrlRequest);\n            url = s3url.toString();\n        } else {\n            url = new URL(String.format(\"file://%s\", configUrl)).toString();\n        }\n\n        return url;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/DataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.configuration;\n\npublic enum DataExtractor {\n    JSON(\"com.amazonaws.services.kinesis.io.JsonDataExtractor\"), CSV(\n            \"com.amazonaws.services.kinesis.io.CsvDataExtractor\"), OBJECT(\n            \"com.amazonaws.services.kinesis.io.ObjectExtractor\"), REGEX(\n            \"com.amazonaws.services.kinesis.io.RegexDataExtractor\");\n\n    private DataExtractor(String linkedClass) {\n        this.linkedClass = linkedClass;\n    }\n\n    private String linkedClass;\n\n    public String getLinkedClass() {\n        return this.linkedClass;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/ExternalConfigurationModel.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.configuration;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.net.URL;\nimport java.util.ArrayList;\nimport java.util.Iterator;\nimport java.util.List;\n\nimport org.apache.commons.io.FileUtils;\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.auth.DefaultAWSCredentialsProviderChain;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.exception.ClassNotAnnotatedException;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter;\nimport com.amazonaws.services.s3.AmazonS3;\nimport com.amazonaws.services.s3.AmazonS3Client;\nimport com.amazonaws.services.s3.transfer.Download;\nimport com.amazonaws.services.s3.transfer.TransferManager;\nimport com.fasterxml.jackson.databind.JsonNode;\nimport 
com.fasterxml.jackson.databind.ObjectMapper;\n\npublic class ExternalConfigurationModel {\n    private static final Log LOG = LogFactory.getLog(ExternalConfigurationModel.class);\n\n    private String namespace;\n\n    private List<TimeHorizon> timeHorizons;\n\n    private AggregatorType aggregatorType;\n\n    private DataExtractor dataExtractor;\n\n    private List<String> labelItems = new ArrayList<>();\n\n    private String labelAttributeAlias;\n\n    private String dateItem, dateFormat, dateAttributeAlias;\n\n    private List<String> summaryItems;\n\n    private String delimiter;\n\n    private String itemTerminator;\n\n    private String filterRegex;\n\n    private String regularExpression;\n\n    private boolean isAnnotatedClass;\n\n    private Class<?> clazz;\n\n    private static ObjectMapper mapper = new ObjectMapper();\n\n    private String tableName;\n\n    private Long readIOPs;\n\n    private Long writeIOPs;\n\n    private boolean failOnDataExtraction;\n\n    private boolean emitMetrics;\n\n    private Class<IDataStore> dataStore;\n\n    private Class<IMetricsEmitter> metricsEmitter;\n\n    private static void configureCsv(JsonNode document, ExternalConfigurationModel model) {\n        model.setDelimiter(StreamAggregatorUtils.readValueAsString(document, \"delimiter\"));\n    }\n\n    private static void configureStringCommon(JsonNode document, ExternalConfigurationModel model) {\n        model.setItemTerminator(StreamAggregatorUtils.readValueAsString(document, \"lineTerminator\"));\n        model.setFilterRegex(StreamAggregatorUtils.readValueAsString(document, \"filterRegex\"));\n    }\n\n    private static void configureRegex(JsonNode document, ExternalConfigurationModel model)\n            throws InvalidConfigurationException {\n        String regex = StreamAggregatorUtils.readValueAsString(document, \"regularExpression\");\n\n        if (regex == null || regex.equals(\"\"))\n            throw new InvalidConfigurationException(\n                    
\"Cannot configure a Regular Expression Aggregator without a Regular Expression (configuration 'regularExpression'\");\n        model.setRegularExpression(regex);\n    }\n\n    private static void configureObject(JsonNode document, ExternalConfigurationModel model)\n            throws InvalidConfigurationException {\n\n        String classname = StreamAggregatorUtils.readValueAsString(document, \"class\");\n        if (classname == null || classname.equals(\"\"))\n            throw new InvalidConfigurationException(\n                    \"Cannot configure an Aggregator which uses Object based data extraction without a 'class' configuration item\");\n\n        try {\n            model.setClazz(Class.forName(classname));\n        } catch (ClassNotFoundException e) {\n            throw new InvalidConfigurationException(String.format(\n                    \"ClassNotFoundException: %s not found on Classpath\", classname));\n        }\n\n        // try to load the class using its annotations\n        try {\n            AnnotationProcessor p = new AnnotationProcessor(model.getClazz());\n            model.setAnnotatedClass(true);\n        } catch (ClassNotAnnotatedException e) {\n            // no problem\n        } catch (Exception e) {\n            throw new InvalidConfigurationException(e);\n        }\n    }\n\n    private static void addTimeHorizons(JsonNode document, ExternalConfigurationModel model)\n            throws Exception {\n        JsonNode node = StreamAggregatorUtils.readJsonValue(document, \"timeHorizons\");\n        if (node != null) {\n            Iterator<JsonNode> timeHorizonValues = node.elements();\n            while (timeHorizonValues.hasNext()) {\n                String t = timeHorizonValues.next().asText();\n                String timeHorizonName = null;\n                int granularity = -1;\n\n                // process parameterised time horizons\n                if (t.contains(\"MINUTES_GROUPED\")) {\n                    String[] items = 
t.split(\"\\\\(\");\n                    timeHorizonName = items[0];\n                    granularity = Integer.parseInt(items[1].replaceAll(\"\\\\)\", \"\"));\n                } else {\n                    timeHorizonName = t;\n                }\n\n                try {\n                    TimeHorizon th = TimeHorizon.valueOf(timeHorizonName);\n\n                    if (th.equals(TimeHorizon.MINUTES_GROUPED) && granularity == -1) {\n                        throw new InvalidConfigurationException(\n                                \"Unable to create Grouped Minutes Time Horizon without configuration of Granularity using notation MINUTES_GROUPED(<granularity in minutes>)\");\n                    } else {\n                        if (th.equals(TimeHorizon.MINUTES_GROUPED)) {\n                            th.setGranularity(granularity);\n                        }\n                    }\n                    model.addTimeHorizon(th);\n                } catch (Exception e) {\n                    throw new Exception(String.format(\"Unable to configure Time Horizon %s\", t), e);\n                }\n            }\n        }\n    }\n\n    private static void setAggregatorType(JsonNode document, ExternalConfigurationModel model)\n            throws Exception {\n        String aggType = StreamAggregatorUtils.readValueAsString(document, \"type\");\n\n        if (aggType == null || aggType.equals(\"\")) {\n            model.setAggregatorType(AggregatorType.COUNT);\n        } else {\n            try {\n                model.setAggregatorType(AggregatorType.valueOf(aggType));\n            } catch (Exception e) {\n                throw new Exception(String.format(\"Unable to configure AggregatorType %s\", aggType));\n            }\n        }\n    }\n\n    public static List<ExternalConfigurationModel> buildFromConfig(String configFilePath)\n            throws Exception {\n        List<ExternalConfigurationModel> response = new ArrayList<>();\n\n        // reference the config file 
as a full path\n        File configFile = new File(configFilePath);\n        if (!configFile.exists()) {\n\n            // try to load the file from the classpath\n            InputStream classpathConfig = ExternalConfigurationModel.class.getClassLoader().getResourceAsStream(\n                    configFilePath);\n            if (classpathConfig != null && classpathConfig.available() > 0) {\n                configFile = new File(ExternalConfigurationModel.class.getResource(\n                        (configFilePath.startsWith(\"/\") ? \"\" : \"/\") + configFilePath).toURI());\n\n                LOG.info(String.format(\"Loaded Configuration %s from Classpath\", configFilePath));\n            } else {\n                if (configFilePath.startsWith(\"s3://\")) {\n                    AmazonS3 s3Client = new AmazonS3Client(new DefaultAWSCredentialsProviderChain());\n                    TransferManager tm = new TransferManager(s3Client);\n\n                    // parse the config path to get the bucket name and prefix\n                    final String s3ProtoRegex = \"s3:\\\\/\\\\/\";\n                    String bucket = configFilePath.replaceAll(s3ProtoRegex, \"\").split(\"/\")[0];\n                    String prefix = configFilePath.replaceAll(\n                            String.format(\"%s%s\\\\/\", s3ProtoRegex, bucket), \"\");\n\n                    // download the file using TransferManager\n                    configFile = File.createTempFile(configFilePath, null);\n                    Download download = tm.download(bucket, prefix, configFile);\n                    download.waitForCompletion();\n\n                    // shut down the transfer manager\n                    tm.shutdownNow();\n\n                    LOG.info(String.format(\"Loaded Configuration from Amazon S3 %s/%s to %s\",\n                            bucket, prefix, configFile.getAbsolutePath()));\n                } else {\n                    // load the file from external URL\n                    
try {\n                        configFile = File.createTempFile(configFilePath, null);\n                        FileUtils.copyURLToFile(new URL(configFilePath), configFile, 1000, 1000);\n                        LOG.info(String.format(\"Loaded Configuration from %s to %s\",\n                                configFilePath, configFile.getAbsolutePath()));\n                    } catch (IOException e) {\n                        // timeouts and similar failures are left for the generalised\n                        // 'config file not found' handler later\n                    }\n                }\n            }\n        } else {\n            LOG.info(String.format(\"Loaded Configuration from Filesystem %s\", configFilePath));\n        }\n\n        // if we haven't been able to load a config file, then bail\n        if (configFile == null || !configFile.exists()) {\n            throw new InvalidConfigurationException(String.format(\n                    \"Unable to Load Config File from %s\", configFilePath));\n        }\n\n        JsonNode document = StreamAggregatorUtils.asJsonNode(configFile);\n\n        ExternalConfigurationModel config = null;\n\n        Iterator<JsonNode> i = document.elements();\n        while (i.hasNext()) {\n            config = new ExternalConfigurationModel();\n\n            JsonNode section = i.next();\n\n            // set generic properties\n            config.setNamespace(StreamAggregatorUtils.readValueAsString(section, \"namespace\"));\n            config.setDateFormat(StreamAggregatorUtils.readValueAsString(section, \"dateFormat\"));\n            addTimeHorizons(section, config);\n            setAggregatorType(section, config);\n\n            // set the label items\n            JsonNode labelItems = StreamAggregatorUtils.readJsonValue(section, \"labelItems\");\n            if (labelItems != null && labelItems.size() > 0) {\n                Iterator<JsonNode> iterator = labelItems.elements();\n                while 
(iterator.hasNext()) {\n                    JsonNode n = iterator.next();\n                    config.addLabelItems(n.asText());\n                }\n            }\n            config.setLabelAttributeAlias(StreamAggregatorUtils.readValueAsString(section,\n                    \"labelAttributeAlias\"));\n\n            config.setDateItem(StreamAggregatorUtils.readValueAsString(section, \"dateItem\"));\n            config.setDateAttributeAlias(StreamAggregatorUtils.readValueAsString(section,\n                    \"dateAttributeAlias\"));\n            JsonNode summaryItems = StreamAggregatorUtils.readJsonValue(section, \"summaryItems\");\n            if (summaryItems != null && summaryItems.size() > 0) {\n                Iterator<JsonNode> iterator = summaryItems.elements();\n                while (iterator.hasNext()) {\n                    JsonNode n = iterator.next();\n                    config.addSummaryItem(n.asText());\n                }\n            }\n\n            config.setTableName(StreamAggregatorUtils.readValueAsString(section, \"tableName\"));\n\n            String readIO = StreamAggregatorUtils.readValueAsString(section, \"readIOPS\");\n            if (readIO != null)\n                config.setReadIOPs(Long.parseLong(readIO));\n            String writeIO = StreamAggregatorUtils.readValueAsString(section, \"writeIOPS\");\n            if (writeIO != null)\n                config.setWriteIOPs(Long.parseLong(writeIO));\n\n            // configure tolerance of data extraction problems\n            String failOnDataExtraction = StreamAggregatorUtils.readValueAsString(section,\n                    \"failOnDataExtraction\");\n            if (failOnDataExtraction != null)\n                config.setFailOnDataExtraction(Boolean.parseBoolean(failOnDataExtraction));\n\n            // configure whether metrics should be emitted\n            String emitMetrics = StreamAggregatorUtils.readValueAsString(section, \"emitMetrics\");\n            String 
metricsEmitterClassname = StreamAggregatorUtils.readValueAsString(section,\n                    \"metricsEmitterClass\");\n            if (emitMetrics != null || metricsEmitterClassname != null) {\n                if (metricsEmitterClassname != null) {\n                    config.setMetricsEmitter((Class<IMetricsEmitter>) ClassLoader.getSystemClassLoader().loadClass(\n                            metricsEmitterClassname));\n                } else {\n                    config.setEmitMetrics(Boolean.parseBoolean(emitMetrics));\n                }\n            }\n\n            // configure the data store class\n            String dataStoreClass = StreamAggregatorUtils.readValueAsString(section, \"IDataStore\");\n            if (dataStoreClass != null) {\n                Class<IDataStore> dataStore = (Class<IDataStore>) ClassLoader.getSystemClassLoader().loadClass(\n                        dataStoreClass);\n                config.setDataStore(dataStore);\n            }\n\n            // get the data extractor configuration, so we know what other json\n            // elements to retrieve from the configuration document\n            String useExtractor = null;\n            try {\n                useExtractor = StreamAggregatorUtils.readValueAsString(section, \"dataExtractor\");\n                config.setDataExtractor(DataExtractor.valueOf(useExtractor));\n            } catch (Exception e) {\n                throw new Exception(String.format(\n                        \"Unable to configure aggregator with Data Extractor %s\", useExtractor));\n            }\n\n            switch (config.getDataExtractor()) {\n                case CSV:\n                    configureStringCommon(section, config);\n                    configureCsv(section, config);\n                    break;\n                case JSON:\n                    configureStringCommon(section, config);\n                    break;\n                case OBJECT:\n                    configureObject(section, config);\n  
                  break;\n                case REGEX:\n                    configureRegex(section, config);\n            }\n\n            response.add(config);\n        }\n        return response;\n    }\n\n    public String getNamespace() {\n        return this.namespace;\n    }\n\n    public List<TimeHorizon> getTimeHorizons() {\n        return this.timeHorizons;\n    }\n\n    public String getFilterRegex() {\n        return this.filterRegex;\n    }\n\n    public String getRegularExpression() {\n        return this.regularExpression;\n    }\n\n    public String getTableName() {\n        return this.tableName;\n    }\n\n    public Long getReadIOPs() {\n        return this.readIOPs;\n    }\n\n    public Long getWriteIOPs() {\n        return this.writeIOPs;\n    }\n\n    public String getLabelAttributeAlias() {\n        return this.labelAttributeAlias;\n    }\n\n    public String getDateAttributeAlias() {\n        return this.dateAttributeAlias;\n    }\n\n    public boolean isAnnotatedClass() {\n        return this.isAnnotatedClass;\n    }\n\n    public void addTimeHorizon(TimeHorizon timeHorizon) {\n        if (this.timeHorizons == null)\n            this.timeHorizons = new ArrayList<>();\n\n        this.timeHorizons.add(timeHorizon);\n    }\n\n    public AggregatorType getAggregatorType() {\n        return this.aggregatorType;\n    }\n\n    public List<String> getLabelItems() {\n        return this.labelItems;\n    }\n\n    public String getDateItem() {\n        return this.dateItem;\n    }\n\n    public String getDateAlias() {\n        return this.dateAttributeAlias;\n    }\n\n    public String getDateFormat() {\n        return this.dateFormat;\n    }\n\n    public List<String> getSummaryItems() {\n        return this.summaryItems;\n    }\n\n    public String getDelimiter() {\n        return this.delimiter;\n    }\n\n    public String getItemTerminator() {\n        return this.itemTerminator;\n    }\n\n    public void addSummaryItem(String summaryItem) {\n        
if (this.summaryItems == null)\n            this.summaryItems = new ArrayList<>();\n\n        this.summaryItems.add(summaryItem);\n    }\n\n    public Class getClazz() {\n        return this.clazz;\n    }\n\n    public DataExtractor getDataExtractor() {\n        return this.dataExtractor;\n    }\n\n    public boolean shouldFailOnDataExtraction() {\n        return this.failOnDataExtraction;\n    }\n\n    public boolean shouldEmitMetrics() {\n        return this.emitMetrics;\n    }\n\n    public Class<IMetricsEmitter> getMetricsEmitter() {\n        return this.metricsEmitter;\n    }\n\n    public Class getDataStore() {\n        return this.dataStore;\n    }\n\n    private void setNamespace(String namespace) {\n        this.namespace = namespace;\n    }\n\n    private void setAggregatorType(AggregatorType aggregatorType) {\n        this.aggregatorType = aggregatorType;\n    }\n\n    private void addLabelItems(String labelItem) {\n        this.labelItems.add(labelItem);\n    }\n\n    private void setLabelItems(List<String> labelItems) {\n        this.labelItems = labelItems;\n    }\n\n    private void setDateItem(String dateItem) {\n        this.dateItem = dateItem;\n    }\n\n    private void setDateFormat(String dateFormat) {\n        this.dateFormat = dateFormat;\n    }\n\n    private void setDelimiter(String delimiter) {\n        if (delimiter != null && !delimiter.equals(\"\"))\n            this.delimiter = delimiter;\n    }\n\n    private void setItemTerminator(String itemTerminator) {\n        if (itemTerminator != null && !itemTerminator.equals(\"\"))\n            this.itemTerminator = itemTerminator;\n    }\n\n    private void setFilterRegex(String filterRegex) {\n        this.filterRegex = filterRegex;\n    }\n\n    private void setRegularExpression(String regularExpression) {\n        this.regularExpression = regularExpression;\n    }\n\n    private void setClazz(Class clazz) {\n        this.clazz = clazz;\n    }\n\n    private void 
setDataExtractor(DataExtractor dataExtractor) {\n        this.dataExtractor = dataExtractor;\n    }\n\n    private void setAnnotatedClass(boolean isAnnotatedClass) {\n        this.isAnnotatedClass = isAnnotatedClass;\n    }\n\n    private void setTableName(String tableName) {\n        if (tableName != null && !tableName.equals(\"\"))\n            this.tableName = tableName;\n    }\n\n    private void setReadIOPs(Long readIOPs) {\n        this.readIOPs = readIOPs;\n    }\n\n    private void setWriteIOPs(Long writeIOPs) {\n        this.writeIOPs = writeIOPs;\n    }\n\n    private void setFailOnDataExtraction(boolean failOnDataExtraction) {\n        this.failOnDataExtraction = failOnDataExtraction;\n    }\n\n    private void setEmitMetrics(boolean emitMetrics) {\n        this.emitMetrics = emitMetrics;\n    }\n\n    private void setMetricsEmitter(Class<IMetricsEmitter> metricsEmitter) {\n        this.metricsEmitter = metricsEmitter;\n    }\n\n    private void setDataStore(Class<IDataStore> dataStore) {\n        this.dataStore = dataStore;\n    }\n\n    private void setLabelAttributeAlias(String labelAttributeAlias) {\n        this.labelAttributeAlias = labelAttributeAlias;\n    }\n\n    private void setDateAttributeAlias(String dateAttributeAlias) {\n        this.dateAttributeAlias = dateAttributeAlias;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/json.schema",
    "content": "{\n    \"title\": \"Amazon Kinesis Aggregators Configuration Schema\",\n    \"type\": \"object\",\n    \"properties\": {\n        \"namespace\": {\n            \"type\": \"string\",\n            \"description\": \"The namespace for data stored by the Aggregator\"\n        },\n        \"timeHorizons\":  {\n            \"type\": \"array\",\n            \"description\": \"The list of all time horizons to be used as the granularity of the Aggregators\",\n            \"items\": {\n                \"type\": \"string\"\n            },\n            \"minItems\": 1,\n            \"uniqueItems\": true\n        },\n        \"type\": {\n            \"description\": \"The Type of the Aggregator (COUNT or SUM)\",\n            \"type\": \"integer\"\n        },\n        \"dataExtractor\": {\n            \"description\": \"Mechanism for extracting data in form for aggregation from the Kinesis Stream. Must be one of CSV, JSON or OBJECT\",\n            \"type\": \"string\"\n        },\n        \"delimiter\": {\n            \"description\": \"Delimiter to be used in CSV Data Extractors\",\n            \"type\": \"string\"\n        },\n        \"labelItem\": {\n            \"description\": \"Index, Attribute or Method for the value in the stream which should be used as the top level aggregate\",\n            \"type\": \"integer\",\n            \"minValue\" : 0\n        },\n        \"dateItem\": {\n            \"description\": \"Index, Attribute or Method for the value in the stream which should be used as the event date\",\n            \"type\": \"integer\",\n            \"minValue\" : 0\n        },\n        \"dateFormat\": {\n            \"description\": \"(Optional) If the event date is stored on the stream as a String, then supply the date format which can be used to convert it into a date\",\n            \"type\": \"string\"\n        },\n        \"summaryItems\":  {\n            \"type\": \"array\",\n            \"description\": \"(Optional) The list of expressions 
of indices, attributes or methods to be used as aggregated values in addition to event count\",\n            \"items\": {\n                \"type\": \"string\"\n            },\n            \"minItems\": 1,\n            \"uniqueItems\": true\n        },\n        \"filterRegex\": {\n            \"description\": \"(Optional) Regular Expression used to filter String type stream data prior to data extraction\",\n            \"type\": \"string\"\n        },\n        \"tableName\": {\n            \"description\": \"(Optional) The name of the table to be used for storing Aggregated data in Dynamo DB\",\n            \"type\": \"string\"\n        },\n        \"readIOPS\": {\n            \"description\": \"(Optional) The number of provisioned Read IOPS for the Dynamo DB Table\",\n            \"type\": \"long\"\n        },\n        \"writeIOPS\": {\n            \"description\": \"(Optional) The number of provisioned Write IOPS for the Dynamo DB Table\",\n            \"type\": \"long\"\n        },\n        \"class\": {\n            \"description\": \"(Optional) For Object based Data Extractors, the class to use for serialising data to and from the Kinesis Stream. This may also be an Annotated Class, which will be used over all other configuration\",\n            \"type\": \"string\"\n        },\n        \"failOnDataExtraction\": {\n            \"description\": \"(Optional) Controls whether the Aggregator process should stop when data extraction from the stream fails. The default is 'true', but setting this value to 'false' will ensure that an Aggregator continues to process data from a Shard even if it contains bad data\",\n            \"type\": \"boolean\"\n        }\n    },\n    \"required\": [\"namespace\", \"timeHorizons\", \"type\", \"dataExtractor\"]\n}"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/consumer/AggregatorConsumer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.consumer;\n\nimport java.net.NetworkInterface;\nimport java.util.UUID;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.auth.DefaultAWSCredentialsProviderChain;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.regions.Regions;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorsConstants;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.factory.ExternallyConfiguredAggregatorFactory;\nimport com.amazonaws.services.kinesis.aggregators.processor.AggregatorProcessorFactory;\nimport com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;\n\npublic final class AggregatorConsumer {\n\n\tprivate static final Log LOG = 
LogFactory.getLog(AggregatorConsumer.class);\n\n\tprivate String streamName, appName, regionName, environmentName,\n\t\t\tconfigFilePath, positionInStream, kinesisEndpoint;\n\n\tprivate AWSCredentialsProvider credentialsProvider;\n\n\tprivate boolean emitMetrics = false;\n\n\tprivate InitialPositionInStream streamPosition;\n\n\tprivate int failuresToTolerate = -1;\n\n\tprivate int maxRecords = -1;\n\n\tprivate KinesisClientLibConfiguration config;\n\n\tprivate AggregatorGroup aggGroup;\n\n\tprivate boolean isConfigured = false;\n\tprivate Worker worker;\n\n\tpublic AggregatorConsumer(String streamName, String appName,\n\t\t\tString configFilePath) {\n\t\tthis.streamName = streamName;\n\t\tthis.appName = appName;\n\t\tthis.configFilePath = configFilePath;\n\t}\n\n\tprivate AggregatorGroup buildAggregatorsFromConfig() throws Exception {\n\t\treturn ExternallyConfiguredAggregatorFactory.buildFromConfig(\n\t\t\t\tthis.streamName, this.appName, this.config, configFilePath);\n\n\t}\n\n\tpublic void shutdown() throws Exception {\n\t\tthis.aggGroup.shutdown(true);\n\t\tworker.shutdown();\n\t}\n\n\tpublic int run() throws Exception {\n\t\tconfigure();\n\n\t\tSystem.out.println(String.format(\"Starting %s\", appName));\n\t\tLOG.info(String.format(\"Running %s to process stream %s\", appName,\n\t\t\t\tstreamName));\n\n\t\tIRecordProcessorFactory recordProcessorFactory = new AggregatorProcessorFactory(\n\t\t\t\taggGroup);\n\t\tworker = new Worker(recordProcessorFactory, this.config);\n\n\t\tint exitCode = 0;\n\t\tint failures = 0;\n\n\t\t// run the worker, tolerating as many failures as is configured\n\t\twhile (failures < failuresToTolerate || failuresToTolerate == -1) {\n\t\t\ttry {\n\t\t\t\tworker.run();\n\t\t\t} catch (Throwable t) {\n\t\t\t\tLOG.error(\"Caught throwable while processing data.\", t);\n\n\t\t\t\tfailures++;\n\n\t\t\t\tif (failures < failuresToTolerate) {\n\t\t\t\t\tLOG.error(\"Restarting...\");\n\t\t\t\t} else 
{\n\t\t\t\t\tshutdown();\n\t\t\t\t}\n\t\t\t\texitCode = 1;\n\t\t\t}\n\t\t}\n\n\t\treturn exitCode;\n\t}\n\n\tprivate void assertThat(boolean condition, String message) throws Exception {\n\t\tif (!condition) {\n\t\t\tthrow new InvalidConfigurationException(message);\n\t\t}\n\t}\n\n\tprivate void validateConfig() throws InvalidConfigurationException {\n\t\ttry {\n\t\t\tassertThat(this.streamName != null, \"Must Specify a Stream Name\");\n\t\t\tassertThat(this.appName != null, \"Must Specify an Application Name\");\n\t\t} catch (Exception e) {\n\t\t\tthrow new InvalidConfigurationException(e.getMessage());\n\t\t}\n\t}\n\n\tpublic void configure() throws Exception {\n\t\tif (!isConfigured) {\n\t\t\tvalidateConfig();\n\n\t\t\tif (this.positionInStream != null) {\n\t\t\t\tstreamPosition = InitialPositionInStream\n\t\t\t\t\t\t.valueOf(this.positionInStream);\n\t\t\t} else {\n\t\t\t\tstreamPosition = InitialPositionInStream.LATEST;\n\t\t\t}\n\n\t\t\t// append the environment name to the application name\n\t\t\tif (environmentName != null) {\n\t\t\t\tappName = String.format(\"%s-%s\", appName, environmentName);\n\t\t\t}\n\n\t\t\t// ensure the JVM will refresh the cached IP values of AWS resources\n\t\t\t// (e.g. service endpoints).\n\t\t\tjava.security.Security\n\t\t\t\t\t.setProperty(\"networkaddress.cache.ttl\", \"60\");\n\n\t\t\tString workerId = NetworkInterface.getNetworkInterfaces() + \":\"\n\t\t\t\t\t+ UUID.randomUUID();\n\t\t\tLOG.info(\"Using Worker ID: \" + workerId);\n\n\t\t\t// obtain credentials using the default provider chain or the\n\t\t\t// credentials provider supplied\n\t\t\tAWSCredentialsProvider credentialsProvider = this.credentialsProvider == null ? 
new DefaultAWSCredentialsProviderChain()\n\t\t\t\t\t: this.credentialsProvider;\n\n\t\t\tLOG.info(\"Using credentials with Access Key ID: \"\n\t\t\t\t\t+ credentialsProvider.getCredentials().getAWSAccessKeyId());\n\n\t\t\tconfig = new KinesisClientLibConfiguration(appName, streamName,\n\t\t\t\t\tcredentialsProvider, workerId).withInitialPositionInStream(\n\t\t\t\t\tstreamPosition).withKinesisEndpoint(kinesisEndpoint);\n\n\t\t\tconfig.getKinesisClientConfiguration().setUserAgent(\n\t\t\t\t\tStreamAggregator.AWSApplication);\n\n\t\t\tif (regionName != null) {\n\t\t\t\tRegion region = Region.getRegion(Regions.fromName(regionName));\n\t\t\t\tconfig.withRegionName(region.getName());\n\t\t\t}\n\n\t\t\tif (maxRecords != -1)\n\t\t\t\tconfig.withMaxRecords(maxRecords);\n\n\t\t\t// initialise the Aggregators\n\t\t\taggGroup = buildAggregatorsFromConfig();\n\n\t\t\tLOG.info(String\n\t\t\t\t\t.format(\"Amazon Kinesis Aggregators Managed Client prepared for %s on %s in %s (%s) using %s Max Records\",\n\t\t\t\t\t\t\tconfig.getApplicationName(),\n\t\t\t\t\t\t\tconfig.getStreamName(), config.getRegionName(),\n\t\t\t\t\t\t\tconfig.getWorkerIdentifier(),\n\t\t\t\t\t\t\tconfig.getMaxRecords()));\n\n\t\t\tisConfigured = true;\n\t\t}\n\t}\n\n\tpublic AggregatorConsumer withKinesisEndpoint(String kinesisEndpoint) {\n\t\tthis.kinesisEndpoint = kinesisEndpoint;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withToleratedWorkerFailures(int failuresToTolerate) {\n\t\tthis.failuresToTolerate = failuresToTolerate;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withMaxRecords(int maxRecords) {\n\t\tthis.maxRecords = maxRecords;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withRegionName(String regionName) {\n\t\tthis.regionName = regionName;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withEnvironment(String environmentName) {\n\t\tthis.environmentName = environmentName;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer 
withCredentialsProvider(\n\t\t\tAWSCredentialsProvider credentialsProvider) {\n\t\tthis.credentialsProvider = credentialsProvider;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withInitialPositionInStream(\n\t\t\tString positionInStream) {\n\t\tthis.positionInStream = positionInStream;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorConsumer withMetricsEmitter() {\n\t\tthis.emitMetrics = true;\n\t\treturn this;\n\t}\n\n\tpublic AggregatorGroup getAggregators() {\n\t\treturn this.aggGroup;\n\t}\n\n\tpublic static void main(String[] args) throws Exception {\n\t\tString streamName = System\n\t\t\t\t.getProperty(AggregatorsConstants.STREAM_NAME_PARAM);\n\t\tString appName = System\n\t\t\t\t.getProperty(AggregatorsConstants.APP_NAME_PARAM);\n\t\tString configFilePath = System\n\t\t\t\t.getProperty(AggregatorsConstants.CONFIG_PATH_PARAM);\n\t\tString regionName = System\n\t\t\t\t.getProperty(AggregatorsConstants.REGION_PARAM);\n\t\tString failuresToTolerate = System\n\t\t\t\t.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM);\n\t\tString maxRecords = System\n\t\t\t\t.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM);\n\t\tString environmentName = System\n\t\t\t\t.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM);\n\t\tString positionInStream = System\n\t\t\t\t.getProperty(AggregatorsConstants.STREAM_POSITION_PARAM);\n\n\t\tAggregatorConsumer consumer = new AggregatorConsumer(streamName,\n\t\t\t\tappName, configFilePath);\n\n\t\t// add optional configuration items\n\t\tif (regionName != null && regionName != \"\") {\n\t\t\tconsumer.withRegionName(regionName);\n\t\t}\n\n\t\tif (failuresToTolerate != null && failuresToTolerate != \"\") {\n\t\t\tconsumer.withToleratedWorkerFailures(Integer\n\t\t\t\t\t.parseInt(failuresToTolerate));\n\t\t}\n\n\t\tif (maxRecords != null && maxRecords != \"\") {\n\t\t\tconsumer.withMaxRecords(Integer.parseInt(maxRecords));\n\t\t}\n\n\t\tif (environmentName != null && environmentName != \"\") 
{\n\t\t\tconsumer.withEnvironment(environmentName);\n\t\t}\n\n\t\tif (positionInStream != null && positionInStream != \"\") {\n\t\t\tconsumer.withInitialPositionInStream(positionInStream);\n\t\t}\n\n\t\tSystem.exit(consumer.run());\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/AggregateAttributeModification.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\n\npublic class AggregateAttributeModification {\n    private String attributeName, originatingValueName;\n\n    private Double oldValue, newValue, finalValue;\n\n    private SummaryCalculation calculationApplied;\n\n    private int writesSoFar;\n\n    private AggregateAttributeModification() {\n    }\n\n    public AggregateAttributeModification(String attributeName, String originatingValueName,\n            Double finalValue, SummaryCalculation calculationApplied) {\n        this(attributeName, originatingValueName, null, null, finalValue, calculationApplied, 0);\n    }\n\n    public AggregateAttributeModification(String attributeName, String originatingValueName,\n            Double finalValue, SummaryCalculation calculationApplied, int writesSoFar) {\n        this(attributeName, originatingValueName, null, null, finalValue, calculationApplied,\n                writesSoFar);\n    }\n\n    public AggregateAttributeModification(String attributeName, String originatingValueName,\n            Double oldValue, Double newValue, Double finalValue,\n            SummaryCalculation calculationApplied, int writesSoFar) {\n        this.attributeName = attributeName;\n        this.originatingValueName = 
originatingValueName;\n        this.oldValue = oldValue;\n        this.newValue = newValue;\n        this.finalValue = finalValue;\n        this.calculationApplied = calculationApplied;\n        this.writesSoFar = writesSoFar;\n    }\n\n    public String getAttributeName() {\n        return attributeName;\n    }\n\n    public String getOriginatingValueName() {\n        return originatingValueName;\n    }\n\n    public Double getOldValue() {\n        return oldValue;\n    }\n\n    public Double getNewValue() {\n        return newValue;\n    }\n\n    public Double getFinalValue() {\n        return finalValue;\n    }\n\n    public SummaryCalculation getCalculationApplied() {\n        return calculationApplied;\n    }\n\n    public int getWritesSoFar() {\n        return writesSoFar;\n    }\n\n    @Override\n    public String toString() {\n        return String.format(\n                \"Aggregate Attribute Modification - Originating Value Name: %s, Attribute Name: %s, Calculation Applied: %s, Old Value: %s, New Value: %s, Final Value: %s\",\n                this.originatingValueName, this.attributeName, this.calculationApplied.name(),\n                this.oldValue, this.newValue, this.finalValue);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DevNullDataStore.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateValue;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\n\npublic class DevNullDataStore implements IDataStore {\n\n    @Override\n    public Map<UpdateKey, Map<String, AggregateAttributeModification>> write(\n            Map<UpdateKey, UpdateValue> data) throws Exception {\n        /*\n         * Simply return a remapped set of what the caller sent us - their\n         * values are the final values for this data store\n         */\n        Map<UpdateKey, Map<String, AggregateAttributeModification>> output = new HashMap<>();\n\n        for (UpdateKey key : data.keySet()) {\n            Map<String, AggregateAttributeModification> updates = new HashMap<>();\n\n            updates.put(StreamAggregator.EVENT_COUNT, new AggregateAttributeModification(\n                    StreamAggregator.EVENT_COUNT, StreamAggregator.EVENT_COUNT,\n                    data.get(key).getAggregateCount(), SummaryCalculation.SUM));\n\n            for (String value : 
data.get(key).getSummaryValues().keySet()) {\n                updates.put(value, data.get(key).getSummary(value));\n            }\n\n            output.put(key, updates);\n        }\n\n        return output;\n    }\n\n    @Override\n    public void initialise() throws Exception {\n    }\n\n    @Override\n    public long refreshForceCheckpointThresholds() throws Exception {\n        return 0;\n    }\n\n    @Override\n    public void setRegion(Region region) {\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoDataStore.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport java.util.ArrayList;\nimport java.util.Collection;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Random;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.regions.Regions;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient;\nimport com.amazonaws.services.dynamodbv2.model.AttributeAction;\nimport com.amazonaws.services.dynamodbv2.model.AttributeDefinition;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValueUpdate;\nimport com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException;\nimport com.amazonaws.services.dynamodbv2.model.ExpectedAttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndex;\nimport com.amazonaws.services.dynamodbv2.model.KeySchemaElement;\nimport com.amazonaws.services.dynamodbv2.model.KeyType;\nimport com.amazonaws.services.dynamodbv2.model.Projection;\nimport com.amazonaws.services.dynamodbv2.model.ProjectionType;\nimport 
com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;\nimport com.amazonaws.services.dynamodbv2.model.ReturnValue;\nimport com.amazonaws.services.dynamodbv2.model.ScanRequest;\nimport com.amazonaws.services.dynamodbv2.model.ScanResult;\nimport com.amazonaws.services.dynamodbv2.model.Select;\nimport com.amazonaws.services.dynamodbv2.model.UpdateItemRequest;\nimport com.amazonaws.services.dynamodbv2.model.UpdateItemResult;\nimport com.amazonaws.services.kinesis.AmazonKinesisClient;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateValue;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\nimport com.amazonaws.services.kinesis.model.ResourceNotFoundException;\n\npublic class DynamoDataStore implements IDataStore {\n    public enum DynamoSummaryUpdateMethod {\n        PUT(AttributeAction.PUT), ADD(AttributeAction.ADD), CONDITIONAL(null);\n        private AttributeAction action;\n\n        private DynamoSummaryUpdateMethod(AttributeAction a) {\n            this.action = a;\n        }\n\n        public AttributeAction getAction() {\n            return this.action;\n        }\n    }\n\n    /**\n     * The default amount of read IOPS to be provisioned, if the aggregator does\n     * not override.\n     */\n    public static final long DEFAULT_READ_CAPACITY = 10L;\n\n    /**\n     * The default amount of write IOPS to be provisioned, if the aggregator\n     * does not override.\n     */\n    public static final long DEFAULT_WRITE_CAPACITY = 10L;\n\n    private final Log LOG = LogFactory.getLog(DynamoDataStore.class);\n\n    private String environment, tableName, streamName;\n\n    private AggregatorType aggregatorType;\n\n    private boolean 
reportedStructure = false;\n\n    private AmazonDynamoDB dynamoClient;\n\n    private AmazonKinesisClient kinesisClient;\n\n    private long readCapacity = DEFAULT_READ_CAPACITY;\n\n    private long writeCapacity = DEFAULT_WRITE_CAPACITY;\n\n    private String labelAttribute, dateAttribute;\n\n    private boolean online = false;\n\n    private Region region = Region.getRegion(Regions.US_EAST_1);\n\n    public static final String SCATTER_PREFIX_ATTRIBUTE = \"scatterPrefix\";\n\n    public static final int SCATTER_WIDTH = 99;\n\n    private final Random r = new Random();\n\n    private DynamoQueryEngine queryEngine;\n\n    public DynamoDataStore(AmazonDynamoDB dynamoClient, AmazonKinesisClient kinesisClient,\n            AggregatorType aggregatorType, String streamName, String tableName,\n            String labelAttribute, String dateAttribute) {\n        this.dynamoClient = dynamoClient;\n        this.kinesisClient = kinesisClient;\n        this.aggregatorType = aggregatorType;\n        this.streamName = streamName;\n        this.tableName = tableName;\n        this.labelAttribute = labelAttribute;\n        this.dateAttribute = dateAttribute;\n    }\n\n    public DynamoDataStore(AWSCredentialsProvider credentials, AggregatorType aggregatorType,\n            String streamName, String tableName, String labelAttribute, String dateAttribute) {\n        this(new AmazonDynamoDBAsyncClient(credentials), new AmazonKinesisClient(credentials),\n                aggregatorType, streamName, tableName, labelAttribute, dateAttribute);\n    }\n\n    @Override\n    public void initialise() throws Exception {\n        if (!this.online) {\n            if (this.region != null) {\n                this.dynamoClient.setRegion(this.region);\n                if (this.streamName != null) {\n                    this.kinesisClient.setRegion(this.region);\n                }\n            }\n\n            initAggTable(this.labelAttribute, this.dateAttribute, this.readCapacity,\n                   
 this.writeCapacity);\n\n            this.queryEngine = new DynamoQueryEngine(this.dynamoClient, this.tableName,\n                    this.labelAttribute, this.dateAttribute);\n            this.online = true;\n        }\n    }\n\n    @Override\n    public Map<UpdateKey, Map<String, AggregateAttributeModification>> write(\n            Map<UpdateKey, UpdateValue> data) throws Exception {\n        UpdateItemRequest req = null;\n        UpdateItemResult result;\n        Map<String, AggregateAttributeModification> updatedValues;\n        Map<UpdateKey, Map<String, AggregateAttributeModification>> updatedData = new HashMap<>();\n\n        int conditionals = 0;\n\n        if (data != null && data.keySet().size() > 0) {\n            LOG.debug(String.format(\"Flushing %s Cache Updates\", data.size()));\n\n            // go through all pending updates and write down increments to event\n            // counts and SUM operations first, then do other types of\n            // calculations which need conditional updates after\n            for (final UpdateKey key1 : data.keySet()) {\n                // initialise the map of all updates made for final value\n                // processing\n                if (!updatedData.containsKey(key1)) {\n                    updatedValues = new HashMap<>();\n                } else {\n                    updatedValues = updatedData.get(key1);\n                }\n\n                Map<String, AttributeValueUpdate> updates = new HashMap<>();\n\n                updates.put(\n                        SCATTER_PREFIX_ATTRIBUTE,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withN(\"\" + r.nextInt(SCATTER_WIDTH))));\n\n                // add the event count update to the list of updates to be made\n                updates.put(\n                        StreamAggregator.EVENT_COUNT,\n                        new 
AttributeValueUpdate().withAction(AttributeAction.ADD).withValue(\n                                new AttributeValue().withN(\"\" + data.get(key1).getAggregateCount())));\n\n                // add the time horizon type to the item\n                updates.put(\n                        StreamAggregator.TIME_HORIZON_ATTR,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withS(key1.getTimeHorizon().getAbbrev())));\n\n                // add last update time and sequence\n                updates.put(\n                        StreamAggregator.LAST_WRITE_SEQ,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withS(data.get(key1).getLastWriteSeq())));\n                updates.put(\n                        StreamAggregator.LAST_WRITE_TIME,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withS(StreamAggregator.dateFormatter.format(new Date(\n                                        data.get(key1).getLastWriteTime())))));\n\n                if (this.aggregatorType.equals(AggregatorType.SUM)) {\n                    for (final String attribute : data.get(key1).getSummaryValues().keySet()) {\n                        final AggregateAttributeModification update = data.get(key1).getSummaryValues().get(\n                                attribute);\n\n                        if (!update.getCalculationApplied().getSummaryUpdateMethod().equals(\n                                DynamoSummaryUpdateMethod.CONDITIONAL)) {\n                            String setAttributeName = StreamAggregatorUtils.methodToColumn(attribute);\n\n                            updates.put(\n                                    setAttributeName,\n                                    new 
AttributeValueUpdate().withAction(\n                                            update.getCalculationApplied().getSummaryUpdateMethod().getAction()).withValue(\n                                            new AttributeValue().withN(\"\" + update.getFinalValue())));\n\n                            // add a stub entry so that we can extract the\n                            // updated value from the resultset\n                            updatedValues.put(setAttributeName, new AggregateAttributeModification(\n                                    update.getAttributeName(), update.getOriginatingValueName(),\n                                    null, update.getCalculationApplied()));\n                        }\n                    }\n                }\n\n                // do the update to all sum and count attributes as well\n                // as the last write sequence and time - this gives us a key to\n                // write other calculations onto\n                req = new UpdateItemRequest().withTableName(tableName).withKey(\n                        StreamAggregatorUtils.getTableKey(key1)).withAttributeUpdates(updates).withReturnValues(\n                        ReturnValue.UPDATED_NEW);\n                result = DynamoUtils.updateWithRetries(dynamoClient, req);\n\n                // add the event count to the modifications made\n                updatedValues.put(\n                        StreamAggregator.EVENT_COUNT,\n                        new AggregateAttributeModification(StreamAggregator.EVENT_COUNT,\n                                StreamAggregator.EVENT_COUNT,\n                                Double.parseDouble(result.getAttributes().get(\n                                        StreamAggregator.EVENT_COUNT).getN()),\n                                SummaryCalculation.SUM));\n\n                // extract all updated values processed by the previous update\n                for (String attribute : updatedValues.keySet()) {\n                    
updatedValues.put(\n                            attribute,\n                            new AggregateAttributeModification(\n                                    updatedValues.get(attribute).getAttributeName(),\n                                    updatedValues.get(attribute).getOriginatingValueName(),\n                                    Double.parseDouble(result.getAttributes().get(attribute).getN()),\n                                    updatedValues.get(attribute).getCalculationApplied(),\n                                    updatedValues.get(attribute).getWritesSoFar() + 1));\n                }\n\n                // add all the updates for this key\n                updatedData.put(key1, updatedValues);\n\n                // log the structure of the table once, so the customer can\n                // retrieve it directly\n                if (!reportedStructure) {\n                    LOG.info(getTableStructure());\n                    reportedStructure = true;\n                }\n            }\n\n            // now process all non summing calculations which are conditional\n            // and\n            // require that the table keys already exist\n            if (this.aggregatorType.equals(AggregatorType.SUM)) {\n                for (final UpdateKey key2 : data.keySet()) {\n                    updatedValues = updatedData.get(key2);\n\n                    // we perform a single update for all SUM operations and the\n                    // count, last write sequence and time, and a\n                    // separate conditional update for every instance of MIN or\n                    // MAX\n                    // calculations as these must be conditionally applied to be\n                    // correct\n                    for (final String attribute : data.get(key2).getSummaryValues().keySet()) {\n                        final AggregateAttributeModification update = data.get(key2).getSummaryValues().get(\n                                attribute);\n\n           
             if (update.getCalculationApplied().getSummaryUpdateMethod().equals(\n                                DynamoSummaryUpdateMethod.CONDITIONAL)) {\n                            conditionals++;\n                            result = updateConditionalValue(dynamoClient, tableName, key2,\n                                    attribute, update);\n\n                            // if the update was made by this conditional\n                            // update, then add its items to the update set\n                            Double finalValue = null;\n                            int increment = update.getWritesSoFar();\n                            if (result != null && result.getAttributes() != null) {\n                                finalValue = Double.parseDouble(result.getAttributes().get(\n                                        attribute).getN());\n                                increment++;\n                            }\n                            updatedValues.put(\n                                    attribute,\n                                    new AggregateAttributeModification(update.getAttributeName(),\n                                            update.getOriginatingValueName(), finalValue,\n                                            update.getCalculationApplied(), increment));\n\n                        }\n                    }\n\n                    // add the conditional update items into the overall update\n                    // set\n                    updatedData.put(key2, updatedValues);\n                }\n\n                LOG.debug(String.format(\"Processed %s Conditional Updates\", conditionals));\n            }\n        }\n\n        return updatedData;\n    }\n\n    public UpdateItemResult updateConditionalValue(final AmazonDynamoDB dynamoClient,\n            final String tableName, final UpdateKey key, final String attribute,\n            final AggregateAttributeModification update) throws Exception {\n        Map<String, 
AttributeValue> updateKey = StreamAggregatorUtils.getTableKey(key);\n        UpdateItemResult result;\n        final ReturnValue returnValue = ReturnValue.UPDATED_NEW;\n        final String setAttribute = StreamAggregatorUtils.methodToColumn(attribute);\n\n        // create the update that we want to write\n        final Map<String, AttributeValueUpdate> thisCalcUpdate = new HashMap<String, AttributeValueUpdate>() {\n            {\n                put(setAttribute,\n                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(\n                                new AttributeValue().withN(\"\" + update.getFinalValue())));\n            }\n        };\n        // create the request\n        UpdateItemRequest req = new UpdateItemRequest().withTableName(tableName).withKey(updateKey).withReturnValues(\n                returnValue).withAttributeUpdates(thisCalcUpdate);\n\n        Map<String, ExpectedAttributeValue> expected = new HashMap<>();\n\n        final SummaryCalculation calc = update.getCalculationApplied();\n\n        // try an update to PUT the value if NOT EXISTS, to establish if we\n        // are the first writer for this key\n        expected = new HashMap<String, ExpectedAttributeValue>() {\n            {\n                put(setAttribute, new ExpectedAttributeValue().withExists(false));\n            }\n        };\n\n        req.setExpected(expected);\n\n        try {\n            result = DynamoUtils.updateWithRetries(dynamoClient, req);\n\n            // yay - we were the first writer, so our value was written\n            return result;\n        } catch (ConditionalCheckFailedException e1) {\n            // set the expected to the comparison contained in the update\n            // calculation\n            expected.clear();\n            expected.put(\n                    setAttribute,\n                    new ExpectedAttributeValue().withComparisonOperator(\n                            
calc.getDynamoComparisonOperator()).withValue(\n                            new AttributeValue().withN(\"\" + update.getFinalValue())));\n            req.setExpected(expected);\n\n            // do the conditional update on the summary\n            // calculation. this may result in no update being\n            // applied because the new value is greater than the\n            // current minimum for MIN, or less than the current\n            // maximum for MAX.\n            try {\n                result = DynamoUtils.updateWithRetries(dynamoClient, req);\n\n                return result;\n            } catch (ConditionalCheckFailedException e2) {\n                // no worries - we just weren't the min or max!\n                return null;\n            }\n        }\n    }\n\n    /**\n     * Method which examines a table which backs an Aggregator, and returns a\n     * string value which represents the list of attributes in the table. This\n     * method assumes that all elements in an aggregate table are the same.\n     * \n     * @param dynamoClient Dynamo DB Client to use for connection to Dynamo DB.\n     * @param dynamoTable The Table to get the structure of.\n     * @return A String representation of the attribute names in the table.\n     * @throws Exception\n     */\n    public String getTableStructure() throws Exception {\n        List<String> columns = getDictionaryEntry();\n        StringBuffer sb = new StringBuffer();\n        for (String s : columns) {\n            sb.append(String.format(\"%s,\", s));\n        }\n        return String.format(\"Dynamo Table %s (%s)\", sb.toString().substring(0, sb.length() - 1),\n                this.tableName);\n    }\n\n    /**\n     * Generate a list of attribute names found in the Aggregator's dynamo\n     * table. 
Assumes that all Items in the Aggregator table are of the same\n     * structure.\n     * \n     * @param dynamoClient Dynamo DB Client to use for connection to Dynamo DB.\n     * @param dynamoTable The Dynamo Table for the Aggregator\n     * @return A list of attribute names from the Dynamo table\n     * @throws Exception\n     */\n    protected List<String> getDictionaryEntry() throws Exception {\n        // get a list of all columns in the table, with keys first\n        List<String> columns = new ArrayList<>();\n        List<KeySchemaElement> keys = dynamoClient.describeTable(this.tableName).getTable().getKeySchema();\n        for (KeySchemaElement key : keys) {\n            columns.add(key.getAttributeName());\n        }\n        ScanResult scan = dynamoClient.scan(new ScanRequest().withTableName(this.tableName).withSelect(\n                Select.ALL_ATTRIBUTES).withLimit(1));\n        List<Map<String, AttributeValue>> scannedItems = scan.getItems();\n        for (Map<String, AttributeValue> map : scannedItems) {\n            for (String s : map.keySet()) {\n                if (!columns.contains(s))\n                    columns.add(s);\n            }\n        }\n\n        return columns;\n    }\n\n    /*\n     * Configure the aggregate table with the indicated capacity, including\n     * global secondary index on last_write_seq for facilitating aggregate\n     * cleanup\n     */\n    public void initAggTable(final String keyColumn, final String dateColumnName,\n            final long readCapacity, final long writeCapacity) throws Exception {\n        final String setDateColumn = dateColumnName == null ? StreamAggregator.DEFAULT_DATE_VALUE\n                : dateColumnName;\n\n        long setReadCapacity = readCapacity == -1 ? DEFAULT_READ_CAPACITY : readCapacity;\n        long setWriteCapacity = writeCapacity == -1 ? 
DEFAULT_WRITE_CAPACITY : writeCapacity;\n\n        // we have to add this attribute list so that we can project the key\n        // into the GSI\n        List<AttributeDefinition> attributes = new ArrayList<AttributeDefinition>() {\n            {\n                add(new AttributeDefinition().withAttributeName(keyColumn).withAttributeType(\"S\"));\n                add(new AttributeDefinition().withAttributeName(setDateColumn).withAttributeType(\n                        \"S\"));\n            }\n        };\n\n        Collection<GlobalSecondaryIndex> gsi = new ArrayList<>();\n\n        // Global Secondary Index for accessing the table by date item\n        gsi.add(new GlobalSecondaryIndex().withIndexName(\n                StreamAggregatorUtils.getDateDimensionIndexName(tableName, setDateColumn)).withKeySchema(\n                new KeySchemaElement().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withKeyType(\n                        KeyType.HASH),\n                new KeySchemaElement().withAttributeName(setDateColumn).withKeyType(KeyType.RANGE)).withProjection(\n                new Projection().withProjectionType(ProjectionType.KEYS_ONLY)).withProvisionedThroughput(\n                new ProvisionedThroughput().withReadCapacityUnits(setReadCapacity).withWriteCapacityUnits(\n                        setWriteCapacity)));\n\n        attributes.add(new AttributeDefinition().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withAttributeType(\n                \"N\"));\n\n        // table is hash/range on value and date\n        List<KeySchemaElement> key = new ArrayList<KeySchemaElement>() {\n            {\n                add(new KeySchemaElement().withAttributeName(keyColumn).withKeyType(KeyType.HASH));\n                add(new KeySchemaElement().withAttributeName(setDateColumn).withKeyType(\n                        KeyType.RANGE));\n            }\n        };\n\n        // initialise the table\n        DynamoUtils.initTable(this.dynamoClient, this.tableName, setReadCapacity, 
setWriteCapacity,\n                attributes, key, gsi);\n    }\n\n    public long refreshForceCheckpointThresholds() {\n        LOG.info(\"Refreshing Provisioned Throughput settings\");\n\n        // get the current provisioned capacity\n        this.writeCapacity = getProvisionedWrites();\n\n        // get the current number of provisioned kinesis shards for the stream,\n        // if we know what stream we are working against\n        int currentShardCount = 1;\n        if (this.streamName != null) {\n            try {\n                currentShardCount = StreamAggregatorUtils.getShardCount(this.kinesisClient,\n                        this.streamName);\n                return (4 * (60 * this.writeCapacity)) / currentShardCount;\n            } catch (Exception e) {\n                LOG.warn(String.format(\n                        \"Unable to get Shard Count for Stream %s. Using Overly Optimistic Throughput Settings\",\n                        this.streamName));\n            }\n        }\n        return (4 * (60 * this.writeCapacity));\n    }\n\n    private long getProvisionedWrites() {\n        return dynamoClient.describeTable(this.tableName).getTable().getProvisionedThroughput().getWriteCapacityUnits();\n    }\n\n    public DynamoQueryEngine queryEngine() {\n        return this.queryEngine;\n    }\n\n    public Region getRegion() {\n        return this.region;\n    }\n\n    @Override\n    public void setRegion(Region region) {\n        this.region = region;\n    }\n\n    public DynamoDataStore withStorageCapacity(long readCapacity, long writeCapacity) {\n        if (readCapacity > 0l)\n            this.readCapacity = readCapacity;\n\n        if (writeCapacity > 0l)\n            this.writeCapacity = writeCapacity;\n\n        return this;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoQueryEngine.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport java.util.ArrayList;\nimport java.util.Collection;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.HashSet;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Set;\nimport java.util.concurrent.ExecutorService;\nimport java.util.concurrent.Executors;\nimport java.util.concurrent.Future;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.AmazonServiceException;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.BatchGetItemRequest;\nimport com.amazonaws.services.dynamodbv2.model.BatchGetItemResult;\nimport com.amazonaws.services.dynamodbv2.model.ComparisonOperator;\nimport com.amazonaws.services.dynamodbv2.model.Condition;\nimport com.amazonaws.services.dynamodbv2.model.GetItemRequest;\nimport com.amazonaws.services.dynamodbv2.model.KeysAndAttributes;\nimport com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException;\nimport com.amazonaws.services.dynamodbv2.model.QueryRequest;\nimport com.amazonaws.services.dynamodbv2.model.QueryResult;\nimport com.amazonaws.services.dynamodbv2.model.ScanRequest;\nimport 
com.amazonaws.services.dynamodbv2.model.ScanResult;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.TableKeyStructure;\n\npublic class DynamoQueryEngine {\n\tprivate final Log LOG = LogFactory.getLog(DynamoQueryEngine.class);\n\n\tprivate AmazonDynamoDB dynamoClient;\n\n\tprivate String tableName, labelAttribute, dateAttribute;\n\n\tprotected final int BACKOFF_MILLIS = 10;\n\n\tpublic DynamoQueryEngine(AmazonDynamoDB dynamoClient, String tableName,\n\t\t\tString labelAttribute, String dateAttribute) {\n\t\tthis.dynamoClient = dynamoClient;\n\t\tthis.tableName = tableName;\n\t\tthis.labelAttribute = labelAttribute;\n\t\tthis.dateAttribute = dateAttribute;\n\t}\n\n\tpublic enum QueryKeyScope {\n\t\tHashKey, HashAndRangeKey;\n\t}\n\n\tpublic List<TableKeyStructure> parallelQueryKeys(QueryKeyScope scope,\n\t\t\tint threads) throws Exception {\n\t\tList<ParallelKeyScanWorker> workers = new ArrayList<>();\n\t\tCollection<Future<?>> workerStatus = new ArrayList<>();\n\t\tList<TableKeyStructure> output = new ArrayList<>();\n\t\tint totalResultsProcessed = 0;\n\n\t\t// set up the executor thread pool\n\t\tExecutorService executor = Executors.newFixedThreadPool(threads);\n\n\t\t// create workers for each segment that we need to do queries against\n\t\tfor (int i = 0; i < threads; i++) {\n\t\t\tParallelKeyScanWorker worker = new ParallelKeyScanWorker(\n\t\t\t\t\tthis.tableName, i, threads, scope, this.labelAttribute,\n\t\t\t\t\tthis.dateAttribute);\n\t\t\tworkers.add(worker);\n\t\t\tworkerStatus.add(executor.submit(worker));\n\t\t}\n\n\t\tfor (Future<?> f : workerStatus) {\n\t\t\tf.get();\n\t\t}\n\t\texecutor.shutdown();\n\n\t\tfor (ParallelKeyScanWorker w : workers) {\n\t\t\t// throw any exceptions the worker incurred\n\t\t\tw.throwExceptions();\n\n\t\t\tif (w.getResultCount() > 0) 
{\n\t\t\t\toutput.addAll(w.getOutput());\n\t\t\t}\n\n\t\t\ttotalResultsProcessed += w.getResultsProcessed();\n\t\t}\n\n\t\tLOG.info(String.format(\n\t\t\t\t\"Key Extraction Complete - Processed %s Key Items\",\n\t\t\t\ttotalResultsProcessed));\n\n\t\treturn output;\n\t}\n\n\tpublic List<Map<String, AttributeValue>> queryByKey(String label,\n\t\t\tDate dateValue, ComparisonOperator operator) throws Exception {\n\t\tif (dateValue != null && !operator.equals(ComparisonOperator.EQ)) {\n\t\t\tString dateAsString = StreamAggregator.dateFormatter\n\t\t\t\t\t.format(dateValue);\n\n\t\t\tLOG.info(String.format(\"Issuing Hash/Range Query for %s - %s\",\n\t\t\t\t\tlabel, dateAsString));\n\n\t\t\t// range query\n\t\t\tMap<String, Condition> keyConditions = new HashMap<>();\n\n\t\t\t// hash key\n\t\t\tCondition c = new Condition().withAttributeValueList(\n\t\t\t\t\tnew AttributeValue().withS(label)).withComparisonOperator(\n\t\t\t\t\tComparisonOperator.EQ);\n\t\t\tkeyConditions.put(this.labelAttribute, c);\n\n\t\t\t// range key\n\t\t\tc = new Condition().withAttributeValueList(\n\t\t\t\t\tnew AttributeValue().withS(dateAsString))\n\t\t\t\t\t.withComparisonOperator(operator);\n\t\t\tkeyConditions.put(this.dateAttribute, c);\n\n\t\t\tQueryRequest req = new QueryRequest().withTableName(this.tableName)\n\t\t\t\t\t.withKeyConditions(keyConditions);\n\n\t\t\treturn DynamoUtils\n\t\t\t\t\t.queryUntilDone(dynamoClient, req, BACKOFF_MILLIS);\n\t\t} else {\n\t\t\tif (dateValue == null) {\n\t\t\t\tLOG.info(String.format(\"Issuing Hash Key Only Query for %s\",\n\t\t\t\t\t\tlabel));\n\n\t\t\t\t// hash key only query\n\t\t\t\tMap<String, Condition> keyConditions = new HashMap<>();\n\t\t\t\tCondition c = new Condition().withAttributeValueList(\n\t\t\t\t\t\tnew AttributeValue().withS(label))\n\t\t\t\t\t\t.withComparisonOperator(ComparisonOperator.EQ);\n\t\t\t\tkeyConditions.put(this.labelAttribute, c);\n\t\t\t\tQueryRequest req = new 
QueryRequest().withTableName(\n\t\t\t\t\t\tthis.tableName).withKeyConditions(keyConditions);\n\n\t\t\t\treturn DynamoUtils.queryUntilDone(dynamoClient, req,\n\t\t\t\t\t\tBACKOFF_MILLIS);\n\t\t\t} else {\n\t\t\t\tString dateAsString = StreamAggregator.dateFormatter\n\t\t\t\t\t\t.format(dateValue);\n\n\t\t\t\tLOG.info(String.format(\n\t\t\t\t\t\t\"Performing exact Hash/Range Lookup for %s - %s\",\n\t\t\t\t\t\tlabel, dateAsString));\n\n\t\t\t\t// exact key lookup\n\t\t\t\tList<Map<String, AttributeValue>> output = new ArrayList<>();\n\t\t\t\tMap<String, AttributeValue> keyMap = new HashMap<>();\n\t\t\t\tkeyMap.put(this.labelAttribute,\n\t\t\t\t\t\tnew AttributeValue().withS(label));\n\t\t\t\tkeyMap.put(this.dateAttribute,\n\t\t\t\t\t\tnew AttributeValue().withS(dateAsString));\n\t\t\t\tGetItemRequest req = new GetItemRequest().withTableName(\n\t\t\t\t\t\tthis.tableName).withKey(keyMap);\n\t\t\t\toutput.add(this.dynamoClient.getItem(req).getItem());\n\t\t\t\treturn output;\n\t\t\t}\n\t\t}\n\t}\n\n\tprivate class ParallelKeyScanWorker implements Runnable {\n\t\tList<TableKeyStructure> output = new ArrayList<>();\n\n\t\tprivate String tableName, hashKey, rangeKey;\n\n\t\tprivate QueryKeyScope scope;\n\n\t\tprivate int workerInstance, threads;\n\n\t\tprivate int resultsProcessed = 0;\n\n\t\tprivate Exception exception;\n\n\t\tpublic ParallelKeyScanWorker(String tableName, int workerInstance,\n\t\t\t\tint threads, QueryKeyScope scope, String hashKey,\n\t\t\t\tString rangeKey) {\n\t\t\tthis.tableName = tableName;\n\t\t\tthis.workerInstance = workerInstance;\n\t\t\tthis.hashKey = hashKey;\n\t\t\tthis.rangeKey = rangeKey;\n\t\t\tthis.threads = threads;\n\t\t\tthis.scope = scope;\n\t\t}\n\n\t\tpublic int getResultCount() {\n\t\t\tif (this.output == null) {\n\t\t\t\treturn 0;\n\t\t\t} else {\n\t\t\t\treturn this.output.size();\n\t\t\t}\n\t\t}\n\n\t\tpublic int getResultsProcessed() {\n\t\t\treturn this.resultsProcessed;\n\t\t}\n\n\t\tpublic void throwExceptions() throws 
Exception {\n\t\t\tif (this.exception != null) {\n\t\t\t\tthrow this.exception;\n\t\t\t}\n\t\t}\n\n\t\t@Override\n\t\tpublic void run() {\n\t\t\tScanRequest scanRequest = new ScanRequest()\n\t\t\t\t\t.withTableName(this.tableName)\n\t\t\t\t\t.withAttributesToGet(this.hashKey)\n\t\t\t\t\t.withSegment(this.workerInstance)\n\t\t\t\t\t.withTotalSegments(threads);\n\t\t\tMap<String, Set<String>> deduplicated = new HashMap<>();\n\t\t\tSet<String> rangeValues = null;\n\t\t\tMap<String, AttributeValue> lastKeyEvaluated = null;\n\t\t\tint scanAttempts = 0;\n\t\t\tint limit = -1;\n\t\t\tboolean returnedResults = false;\n\t\t\tString lastLabel = null;\n\t\t\tint uniqueLabels = 0;\n\n\t\t\tdo {\n\t\t\t\tScanResult result = null;\n\n\t\t\t\t// set query limits, to optimise for skip scan or for hash/range\n\t\t\t\t// query with no limit\n\t\t\t\tif (this.scope.equals(QueryKeyScope.HashKey)) {\n\t\t\t\t\tif (uniqueLabels > 0 && uniqueLabels == resultsProcessed) {\n\t\t\t\t\t\t// remove the query limit if every row being returned is\n\t\t\t\t\t\t// unique\n\t\t\t\t\t\tlimit = -1;\n\t\t\t\t\t} else {\n\t\t\t\t\t\t// set a limit of twice the number of uniques, so we can\n\t\t\t\t\t\t// get a larger result set as we go\n\t\t\t\t\t\tif (uniqueLabels == 0) {\n\t\t\t\t\t\t\tlimit = 100;\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\tlimit = uniqueLabels * 2;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// reset the unique labels so it doesn't grow without\n\t\t\t\t\t\t// limit\n\t\t\t\t\t\tuniqueLabels = 0;\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\t\t\tscanRequest.withAttributesToGet(this.rangeKey);\n\t\t\t\t}\n\n\t\t\t\tdo {\n\t\t\t\t\ttry {\n\t\t\t\t\t\t// set the limit if we have one\n\t\t\t\t\t\tif (limit != -1) {\n\t\t\t\t\t\t\tscanRequest.withLimit(limit);\n\t\t\t\t\t\t}\n\t\t\t\t\t\tresult = dynamoClient.scan(scanRequest\n\t\t\t\t\t\t\t\t.withExclusiveStartKey(lastKeyEvaluated));\n\n\t\t\t\t\t\tif (result.getItems().size() > 0) {\n\t\t\t\t\t\t\treturnedResults = true;\n\t\t\t\t\t\t} else 
{\n\t\t\t\t\t\t\treturnedResults = false;\n\t\t\t\t\t\t}\n\t\t\t\t\t} catch (ProvisionedThroughputExceededException e) {\n\t\t\t\t\t\tLOG.warn(String\n\t\t\t\t\t\t\t\t.format(\"Provisioned Throughput Exceeded - Retry Attempt %s\",\n\t\t\t\t\t\t\t\t\t\tscanAttempts));\n\n\t\t\t\t\t\t// back off\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\tThread.sleep(2 ^ scanAttempts * BACKOFF_MILLIS);\n\t\t\t\t\t\t} catch (InterruptedException interrupted) {\n\t\t\t\t\t\t\tthis.exception = interrupted;\n\t\t\t\t\t\t\treturn;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tscanAttempts++;\n\t\t\t\t\t}\n\t\t\t\t} while (scanAttempts < 10 && result == null);\n\n\t\t\t\tif (result == null) {\n\t\t\t\t\tthis.exception = new Exception(String.format(\n\t\t\t\t\t\t\t\"Unable to execute Scan after %s attempts\",\n\t\t\t\t\t\t\tscanAttempts));\n\t\t\t\t\treturn;\n\t\t\t\t}\n\n\t\t\t\t// process the results, creating a deduplicated map/set of\n\t\t\t\t// hash/range keys\n\t\t\t\tString labelValue = null;\n\t\t\t\tif (returnedResults) {\n\t\t\t\t\tfor (Map<String, AttributeValue> map : result.getItems()) {\n\t\t\t\t\t\tresultsProcessed++;\n\n\t\t\t\t\t\tlabelValue = map.get(this.hashKey).getS();\n\n\t\t\t\t\t\t// only enter the label value into the hash once\n\t\t\t\t\t\tif (scope.equals(QueryKeyScope.HashKey)) {\n\t\t\t\t\t\t\tif (!labelValue.equals(lastLabel)\n\t\t\t\t\t\t\t\t\t|| lastLabel == null) {\n\t\t\t\t\t\t\t\tdeduplicated.put(labelValue, null);\n\t\t\t\t\t\t\t\tlastLabel = labelValue;\n\t\t\t\t\t\t\t\tuniqueLabels++;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\tif (deduplicated.containsKey(labelValue)) {\n\t\t\t\t\t\t\t\trangeValues = deduplicated.get(labelValue);\n\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\trangeValues = new HashSet<String>();\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\trangeValues.add(map.get(this.rangeKey).getS());\n\n\t\t\t\t\t\t\tdeduplicated.put(labelValue, rangeValues);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\t// set the last evaluated key. 
if we have processed a bunch\n\t\t\t\t\t// of data and are not at the end of the result set, then\n\t\t\t\t\t// we'll force a skip forward on date, to eliminate\n\t\t\t\t\t// continued processing of high cardinality hash values\n\t\t\t\t\tif (this.scope.equals(QueryKeyScope.HashKey)\n\t\t\t\t\t\t\t&& result.getLastEvaluatedKey() != null) {\n\t\t\t\t\t\t// skip scan\n\t\t\t\t\t\tlastKeyEvaluated = new HashMap<>();\n\t\t\t\t\t\tlastKeyEvaluated.put(this.hashKey,\n\t\t\t\t\t\t\t\tnew AttributeValue().withS(labelValue));\n\t\t\t\t\t\tlastKeyEvaluated.put(this.rangeKey,\n\t\t\t\t\t\t\t\tnew AttributeValue()\n\t\t\t\t\t\t\t\t\t\t.withS(\"4000-01-01 00:00:00\"));\n\t\t\t\t\t} else {\n\t\t\t\t\t\tlastKeyEvaluated = result.getLastEvaluatedKey();\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\t\t\tlastKeyEvaluated = null;\n\t\t\t\t}\n\t\t\t} while (lastKeyEvaluated != null);\n\n\t\t\tif (this.scope.equals(QueryKeyScope.HashKey)) {\n\t\t\t\tLOG.debug(String.format(\"Worker %s extracted %s results\",\n\t\t\t\t\t\tthis.workerInstance, deduplicated.size()));\n\t\t\t} else {\n\t\t\t\tLOG.debug(String\n\t\t\t\t\t\t.format(\"Worker %s deduplicated %s results, creating distinct set of %s keys\",\n\t\t\t\t\t\t\t\tthis.workerInstance, resultsProcessed,\n\t\t\t\t\t\t\t\tdeduplicated.size()));\n\t\t\t}\n\n\t\t\tthis.output = new ArrayList<>();\n\t\t\tif (deduplicated.size() > 0) {\n\t\t\t\tfor (String s : deduplicated.keySet()) {\n\t\t\t\t\tTableKeyStructure t = new TableKeyStructure(this.hashKey,\n\t\t\t\t\t\t\ts, this.rangeKey);\n\n\t\t\t\t\tif (scope.equals(QueryKeyScope.HashAndRangeKey)) {\n\t\t\t\t\t\tfor (String rangeValue : deduplicated.get(s)) {\n\t\t\t\t\t\t\tt.withDateValue(rangeValue);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\toutput.add(t);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tpublic List<TableKeyStructure> getOutput() {\n\t\t\treturn this.output;\n\t\t}\n\t}\n\n\tprivate class ParallelDateQueryWorker implements Runnable {\n\t\tprivate int start, range;\n\n\t\tprivate String tableName, 
indexName, labelAttribute, dateAttribute;\n\n\t\tprivate Map<String, Condition> conditions;\n\n\t\tprivate Exception exception;\n\n\t\tprivate Map<String, Set<String>> resultKeys = new HashMap<>();\n\n\t\tpublic void throwException() throws Exception {\n\t\t\tif (this.exception != null)\n\t\t\t\tthrow this.exception;\n\t\t}\n\n\t\tpublic ParallelDateQueryWorker(String tableName, String indexName,\n\t\t\t\tint start, int range, Map<String, Condition> conditions,\n\t\t\t\tString labelAttribute, String dateAttribute) {\n\t\t\tthis.tableName = tableName;\n\t\t\tthis.indexName = indexName;\n\t\t\tthis.start = start;\n\t\t\tthis.range = range;\n\t\t\tthis.conditions = conditions;\n\t\t\tthis.labelAttribute = labelAttribute;\n\t\t\tthis.dateAttribute = dateAttribute;\n\t\t}\n\n\t\t@Override\n\t\tpublic void run() {\n\t\t\tList<Map<String, AttributeValue>> results = new ArrayList<>();\n\n\t\t\tfor (int i = this.start; i < this.start + this.range; i++) {\n\t\t\t\tCondition c = new Condition().withComparisonOperator(\n\t\t\t\t\t\tComparisonOperator.EQ).withAttributeValueList(\n\t\t\t\t\t\tnew AttributeValue().withN(\"\" + i));\n\t\t\t\tthis.conditions\n\t\t\t\t\t\t.put(DynamoDataStore.SCATTER_PREFIX_ATTRIBUTE, c);\n\t\t\t\tQueryRequest req = new QueryRequest()\n\t\t\t\t\t\t.withIndexName(this.indexName)\n\t\t\t\t\t\t.withTableName(this.tableName)\n\t\t\t\t\t\t.withKeyConditions(this.conditions);\n\n\t\t\t\tMap<String, AttributeValue> lastKeyEvaluated = null;\n\t\t\t\tdo {\n\t\t\t\t\tint queryAttempts = 0;\n\t\t\t\t\tQueryResult result = null;\n\n\t\t\t\t\tdo {\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\tresult = dynamoClient.query(req)\n\t\t\t\t\t\t\t\t\t.withLastEvaluatedKey(lastKeyEvaluated);\n\n\t\t\t\t\t\t\tresults.addAll(result.getItems());\n\t\t\t\t\t\t} catch (ProvisionedThroughputExceededException e) {\n\t\t\t\t\t\t\tLOG.warn(String\n\t\t\t\t\t\t\t\t\t.format(\"Provisioned Throughput Exceeded - Retry Attempt %s\",\n\t\t\t\t\t\t\t\t\t\t\tqueryAttempts));\n\n\t\t\t\t\t\t\ttry 
{\n\t\t\t\t\t\t\t\tThread.sleep(2 ^ queryAttempts * BACKOFF_MILLIS);\n\t\t\t\t\t\t\t} catch (InterruptedException interrupted) {\n\t\t\t\t\t\t\t\tthis.exception = interrupted;\n\t\t\t\t\t\t\t\treturn;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tqueryAttempts++;\n\t\t\t\t\t\t}\n\t\t\t\t\t} while (queryAttempts < 10 && result == null);\n\n\t\t\t\t\tif (result == null) {\n\t\t\t\t\t\tthis.exception = new Exception(String.format(\n\t\t\t\t\t\t\t\t\"Unable to execute Query after %s attempts\",\n\t\t\t\t\t\t\t\tqueryAttempts));\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\n\t\t\t\t\tlastKeyEvaluated = result.getLastEvaluatedKey();\n\t\t\t\t} while (lastKeyEvaluated != null);\n\n\t\t\t\t// pivot the results into a list of label values and set of date\n\t\t\t\t// values\n\t\t\t\tString labelValue = null;\n\t\t\t\tString dateValue = null;\n\t\t\t\tSet<String> values;\n\n\t\t\t\tfor (Map<String, AttributeValue> map : results) {\n\t\t\t\t\t// process each attribute\n\t\t\t\t\tfor (String s : map.keySet()) {\n\t\t\t\t\t\t// grab the label and date values\n\t\t\t\t\t\tif (s.equals(this.labelAttribute)) {\n\t\t\t\t\t\t\tlabelValue = map.get(s).getS();\n\t\t\t\t\t\t} else if (s.equals(this.dateAttribute)) {\n\t\t\t\t\t\t\tdateValue = map.get(s).getS();\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (labelValue != null && dateValue != null) {\n\t\t\t\t\t\t// get the current set of date values for the label, or\n\t\t\t\t\t\t// create a new one\n\t\t\t\t\t\tif (!resultKeys.containsKey(labelValue)) {\n\t\t\t\t\t\t\tvalues = new HashSet<>();\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\tvalues = resultKeys.get(labelValue);\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// add the current date value to the set of all date\n\t\t\t\t\t\t// values\n\t\t\t\t\t\t// fore label\n\t\t\t\t\t\tvalues.add(dateValue);\n\n\t\t\t\t\t\t// write back the map of label to date values\n\t\t\t\t\t\tresultKeys.put(labelValue, values);\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tpublic Map<String, Set<String>> getResultKeys() {\n\t\t\treturn 
this.resultKeys;\n\t\t}\n\t}\n\n\tprivate KeysAndAttributes convertResultKeys(\n\t\t\tMap<String, Set<String>> resultKeys) {\n\t\tKeysAndAttributes keys = new KeysAndAttributes();\n\n\t\tfor (final String s : resultKeys.keySet()) {\n\t\t\tfor (final String value : resultKeys.get(s)) {\n\t\t\t\tkeys.withKeys(new HashMap<String, AttributeValue>() {\n\t\t\t\t\t{\n\t\t\t\t\t\tput(labelAttribute, new AttributeValue().withS(s));\n\t\t\t\t\t\tput(dateAttribute, new AttributeValue().withS(value));\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\n\t\treturn keys;\n\t}\n\n\tprivate List<Map<String, AttributeValue>> batchGetDataByKeys(\n\t\t\tfinal String tableName, final KeysAndAttributes keys) {\n\t\tMap<String, KeysAndAttributes> requestMap = new HashMap<>();\n\t\tkeys.setConsistentRead(true);\n\t\trequestMap.put(tableName, keys);\n\n\t\tBatchGetItemResult result = null;\n\t\ttry {\n\t\t\tresult = dynamoClient.batchGetItem(new BatchGetItemRequest(\n\t\t\t\t\trequestMap));\n\t\t} catch (AmazonServiceException e) {\n\t\t\tLOG.error(e);\n\t\t\tthrow e;\n\t\t}\n\n\t\treturn result.getResponses().get(this.tableName);\n\t}\n\n\t@SuppressWarnings(\"unchecked\")\n\tpublic List<Map<String, AttributeValue>> parallelQueryDate(\n\t\t\tString onAttribute, Map<String, Condition> conditions, int threads)\n\t\t\tthrows Exception {\n\t\t// figure out the range of scatter prefix values we are going to assign\n\t\t// to each thread\n\t\tint range = (DynamoDataStore.SCATTER_WIDTH / threads) + 1;\n\t\tList<ParallelDateQueryWorker> workers = new ArrayList<>();\n\t\tCollection<Future<?>> workerStatus = new ArrayList<>();\n\t\tList<Map<String, AttributeValue>> output = new ArrayList<>();\n\n\t\t// set up the executor thread pool\n\t\tExecutorService executor = Executors.newFixedThreadPool(threads);\n\n\t\t// determine which index we should work with\n\t\tString indexName;\n\t\tif (onAttribute.equals(StreamAggregator.LAST_WRITE_SEQ)) {\n\t\t\tindexName = 
StreamAggregatorUtils\n\t\t\t\t\t.getLastWriteSeqIndexName(this.tableName);\n\t\t} else {\n\t\t\tindexName = StreamAggregatorUtils.getDateDimensionIndexName(\n\t\t\t\t\tthis.tableName, onAttribute);\n\t\t}\n\n\t\tStringBuilder conditionString = new StringBuilder();\n\t\tfor (String s : conditions.keySet()) {\n\t\t\tconditionString.append(String.format(\"%s %s %s,\", s, conditions\n\t\t\t\t\t.get(s).getComparisonOperator(), conditions.get(s)\n\t\t\t\t\t.getAttributeValueList().get(0)));\n\t\t}\n\n\t\tLOG.info(String.format(\n\t\t\t\t\"Querying %s with %s Threads on %s (Conditions: %s)\",\n\t\t\t\tindexName,\n\t\t\t\tthreads,\n\t\t\t\tonAttribute,\n\t\t\t\tconditionString.length() > 0 ? conditionString.substring(0,\n\t\t\t\t\t\tconditionString.length() - 1).toString() : \"None\"));\n\n\t\t// create workers for each segment that we need to do queries against\n\t\tfor (int i = 0; i < DynamoDataStore.SCATTER_WIDTH; i++) {\n\t\t\tif (i == 0 || i % range == 0) {\n\t\t\t\tParallelDateQueryWorker worker = new ParallelDateQueryWorker(\n\t\t\t\t\t\tthis.tableName, indexName, i, range, conditions,\n\t\t\t\t\t\tthis.labelAttribute, this.dateAttribute);\n\t\t\t\tworkers.add(worker);\n\t\t\t\tworkerStatus.add(executor.submit(worker));\n\t\t\t}\n\t\t}\n\t\tfor (Future<?> f : workerStatus) {\n\t\t\tf.get();\n\t\t}\n\t\texecutor.shutdown();\n\n\t\t// collect the results from the workers\n\t\tint outputCounter = 0;\n\n\t\tfor (ParallelDateQueryWorker w : workers) {\n\t\t\t// throw any exceptions that the worker handled\n\t\t\tw.throwException();\n\n\t\t\t// generate a set of KeysAndAttributes from the deduplicated output\n\t\t\t// map of table keys\n\t\t\tMap<String, Set<String>> workerKeys = w.getResultKeys();\n\t\t\tKeysAndAttributes k = convertResultKeys(workerKeys);\n\n\t\t\t// break the KeysAndAttributes up into batches of 25 and\n\t\t\t// query for them\n\t\t\tKeysAndAttributes queryKeys = new KeysAndAttributes();\n\t\t\tif (k != null && k.getKeys() != null) {\n\t\t\t\tfor 
(Map<String, AttributeValue> key : k.getKeys()) {\n\t\t\t\t\tqueryKeys.withKeys(key);\n\n\t\t\t\t\toutputCounter++;\n\n\t\t\t\t\tif (outputCounter % 25 == 0) {\n\t\t\t\t\t\toutput.addAll(batchGetDataByKeys(this.tableName,\n\t\t\t\t\t\t\t\tqueryKeys));\n\t\t\t\t\t\tqueryKeys = new KeysAndAttributes();\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// one final query for anything < mod(25)=0\n\t\t\t\tif (queryKeys.getKeys() != null\n\t\t\t\t\t\t&& queryKeys.getKeys().size() > 0) {\n\t\t\t\t\toutput.addAll(batchGetDataByKeys(this.tableName, queryKeys));\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\treturn output;\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoUtils.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport java.util.ArrayList;\nimport java.util.Collection;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDB;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient;\nimport com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;\nimport com.amazonaws.services.dynamodbv2.model.AttributeDefinition;\nimport com.amazonaws.services.dynamodbv2.model.AttributeValue;\nimport com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException;\nimport com.amazonaws.services.dynamodbv2.model.CreateTableRequest;\nimport com.amazonaws.services.dynamodbv2.model.CreateTableResult;\nimport com.amazonaws.services.dynamodbv2.model.DescribeTableResult;\nimport com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndex;\nimport com.amazonaws.services.dynamodbv2.model.KeySchemaElement;\nimport com.amazonaws.services.dynamodbv2.model.KeyType;\nimport com.amazonaws.services.dynamodbv2.model.LimitExceededException;\nimport com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;\nimport 
com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException;\nimport com.amazonaws.services.dynamodbv2.model.QueryRequest;\nimport com.amazonaws.services.dynamodbv2.model.QueryResult;\nimport com.amazonaws.services.dynamodbv2.model.ResourceInUseException;\nimport com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;\nimport com.amazonaws.services.dynamodbv2.model.ScanRequest;\nimport com.amazonaws.services.dynamodbv2.model.ScanResult;\nimport com.amazonaws.services.dynamodbv2.model.Select;\nimport com.amazonaws.services.dynamodbv2.model.TableStatus;\nimport com.amazonaws.services.dynamodbv2.model.UpdateItemRequest;\nimport com.amazonaws.services.dynamodbv2.model.UpdateItemResult;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\n\npublic class DynamoUtils {\n\tprivate static final Log LOG = LogFactory.getLog(DynamoUtils.class);\n\n\tprivate DynamoUtils() {\n\t}\n\n\t/**\n\t * Private interface for creating tables which handles any instances of\n\t * Throttling of the API\n\t * \n\t * @param dynamoClient\n\t * @param dynamoTable\n\t * @return\n\t * @throws Exception\n\t */\n\tpublic static CreateTableResult safeCreateTable(\n\t\t\tfinal AmazonDynamoDB dynamoClient,\n\t\t\tfinal CreateTableRequest createTableRequest) throws Exception {\n\t\tCreateTableResult res = null;\n\t\tfinal int tryMax = 10;\n\t\tint tries = 0;\n\t\twhile (true) {\n\t\t\ttry {\n\t\t\t\tres = dynamoClient.createTable(createTableRequest);\n\t\t\t\treturn res;\n\t\t\t} catch (LimitExceededException le) {\n\t\t\t\tif (tries < tryMax) {\n\t\t\t\t\t// back off for 1 second\n\t\t\t\t\tThread.sleep(1000);\n\t\t\t\t\ttries++;\n\t\t\t\t} else {\n\t\t\t\t\tthrow le;\n\t\t\t\t}\n\t\t\t} catch (ResourceInUseException rie) {\n\t\t\t\t// someone else is trying to create the table while we are, so\n\t\t\t\t// return ok\n\t\t\t\treturn null;\n\t\t\t}\n\t\t}\n\t}\n\n\t/**\n\t * Creates a table in Dynamo DB with the requested read and write capacity,\n\t * 
attributes, key schema and GSI's. This method will block until the table\n\t * is Active in Dynamo DB.\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param dynamoTable\n\t *            The table name to create in Dynamo DB.\n\t * @param readCapacity\n\t *            The requested amount of read IOPS to be provisioned.\n\t * @param writeCapacity\n\t *            The requested amount of write IOPS to be provisioned.\n\t * @param attributes\n\t *            Attribute Names which must be indicated to create the key\n\t *            schema and/or GSI's.\n\t * @param keySchema\n\t *            The keys used for the primary key of the table.\n\t * @param gsi\n\t *            List of Global Secondary Indexes to be created on the table\n\t * @throws Exception\n\t */\n\tpublic static void initTable(final AmazonDynamoDB dynamoClient,\n\t\t\tfinal String dynamoTable, final long readCapacity,\n\t\t\tfinal long writeCapacity, List<AttributeDefinition> attributes,\n\t\t\tList<KeySchemaElement> keySchema,\n\t\t\tfinal Collection<GlobalSecondaryIndex> gsi) throws Exception {\n\t\ttry {\n\t\t\tDescribeTableResult res = safeDescribeTable(dynamoClient,\n\t\t\t\t\tdynamoTable);\n\n\t\t\tif (!res.getTable().getTableStatus().equals(\"ACTIVE\")) {\n\t\t\t\twaitForTableActive(dynamoClient, dynamoTable);\n\t\t\t}\n\t\t} catch (ResourceInUseException r) {\n\t\t\twaitForTableActive(dynamoClient, dynamoTable);\n\t\t} catch (ResourceNotFoundException e) {\n\t\t\tLOG.info(String\n\t\t\t\t\t.format(\"Table %s Not Found - Creating with %s Reads/sec & %s Writes/sec\",\n\t\t\t\t\t\t\tdynamoTable, readCapacity, writeCapacity));\n\n\t\t\tCreateTableRequest createTableRequest = new CreateTableRequest()\n\t\t\t\t\t.withTableName(dynamoTable)\n\t\t\t\t\t.withProvisionedThroughput(\n\t\t\t\t\t\t\tnew 
ProvisionedThroughput().withReadCapacityUnits(\n\t\t\t\t\t\t\t\t\treadCapacity).withWriteCapacityUnits(\n\t\t\t\t\t\t\t\t\twriteCapacity)).withKeySchema(keySchema)\n\t\t\t\t\t.withAttributeDefinitions(attributes);\n\n\t\t\tif (gsi != null)\n\t\t\t\tcreateTableRequest.withGlobalSecondaryIndexes(gsi);\n\n\t\t\t// create the table\n\t\t\ttry {\n\t\t\t\tsafeCreateTable(dynamoClient, createTableRequest);\n\t\t\t} catch (Exception ex) {\n\t\t\t\tLOG.error(ex);\n\t\t\t\tthrow e;\n\t\t\t}\n\n\t\t\t// wait for it to go to active state\n\t\t\twaitForTableActive(dynamoClient, dynamoTable);\n\t\t}\n\t}\n\n\t/**\n\t * Private interface for describing tables which handles any instances of\n\t * Throttling of the API\n\t * \n\t * @param dynamoClient\n\t * @param dynamoTable\n\t * @return\n\t * @throws Exception\n\t */\n\tpublic static DescribeTableResult safeDescribeTable(\n\t\t\tfinal AmazonDynamoDB dynamoClient, final String dynamoTable)\n\t\t\tthrows Exception {\n\t\tDescribeTableResult res = null;\n\t\tfinal int tryMax = 10;\n\t\tint tries = 0;\n\t\twhile (true) {\n\t\t\ttry {\n\t\t\t\tres = dynamoClient.describeTable(dynamoTable);\n\n\t\t\t\treturn res;\n\t\t\t} catch (ResourceNotFoundException e) {\n\t\t\t\tif (tries < tryMax) {\n\t\t\t\t\t// sleep for a short time as this is potentially an eventual\n\t\t\t\t\t// consistency issue with the table having been created ms\n\t\t\t\t\t// ago\n\t\t\t\t\tThread.sleep(10);\n\t\t\t\t\ttries++;\n\t\t\t\t} else {\n\t\t\t\t\tthrow e;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t/**\n\t * Method which waits for a Dynamo table to enter status 'Active'.\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param dynamoTable\n\t *            The Table in Dynamo.\n\t * @throws Exception\n\t */\n\tpublic static void waitForTableActive(final AmazonDynamoDB dynamoClient,\n\t\t\tfinal String dynamoTable) throws Exception {\n\t\twaitForTableState(dynamoClient, dynamoTable, 
TableStatus.ACTIVE);\n\t}\n\n\t/**\n\t * Interface which will block until a dynamo table reaches a specified\n\t * state. Also returns immediately if the object doesn't exist\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param dynamoTable\n\t *            The table name to check.\n\t * @param status\n\t *            The status to wait for\n\t * @throws Exception\n\t */\n\tprivate static void waitForTableState(final AmazonDynamoDB dynamoClient,\n\t\t\tfinal String dynamoTable, TableStatus status) throws Exception {\n\t\tDescribeTableResult tableRequest = null;\n\t\twhile (true) {\n\t\t\ttry {\n\t\t\t\ttableRequest = dynamoClient.describeTable(dynamoTable);\n\t\t\t\tif (tableRequest.getTable().getTableStatus()\n\t\t\t\t\t\t.equals(status.name()))\n\t\t\t\t\tbreak;\n\n\t\t\t\tThread.sleep(1000);\n\t\t\t} catch (InterruptedException e) {\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n\n\tpublic static void dropTable(final AmazonDynamoDB dynamoClient,\n\t\t\tfinal String dynamoTable) throws Exception {\n\t\tif (dynamoTable != null) {\n\t\t\tLOG.info(String.format(\"Dropping Dynamo Table %s\", dynamoTable));\n\t\t\ttry {\n\t\t\t\tdynamoClient.deleteTable(dynamoTable);\n\t\t\t\twaitForTableState(dynamoClient, dynamoTable,\n\t\t\t\t\t\tTableStatus.DELETING);\n\t\t\t} catch (ResourceNotFoundException e) {\n\t\t\t\tLOG.info(\"OK - Table Not Found\");\n\t\t\t}\n\t\t}\n\t}\n\n\tpublic static void cleanupAggTable(AWSCredentialsProvider credentials,\n\t\t\tRegion region, final String dynamoTable, final String toSeq)\n\t\t\tthrows Exception {\n\t\tfinal Double deleteBelow = Double.parseDouble(toSeq);\n\n\t\t// create two clients - one synchronous for the read of all candidate\n\t\t// values, and another for the delete operations\n\t\tfinal AmazonDynamoDB dynamoClient = new AmazonDynamoDBClient(\n\t\t\t\tcredentials);\n\t\tif (region != null)\n\t\t\tdynamoClient.setRegion(region);\n\t\tfinal AmazonDynamoDBAsyncClient 
deleteCli = new AmazonDynamoDBAsyncClient(\n\t\t\t\tcredentials);\n\t\tdeleteCli.setRegion(region);\n\t\tMap<String, AttributeValue> lastKey = null;\n\t\tMap<String, AttributeValue> deleteKey = null;\n\n\t\t// work out what the key and date column name is\n\t\tString keyColumn = null;\n\t\tString dateColumn = null;\n\n\t\tList<KeySchemaElement> keySchema = dynamoClient\n\t\t\t\t.describeTable(dynamoTable).getTable().getKeySchema();\n\t\tfor (KeySchemaElement element : keySchema) {\n\t\t\tif (element.getKeyType().equals(KeyType.HASH.name()))\n\t\t\t\tkeyColumn = element.getAttributeName();\n\n\t\t\tif (element.getKeyType().equals(KeyType.RANGE.name()))\n\t\t\t\tdateColumn = element.getAttributeName();\n\t\t}\n\n\t\tLOG.info(String.format(\n\t\t\t\t\"Deleting data from %s where %s values are below %s\",\n\t\t\t\tdynamoTable, StreamAggregator.LAST_WRITE_SEQ, deleteBelow));\n\t\tint deleteCount = 0;\n\n\t\tdo {\n\t\t\t// read data from the table\n\t\t\tScanRequest scan = new ScanRequest()\n\t\t\t\t\t.withTableName(dynamoTable)\n\t\t\t\t\t.withAttributesToGet(keyColumn, dateColumn,\n\t\t\t\t\t\t\tStreamAggregator.LAST_WRITE_SEQ)\n\t\t\t\t\t.withExclusiveStartKey(lastKey);\n\n\t\t\tScanResult results = dynamoClient.scan(scan);\n\n\t\t\t// delete everything up to the system provided change number\n\t\t\tfor (Map<String, AttributeValue> map : results.getItems()) {\n\t\t\t\tdeleteKey = new HashMap<>();\n\t\t\t\tdeleteKey.put(keyColumn, map.get(keyColumn));\n\t\t\t\tdeleteKey.put(dateColumn, map.get(dateColumn));\n\n\t\t\t\tif (Double.parseDouble(map.get(StreamAggregator.LAST_WRITE_SEQ)\n\t\t\t\t\t\t.getS()) < deleteBelow) {\n\t\t\t\t\tdeleteCli.deleteItem(dynamoTable, deleteKey);\n\t\t\t\t\tdeleteCount++;\n\t\t\t\t}\n\t\t\t}\n\t\t\tlastKey = results.getLastEvaluatedKey();\n\t\t} while (lastKey != null);\n\n\t\tLOG.info(String.format(\n\t\t\t\t\"Operation Complete - %s Records removed from Aggregate Store\",\n\t\t\t\tdeleteCount));\n\t}\n\n\tpublic static UpdateItemResult 
updateWithRetries(\n\t\t\tAmazonDynamoDB dynamoClient, UpdateItemRequest req)\n\t\t\tthrows Exception {\n\t\tfinal double initialBackoff = 2D;\n\t\tfinal int updateRetries = 10;\n\t\tfinal double backoffRatio = 1.2;\n\n\t\tdouble backoff = initialBackoff;\n\n\t\tUpdateItemResult res = null;\n\n\t\tfor (int i = 0; i < updateRetries; i++) {\n\t\t\ttry {\n\t\t\t\tres = dynamoClient.updateItem(req);\n\t\t\t\tbreak;\n\t\t\t} catch (ProvisionedThroughputExceededException ptee) {\n\t\t\t\tLOG.warn(String.format(\n\t\t\t\t\t\t\"Exceeded Provisioned Througput - Backing off for %s\",\n\t\t\t\t\t\tbackoff));\n\t\t\t\ttry {\n\t\t\t\t\tThread.sleep(new Double(backoff).longValue());\n\t\t\t\t} catch (InterruptedException e) {\n\t\t\t\t\te.printStackTrace();\n\t\t\t\t}\n\t\t\t\t// simple linear backoff\n\t\t\t\tbackoff = backoff * backoffRatio;\n\t\t\t} catch (ConditionalCheckFailedException ccfe) {\n\t\t\t\t// silently rethrow these exceptions as they are part of the\n\t\t\t\t// conditional update logic for MIN/MAX calculations\n\t\t\t\tthrow ccfe;\n\t\t\t} catch (Exception e) {\n\t\t\t\tLOG.warn(e);\n\t\t\t\tthrow e;\n\t\t\t}\n\t\t}\n\n\t\tif (res == null) {\n\t\t\tthrow new Exception(String.format(\n\t\t\t\t\t\"Unable to write after %s retries\", updateRetries));\n\t\t} else {\n\t\t\treturn res;\n\t\t}\n\t}\n\n\t/**\n\t * Method which examines an table which backs an Aggregator, and returns a\n\t * string value which represents the list of attributes in the table. 
This\n\t * method assumes that all elements in an aggregate table are the same.\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param dynamoTable\n\t *            The Table to get the structure of.\n\t * @return A String representation of the attribute names in the table.\n\t * @throws Exception\n\t */\n\tpublic static String getDynamoTableStructure(AmazonDynamoDB dynamoClient,\n\t\t\tString dynamoTable) throws Exception {\n\t\tList<String> columns = getDictionaryEntry(dynamoClient, dynamoTable);\n\t\tStringBuffer sb = new StringBuffer();\n\t\tfor (String s : columns) {\n\t\t\tsb.append(String.format(\"%s,\", s));\n\t\t}\n\t\treturn String.format(\"Dynamo Table %s (%s)\",\n\t\t\t\tsb.toString().substring(0, sb.length() - 1), dynamoTable);\n\t}\n\n\t/**\n\t * Generate a list of attribute names found in the Aggregator's dynamo\n\t * table. Assumes that all Items in the Aggregator table are of the same\n\t * structure.\n\t * \n\t * @param dynamoClient\n\t *            Dynamo DB Client to use for connection to Dynamo DB.\n\t * @param dynamoTable\n\t *            The Dynamo Table for the Aggregator\n\t * @return A list of attribute names from the Dynamo table\n\t * @throws Exception\n\t */\n\tpublic static List<String> getDictionaryEntry(\n\t\t\tfinal AmazonDynamoDB dynamoClient, final String dynamoTable)\n\t\t\tthrows Exception {\n\t\t// get a list of all columns in the table, with keys first\n\t\tList<String> columns = new ArrayList<>();\n\t\tList<KeySchemaElement> keys = dynamoClient.describeTable(dynamoTable)\n\t\t\t\t.getTable().getKeySchema();\n\t\tfor (KeySchemaElement key : keys) {\n\t\t\tcolumns.add(key.getAttributeName());\n\t\t}\n\t\tScanResult scan = dynamoClient.scan(new ScanRequest()\n\t\t\t\t.withTableName(dynamoTable).withSelect(Select.ALL_ATTRIBUTES)\n\t\t\t\t.withLimit(1));\n\t\tList<Map<String, AttributeValue>> scannedItems = scan.getItems();\n\t\tfor (Map<String, AttributeValue> map : 
scannedItems) {\n\t\t\tfor (String s : map.keySet()) {\n\t\t\t\tif (!columns.contains(s))\n\t\t\t\t\tcolumns.add(s);\n\t\t\t}\n\t\t}\n\n\t\treturn columns;\n\t}\n\n\tpublic static List<Map<String, AttributeValue>> queryUntilDone(\n\t\t\tAmazonDynamoDB dynamoClient, QueryRequest qr, int backoffMillis)\n\t\t\tthrows Exception {\n\t\tList<Map<String, AttributeValue>> output = new ArrayList<>();\n\n\t\tMap<String, AttributeValue> lastKeyEvaluated = null;\n\t\tdo {\n\t\t\tint queryAttempts = 0;\n\t\t\tQueryResult result = null;\n\n\t\t\tdo {\n\t\t\t\ttry {\n\t\t\t\t\tresult = dynamoClient.query(qr).withLastEvaluatedKey(\n\t\t\t\t\t\t\tlastKeyEvaluated);\n\n\t\t\t\t\toutput.addAll(result.getItems());\n\t\t\t\t} catch (ProvisionedThroughputExceededException e) {\n\t\t\t\t\tLOG.warn(String\n\t\t\t\t\t\t\t.format(\"Provisioned Throughput Exceeded - Retry Attempt %s\",\n\t\t\t\t\t\t\t\t\tqueryAttempts));\n\n\t\t\t\t\tThread.sleep(2 ^ queryAttempts * backoffMillis);\n\n\t\t\t\t\tqueryAttempts++;\n\t\t\t\t}\n\t\t\t} while (queryAttempts < 10 && result == null);\n\n\t\t\tif (result == null) {\n\t\t\t\tthrow new Exception(String.format(\n\t\t\t\t\t\t\"Unable to execute Query after %s attempts\",\n\t\t\t\t\t\tqueryAttempts));\n\t\t\t}\n\n\t\t\tlastKeyEvaluated = result.getLastEvaluatedKey();\n\t\t} while (lastKeyEvaluated != null);\n\n\t\treturn output;\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/IDataStore.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.datastore;\n\nimport java.util.Map;\n\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateValue;\n\n/**\n * Interface which is used to allow the in memory cached aggregates to be saved\n * to a persistent store\n */\npublic interface IDataStore {\n    /**\n     * Write a set of Update Key/Value pairs back to the backing store\n     * \n     * @param data The Input Dataset to be updated\n     * @return A data structure which maps a set of\n     *         AggregateAttributeModifications back to the values that were\n     *         affected on the underlying datastore, by UpdateKey\n     * @throws Exception\n     */\n    public Map<UpdateKey, Map<String, AggregateAttributeModification>> write(\n            Map<UpdateKey, UpdateValue> data) throws Exception;\n\n    /**\n     * Method called on creation of the IDataStore\n     * \n     * @throws Exception\n     */\n    public void initialise() throws Exception;\n\n    /**\n     * Method which will be periodically invoked to allow the IDataStore to\n     * refresh tolerated limits for how often write() should be called\n     * \n     * @return\n     * @throws Exception\n     */\n    public long 
refreshForceCheckpointThresholds() throws Exception;\n\n    /**\n     * Method called to set the region for the IDataStore\n     * \n     * @param region\n     */\n    public void setRegion(Region region);\n\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/exception/ClassNotAnnotatedException.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.exception;\n\n@SuppressWarnings(\"serial\")\npublic class ClassNotAnnotatedException extends Exception {\n    private String message;\n\n    public ClassNotAnnotatedException(Exception e) {\n        super(e);\n    }\n\n    public ClassNotAnnotatedException(String message, Exception e) {\n        super(message, e);\n    }\n\n    public ClassNotAnnotatedException(String message) {\n        super(message);\n    }\n\n    @Override\n    public String getMessage() {\n        return this.message;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/exception/InvalidConfigurationException.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.exception;\n\npublic class InvalidConfigurationException extends Exception {\n\n    public InvalidConfigurationException() {\n    }\n\n    public InvalidConfigurationException(String message) {\n        super(message);\n        // TODO Auto-generated constructor stub\n    }\n\n    public InvalidConfigurationException(Throwable cause) {\n        super(cause);\n    }\n\n    public InvalidConfigurationException(String message, Throwable cause) {\n        super(message, cause);\n    }\n\n    public InvalidConfigurationException(String message, Throwable cause,\n            boolean enableSuppression, boolean writableStackTrace) {\n        super(message, cause, enableSuppression, writableStackTrace);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/exception/SerializationException.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.exception;\n\n@SuppressWarnings(\"serial\")\npublic class SerializationException extends Exception {\n    private String message;\n\n    public SerializationException(String message) {\n        super(message);\n    }\n\n    public SerializationException(Exception e) {\n        super(e);\n    }\n\n    public SerializationException(String message, Exception e) {\n        super(message, e);\n    }\n\n    @Override\n    public String getMessage() {\n        return this.message;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/exception/UnsupportedCalculationException.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.exception;\n\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\n\n/**\n * Exception thrown when a summary value is indicated that is not one of\n * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}\n */\npublic class UnsupportedCalculationException extends Exception {\n    private String message;\n\n    public UnsupportedCalculationException(String message) {\n        super();\n        this.message = message;\n    }\n\n    @Override\n    public String getMessage() {\n        return this.message;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/factory/CSVAggregatorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.factory;\n\nimport java.util.Arrays;\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.CsvDataExtractor;\nimport com.amazonaws.services.kinesis.io.StringDataExtractor;\n\n/**\n * Factory Class used for generating Aggregators which support CSV data on the\n * Kinesis Stream.\n */\npublic class CSVAggregatorFactory {\n    private CSVAggregatorFactory() {\n    }\n\n    /**\n     * Factory Method to generate a new Aggregator for CSV Data.\n     * \n     * @param streamName The name of the Stream to aggregate against.\n     * @param appName The application name to associate with the aggregator.\n     * @param config The Kinesis Configuration used for the containing worker.\n     * @param namespace The namespace to associate with the aggregated data.\n     * @param timeHorizon The time horizons on which to aggregate data.\n     * @param aggregatorType The type of aggregator to create.\n     * @param 
delimiter The character delimiter for data on the stream.\n     * @param labelIndicies The position of the field in the stream data which\n     *        should be used to aggregate data.\n     * @param dateIndex The position of the field which includes a date item\n     *        used to aggregate data by the timeHorizon. Values can be in String\n     *        format if dateFormat is supplied, or in epoch seconds.\n     * @param dateFormat The format of the date item, if provided as a String\n     * @param summaryIndicies The list of field positions, or expressions using\n     *        a {@link SummaryCalculation} against the field positions. For\n     *        example, simple summaries might have a list of '0,1,2' or when\n     *        expressions are used, a list of 'min(0),sum(1),max(2)'.\n     * @return Returns a new CSV Aggregator.\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon,\n            AggregatorType aggregatorType, String delimiter, List<Integer> labelIndicies,\n            String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias,\n            List<Object> summaryIndicies) throws Exception {\n        return newInstance(streamName, appName, config, namespace,\n                Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, delimiter,\n                labelIndicies, labelAttributeAlias, dateIndex, dateFormat, dateAlias,\n                summaryIndicies);\n    }\n\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, List<TimeHorizon> timeHorizons,\n            AggregatorType aggregatorType, String delimiter, List<Integer> labelIndicies,\n            String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias,\n            
List<Object> summaryIndicies) throws Exception {\n        StringDataExtractor dataExtractor = new CsvDataExtractor(labelIndicies).withDelimiter(\n                delimiter).withDateValueIndex(dateIndex).withDateFormat(dateFormat).withSummaryIndicies(\n                summaryIndicies);\n        dataExtractor.setAggregatorType(aggregatorType);\n        if (labelAttributeAlias != null && !labelAttributeAlias.equals(\"\")) {\n            dataExtractor.withLabelAttributeAlias(labelAttributeAlias);\n        }\n        if (dateAlias != null && !dateAlias.equals(\"\")) {\n            dataExtractor.withDateAttributeAlias(dateAlias);\n        }\n        return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon(\n                timeHorizons).withAggregatorType(aggregatorType);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/factory/ExternallyConfiguredAggregatorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.factory;\n\nimport java.util.ArrayList;\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.configuration.ExternalConfigurationModel;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.CsvDataExtractor;\nimport com.amazonaws.services.kinesis.io.IDataExtractor;\nimport com.amazonaws.services.kinesis.io.JsonDataExtractor;\nimport com.amazonaws.services.kinesis.io.ObjectExtractor;\nimport com.amazonaws.services.kinesis.io.RegexDataExtractor;\n\npublic class ExternallyConfiguredAggregatorFactory {\n    private ExternallyConfiguredAggregatorFactory() {\n    }\n\n    private static List<Integer> intList(List<String> stringList) {\n        List<Integer> list = new ArrayList<>();\n\n        for (String s : stringList) {\n            list.add(Integer.parseInt(s));\n        }\n\n        return list;\n    }\n\n    public static AggregatorGroup buildFromConfig(String streamName, String 
applicationName,\n            KinesisClientLibConfiguration config, String configFile) throws Exception {\n        List<ExternalConfigurationModel> models = ExternalConfigurationModel.buildFromConfig(configFile);\n\n        if (models.size() == 0) {\n            throw new InvalidConfigurationException(String.format(\n                    \"Unable to build any Aggregators from External Configuration %s\", configFile));\n        }\n\n        AggregatorGroup aggregators = new AggregatorGroup();\n        StreamAggregator agg = null;\n        IDataExtractor dataExtractor = null;\n\n        // the configuration may have included many configuration models\n        for (ExternalConfigurationModel model : models) {\n            switch (model.getDataExtractor()) {\n                case CSV:\n                    CsvDataExtractor d = new CsvDataExtractor(intList(model.getLabelItems())).withDateValueIndex(\n                            Integer.parseInt(model.getDateItem())).withDelimiter(\n                            model.getDelimiter()).withItemTerminator(model.getItemTerminator()).withRegexFilter(\n                            model.getFilterRegex()).withDateFormat(model.getDateFormat()).withStringSummaryIndicies(\n                            model.getSummaryItems());\n\n                    if (model.getLabelAttributeAlias() != null) {\n                        d.withLabelAttributeAlias(model.getLabelAttributeAlias());\n                    }\n                    if (model.getDateAttributeAlias() != null) {\n                        d.withDateAttributeAlias(model.getDateAttributeAlias());\n                    }\n\n                    dataExtractor = d;\n                    break;\n                case REGEX:\n                    RegexDataExtractor e = new RegexDataExtractor(model.getRegularExpression(),\n                            intList(model.getLabelItems())).withItemTerminator(\n                            model.getItemTerminator()).withDateValueIndex(\n                       
     Integer.parseInt(model.getDateItem())).withDateFormat(\n                            model.getDateFormat()).withStringSummaryIndicies(\n                            model.getSummaryItems());\n                    if (model.getLabelAttributeAlias() != null) {\n                        e.withLabelAttributeAlias(model.getLabelAttributeAlias());\n                    }\n                    if (model.getDateAttributeAlias() != null) {\n                        e.withDateAttributeAlias(model.getDateAttributeAlias());\n                    }\n\n                    dataExtractor = e;\n                    break;\n                case JSON:\n                    dataExtractor = new JsonDataExtractor(model.getLabelItems())\n                            .withDateFormat(model.getDateFormat())\n                            .withDateValueAttribute(model.getDateItem())\n                            .withSummaryAttributes(model.getSummaryItems())\n                            .withItemTerminator(model.getItemTerminator())\n                            .withRegexFilter(model.getFilterRegex())\n                    ;\n                    break;\n                case OBJECT:\n                    ObjectExtractor extractor = null;\n                    if (model.isAnnotatedClass()) {\n                        extractor = new ObjectExtractor(model.getClazz());\n                    } else {\n                        extractor = new ObjectExtractor(model.getLabelItems(), model.getClazz());\n                    }\n\n                    extractor.withDateMethod(model.getDateItem()).withSummaryMethods(\n                            model.getSummaryItems());\n                    dataExtractor = extractor;\n                    break;\n\n            }\n\n            dataExtractor.setAggregatorType(model.getAggregatorType());\n\n            agg = new StreamAggregator(streamName, applicationName, model.getNamespace(), config,\n                    
dataExtractor).withAggregatorType(model.getAggregatorType()).withStorageCapacity(\n                    model.getReadIOPs(), model.getWriteIOPs()).withTableName(model.getTableName()).withTimeHorizon(\n                    model.getTimeHorizons()).withRaiseExceptionOnDataExtractionErrors(\n                    model.shouldFailOnDataExtraction());\n\n            // configure metrics service on the aggregator if it's been\n            // configured\n            if (model.shouldEmitMetrics() || model.getMetricsEmitter() != null) {\n                if (model.getMetricsEmitter() != null) {\n                    agg.withMetricsEmitter(model.getMetricsEmitter().newInstance());\n                } else {\n                    agg.withCloudWatchMetrics();\n                }\n            }\n\n            // create a new instance of the Data Store if one has been\n            // configured. Currently we only support pluggable data stores that\n            // are configured via their environment or have self defined\n            // configuration models: only no args public constructors can be\n            // called\n            if (model.getDataStore() != null) {\n                agg.withDataStore((IDataStore) model.getDataStore().newInstance());\n            }\n\n            aggregators.registerAggregator(agg);\n        }\n\n        return aggregators;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/factory/JsonAggregatorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.factory;\n\nimport java.util.Arrays;\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.IDataExtractor;\nimport com.amazonaws.services.kinesis.io.JsonDataExtractor;\n\npublic class JsonAggregatorFactory {\n    private JsonAggregatorFactory() {\n    }\n\n    /**\n     * Creates an Aggregator for data that is formatted as JSON Strings on the\n     * Kinesis Stream.\n     * \n     * @param streamName The Stream Name that the Aggregator is receiving data\n     *        from.\n     * @param appName The Application Name that an Aggregator is part of.\n     * @param config The Kinesis Client Library Configuration to inherit\n     *        credentials and connectivity to the database from.\n     * @param namespace The namespace used to separate this Aggregator's output\n     *        data from other Aggregated data\n     * @param timeHorizon The Time Horizon value to use for the granularity of\n     *        the Aggregated data\n     * @param aggregatorType The type of Aggregator to 
create. Default is COUNT.\n     * @param labelAttributes The attribute name in the JSON document which\n     *        should be used as the label value for Aggregation\n     * @param dateAttribute The attribute name in the JSON document which should\n     *        be used for the time element of the Aggregation. If NULL then the\n     *        client receive time will be used.\n     * @param dateFormat The format of the dateAttribute, if String based dates\n     *        are used. This should follow {@link java.text.SimpleDateFormat}\n     *        convention.\n     * @param summaryAttributes List of attributes or expressions on attributes\n     *        which should be used for summary aggregation.\n     * @return A Stream Aggregator which can process JSON data containing the\n     *         indicated attributes.\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon,\n            AggregatorType aggregatorType, List<String> labelAttributes, String dateAttribute,\n            String dateFormat, List<String> summaryAttributes) throws Exception {\n        return newInstance(streamName, appName, config, namespace,\n                Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, labelAttributes,\n                dateAttribute, dateFormat, summaryAttributes);\n    }\n\n    /**\n     * Creates an Aggregator for data that is formatted as JSON Strings on the\n     * Kinesis Stream.\n     * \n     * @param streamName The Stream Name that the Aggregator is receiving data\n     *        from.\n     * @param appName The Application Name that an Aggregator is part of.\n     * @param workerId The worker ID hosting the Aggregator.\n     * @param config The Kinesis Client Library Configuration to inherit\n     *        credentials and connectivity to the database from.\n     * @param namespace The 
namespace used to separate this Aggregator's output\n     *        data from other Aggregated data.\n     * @param timeHorizons The list of Time Horizon values to use the\n     *        aggregator. Data will be automatically managed at ALL of the\n     *        requested granularities using a prefixed namespace on dates.\n     * @param aggregatorType The type of Aggregator to create. Default is COUNT.\n     * @param labelAttributes The attribute name in the JSON document which\n     *        should be used as the label value for Aggregation.\n     * @param dateAttribute The attribute name in the JSON document which should\n     *        be used for the time element of the Aggregation. If NULL then the\n     *        client receive time will be used.\n     * @param dateFormat The format of the dateAttribute, if String based dates\n     *        are used. This should follow {@link java.text.SimpleDateFormat}\n     *        convention.\n     * @param summaryAttributes List of attributes or expressions on attributes\n     *        which should be used for summary aggregation.\n     * @return A Stream Aggregator which can process JSON data containing the\n     *         indicated attributes.\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, List<TimeHorizon> timeHorizons,\n            AggregatorType aggregatorType, List<String> labelAttributes, String dateAttribute,\n            String dateFormat, List<String> summaryAttributes) throws Exception {\n        IDataExtractor dataExtractor = new JsonDataExtractor(labelAttributes).withDateValueAttribute(\n                dateAttribute).withSummaryAttributes(summaryAttributes).withDateFormat(dateFormat);\n        dataExtractor.setAggregatorType(aggregatorType);\n        return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon(\n                
timeHorizons).withAggregatorType(aggregatorType);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/factory/ObjectAggregatorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.factory;\n\nimport java.util.Arrays;\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore;\nimport com.amazonaws.services.kinesis.aggregators.datastore.IDataStore;\nimport com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.IDataExtractor;\nimport com.amazonaws.services.kinesis.io.ObjectExtractor;\n\npublic class ObjectAggregatorFactory {\n    private ObjectAggregatorFactory() {\n    }\n\n    /**\n     * Create a new Aggregator for Object Serialised Data based upon a Class\n     * which is configured using Annotations from the base class.\n     * \n     * @param streamName The Stream Name that the Aggregator is receiving data\n     *        from.\n     * @param appName The Application Name that an Aggregator is part of.\n     * @param config The Kinesis Client Library Configuration to inherit\n    
 *        credentials and connectivity to the database from.\n     * @param clazz The annotated class to use for configuration of the\n     *        aggregator\n     * @return A Stream Aggregator which can process object serialised data\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, Class clazz) throws Exception {\n        AnnotationProcessor p = new AnnotationProcessor(clazz);\n        ObjectExtractor dataExtractor = new ObjectExtractor(p.getLabelMethodNames(), clazz).withDateMethod(p.getDateMethodName());\n\n        dataExtractor.withSummaryConfig(p.getSummaryConfig());\n        //dataExtractor.withSummaryMethods(new ArrayList<>(p.getSummaryMethods().keySet()));\n\n        StreamAggregator agg = new StreamAggregator(streamName, appName, p.getNamespace(), config,\n                dataExtractor).withTimeHorizon(p.getTimeHorizon()).withAggregatorType(p.getType()).withRaiseExceptionOnDataExtractionErrors(\n                p.shouldFailOnDataExtractionErrors());\n\n        // configure metrics service on the aggregator if it's been\n        // configured\n        if (p.shouldEmitMetrics()\n                || (p.getMetricsEmitter() != null && !p.getMetricsEmitter().equals(\n                        CloudWatchMetricsEmitter.class))) {\n            if (p.getMetricsEmitter() != null) {\n                agg.withMetricsEmitter(p.getMetricsEmitter().newInstance());\n            } else {\n                agg.withCloudWatchMetrics();\n            }\n        }\n\n        // create a new instance of the Data Store if one has been\n        // configured. 
Currently we only support pluggable data stores that\n        // are configured via their environment or have self defined\n        // configuration models: only no args public constructors can be\n        // called\n        if (p.getDataStore() != null && !p.getDataStore().equals(DynamoDataStore.class)) {\n            agg.withDataStore((IDataStore) p.getDataStore().newInstance());\n        }\n\n        return agg;\n    }\n\n    /**\n     * Create a new Aggregator for data which is object serialised on the stream\n     * using Jackson JSON Serialisation.\n     * \n     * @param streamName The Stream Name that the Aggregator is receiving data\n     *        from.\n     * @param appName The Application Name that an Aggregator is part of.\n     * @param config The Kinesis Client Library Configuration to inherit\n     *        credentials and connectivity to the database from.\n     * @param namespace The namespace used to separate this Aggregator's output\n     *        data from other Aggregated data\n     * @param timeHorizon The Time Horizon value to use for the granularity of\n     *        the Aggregated data\n     * @param aggregatorType The type of Aggregator to create. Default is COUNT.\n     * @param clazz The base class to use as a Transfer Object for the data\n     *        stream.\n     * @param labelMethods The method on the base class to use to obtain the\n     *        label for aggregation.\n     * @param dateMethod The method on the object which should be used to\n     *        establish the time. 
If NULL then the client receive time will be\n     *        used.\n     * @param summaryMethods List of summary method names or expressions to be\n     *        used when the AggregatorType is SUM, as secondary aggregated data\n     *        points\n     * @return A Stream Aggregator which can process object serialised data\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon,\n            AggregatorType aggregatorType, Class clazz, List<String> labelMethods,\n            String dateMethod, List<String> summaryMethods) throws Exception {\n        return newInstance(streamName, appName, config, namespace,\n                Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, clazz,\n                labelMethods, dateMethod, summaryMethods);\n    }\n\n    /**\n     * Create a new Aggregator for data which is object serialised on the stream\n     * using Jackson JSON Serialisation.\n     * \n     * @param streamName The Stream Name that the Aggregator is receiving data\n     *        from.\n     * @param appName The Application Name that an Aggregator is part of.\n     * @param config The Kinesis Client Library Configuration to inherit\n     *        credentials and connectivity to the database from.\n     * @param namespace The namespace used to separate this Aggregator's output\n     *        data from other Aggregated data.\n     * @param timeHorizons The list of Time Horizon values to use the\n     *        aggregator. Data will be automatically managed at ALL of the\n     *        requested granularities using a prefixed namespace on dates.\n     * @param aggregatorType The type of Aggregator to create. 
Default is COUNT.\n     * @param clazz The base class to use as a Transfer Object for the data\n     *        stream.\n     * @param labelMethods The methods on the base class to use to obtain the\n     *        label for aggregation.\n     * @param dateMethod The method on the object which should be used to\n     *        establish the time. If NULL then the client receive time will be\n     *        used.\n     * @param summaryMethods List of summary method names or expressions to be\n     *        used when the AggregatorType is SUM, as secondary aggregated data\n     *        points.\n     * @return A Stream Aggregator which can process object serialised data.\n     * @return\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, List<TimeHorizon> timeHorizons,\n            AggregatorType aggregatorType, Class clazz, List<String> labelMethods,\n            String dateMethod, List<String> summaryMethods) throws Exception {\n        IDataExtractor dataExtractor = new ObjectExtractor(labelMethods, clazz).withDateMethod(\n                dateMethod).withSummaryMethods(summaryMethods);\n        dataExtractor.setAggregatorType(aggregatorType);\n        return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon(\n                timeHorizons).withAggregatorType(aggregatorType);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/factory/RegexAggregatorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.factory;\n\nimport java.util.Arrays;\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.TimeHorizon;\nimport com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;\nimport com.amazonaws.services.kinesis.io.RegexDataExtractor;\nimport com.amazonaws.services.kinesis.io.StringDataExtractor;\n\n/**\n * Factory Class used for generating Aggregators which use Regular Expressions\n * to extract data from the Kinesis Stream.\n */\npublic class RegexAggregatorFactory {\n    private RegexAggregatorFactory() {\n    }\n\n    /**\n     * Factory Method which generates a Regular Expression based Aggregator for\n     * a number of Time Horizons\n     * \n     * @param streamName The name of the Stream to aggregate against.\n     * @param appName The application name to associate with the aggregator.\n     * @param config The Kinesis Configuration used for the containing worker.\n     * @param namespace The namespace to associate with the aggregated data.\n     * @param timeHorizon The time horizons on which to aggregate data.\n     * @param aggregatorType The type of aggregator to create.\n     * @param 
regularExpression The regular expression used to extract data from\n     *        the Kinesis Stream via Character Classes\n     * @param labelIndicies The index of the extracted data to be used as the\n     *        aggregation label\n     * @param dateIndex The index of the extracted data to be used as the time\n     *        value\n     * @param dateFormat The format of the data which represents the event time\n     *        when shipped as a String\n     * @param summaryIndicies The indicies or Summary Expressions on indicies\n     *        which contain summary values to be aggregated\n     * @return\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, List<TimeHorizon> timeHorizons,\n            AggregatorType aggregatorType, String regularExpression, List<Integer> labelIndicies,\n            String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias,\n            List<Object> summaryIndicies) throws Exception {\n        StringDataExtractor dataExtractor = new RegexDataExtractor(regularExpression, labelIndicies).withDateValueIndex(\n                dateIndex).withDateFormat(dateFormat).withSummaryIndicies(summaryIndicies);\n        dataExtractor.setAggregatorType(aggregatorType);\n\n        if (labelAttributeAlias != null && !labelAttributeAlias.equals(\"\")) {\n            dataExtractor.withLabelAttributeAlias(labelAttributeAlias);\n        }\n        if (dateAlias != null && !dateAlias.equals(\"\")) {\n            dataExtractor.withDateAttributeAlias(dateAlias);\n        }\n        return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon(\n                timeHorizons).withAggregatorType(aggregatorType);\n    }\n\n    /**\n     * Factory Method which generates a Regular Expression based Aggregator for\n     * a single Time Horizon\n     * \n     * @param 
streamName\n     * @param appName\n     * @param config\n     * @param namespace\n     * @param timeHorizon\n     * @param aggregatorType\n     * @param regularExpression\n     * @param labelIndicies\n     * @param dateIndex\n     * @param dateFormat\n     * @param summaryIndicies\n     * @return\n     * @throws Exception\n     */\n    public static final StreamAggregator newInstance(String streamName, String appName,\n            KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon,\n            AggregatorType aggregatorType, String regularExpression, List<Integer> labelIndicies,\n            String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias,\n            List<Object> summaryIndicies) throws Exception {\n        return newInstance(streamName, appName, config, namespace,\n                Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType,\n                regularExpression, labelIndicies, labelAttributeAlias, dateIndex, dateFormat,\n                dateAlias, summaryIndicies);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/idempotency/DefaultIdempotencyCheck.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.idempotency;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\n\n/**\n * Default implementation of an Idempotency Check. Always returns True - that an\n * input element should be processed\n */\npublic class DefaultIdempotencyCheck implements IIdempotencyCheck {\n    public DefaultIdempotencyCheck() {\n    }\n\n    public boolean doProcess(String partitionKey, String sequenceNumber, AggregateData dataElement,\n            byte[] originalData) {\n        return true;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/idempotency/IIdempotencyCheck.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.idempotency;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\n\n/**\n * Interface which allows for the configuration of an Idempotency Check, which\n * will conditionally select whether a record should be processed\n */\npublic interface IIdempotencyCheck {\n    /**\n     * Should the input event be processed by the configured Aggregators?\n     * \n     * @param dataElement The Deserialised and resolved data element\n     * @return True for process, False for don't\n     */\n    public boolean doProcess(String partitionKey, String sequenceNumber, AggregateData dataElement,\n            byte[] originalData);\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/CloudWatchMetricsEmitter.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.metrics;\n\nimport java.text.ParseException;\nimport java.util.ArrayList;\nimport java.util.Collection;\nimport java.util.Date;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.AmazonServiceException;\nimport com.amazonaws.auth.AWSCredentialsProvider;\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.cloudwatch.AmazonCloudWatchAsyncClient;\nimport com.amazonaws.services.cloudwatch.AmazonCloudWatchClient;\nimport com.amazonaws.services.cloudwatch.model.Dimension;\nimport com.amazonaws.services.cloudwatch.model.LimitExceededException;\nimport com.amazonaws.services.cloudwatch.model.MetricDatum;\nimport com.amazonaws.services.cloudwatch.model.PutMetricDataRequest;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification;\n\npublic class CloudWatchMetricsEmitter implements IMetricsEmitter {\n\tprivate final Log LOG = LogFactory.getLog(CloudWatchMetricsEmitter.class);\n\n\tprivate String metricsNamespace;\n\n\tprivate AmazonCloudWatchClient cloudWatchClient;\n\n\tprivate Region region;\n\n\tprivate static final int 
THROTTLING_RETRIES = 10;\n\n\tprivate static final int BACKOFF_MILLIS = 10;\n\n\tprivate static final int MAX_WRITE_ATTEMPTS = 10;\n\n\tpublic CloudWatchMetricsEmitter() {\n\t}\n\n\tpublic CloudWatchMetricsEmitter(String metricsNamespace,\n\t\t\tAWSCredentialsProvider credentials) {\n\t\tthis.metricsNamespace = metricsNamespace;\n\t\tthis.cloudWatchClient = new AmazonCloudWatchAsyncClient(credentials);\n\t}\n\n\t@Override\n\tpublic void emit(\n\t\t\tMap<UpdateKey, Map<String, AggregateAttributeModification>> metricData)\n\t\t\tthrows Exception {\n\t\tif (metricData != null) {\n\t\t\tDate metricDate = null;\n\n\t\t\tfor (UpdateKey key : metricData.keySet()) {\n\t\t\t\tPutMetricDataRequest req = new PutMetricDataRequest()\n\t\t\t\t\t\t.withNamespace(this.metricsNamespace);\n\t\t\t\tCollection<MetricDatum> data = new ArrayList<>();\n\n\t\t\t\tif (key.getDateValue().equals(\"*\")) {\n\t\t\t\t\tLOG.debug(\"Not Emitting Cloudwatch Metrics for Time Horizon FOREVER\");\n\t\t\t\t\treturn;\n\t\t\t\t} else {\n\t\t\t\t\ttry {\n\t\t\t\t\t\tmetricDate = key.getDateValueAsDate();\n\t\t\t\t\t} catch (ParseException pe) {\n\t\t\t\t\t\tLOG.error(String.format(\n\t\t\t\t\t\t\t\t\"Unable to Parse Date Value %s\",\n\t\t\t\t\t\t\t\tkey.getDateValue()));\n\t\t\t\t\t\treturn;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// send in every update as a datum\n\t\t\t\tfor (String summary : metricData.get(key).keySet()) {\n\t\t\t\t\tfinal AggregateAttributeModification mod = metricData.get(\n\t\t\t\t\t\t\tkey).get(summary);\n\t\t\t\t\t// TODO Handle that we've been sent an update for which a\n\t\t\t\t\t// new final value which might not have been set. 
This\n\t\t\t\t\t// means, for example, that on an hourly aggregate of FIRST,\n\t\t\t\t\t// we'd get a single modification at the beginning of the\n\t\t\t\t\t// hour, and then not again after\n\t\t\t\t\tif (mod.getFinalValue() != null) {\n\t\t\t\t\t\tdata.add(new MetricDatum()\n\t\t\t\t\t\t\t\t.withMetricName(mod.getOriginatingValueName())\n\t\t\t\t\t\t\t\t.withTimestamp(metricDate)\n\t\t\t\t\t\t\t\t.withDimensions(\n\t\t\t\t\t\t\t\t\t\tnew Dimension()\n\t\t\t\t\t\t\t\t\t\t\t\t.withName(\"Calculation\")\n\t\t\t\t\t\t\t\t\t\t\t\t.withValue(\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tmod.getCalculationApplied()\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t.name()),\n\t\t\t\t\t\t\t\t\t\tnew Dimension()\n\t\t\t\t\t\t\t\t\t\t\t\t.withName(\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tkey.getAggregateColumnName())\n\t\t\t\t\t\t\t\t\t\t\t\t.withValue(\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tkey.getAggregatedValue()))\n\t\t\t\t\t\t\t\t.withValue(mod.getFinalValue()));\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tboolean success = false;\n\t\t\t\tint iterations = 0;\n\t\t\t\tint backoffMillis = BACKOFF_MILLIS;\n\t\t\t\twhile (!success && iterations < MAX_WRITE_ATTEMPTS) {\n\t\t\t\t\titerations++;\n\t\t\t\t\tboolean backoff = false;\n\t\t\t\t\ttry {\n\t\t\t\t\t\tcloudWatchClient\n\t\t\t\t\t\t\t\t.putMetricData(req.withMetricData(data));\n\t\t\t\t\t\tsuccess = true;\n\t\t\t\t\t} catch (LimitExceededException e) {\n\t\t\t\t\t\tbackoff = true;\n\t\t\t\t\t} catch (AmazonServiceException ase) {\n\t\t\t\t\t\tif (ase.getErrorCode().startsWith(\"Throttling\")) {\n\t\t\t\t\t\t\tbackoff = true;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (backoff) {\n\t\t\t\t\t\tLOG.warn(\"CloudWatch Limit Exceeded - backing off\");\n\t\t\t\t\t\tThread.sleep(2 ^ iterations * BACKOFF_MILLIS);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tif (!success) {\n\t\t\t\t\tthrow new MetricsEmitterThrottledException(\n\t\t\t\t\t\t\tString.format(\n\t\t\t\t\t\t\t\t\t\"CloudWatch Metrics Emitter failed to write metrics after %s 
attempts\",\n\t\t\t\t\t\t\t\t\tMAX_WRITE_ATTEMPTS));\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t@Override\n\tpublic void setRegion(Region region) {\n\t\tthis.region = region;\n\t\tthis.cloudWatchClient.setRegion(region);\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/IMetricsEmitter.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.metrics;\n\nimport java.util.Map;\n\nimport com.amazonaws.regions.Region;\nimport com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;\nimport com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification;\n\n/**\n * Interface for providing classes which can write to metrics services. It\n * receives the output of the IDataStore modifications, and applies the data to\n * the metrics service\n */\npublic interface IMetricsEmitter {\n    /**\n     * Emit a new set of metrics to the metrics service\n     * \n     * @param metricData Input Data to be intrumented\n     * @throws Exception\n     */\n    public void emit(Map<UpdateKey, Map<String, AggregateAttributeModification>> metricData)\n            throws Exception;\n\n    /**\n     * Method called to indicate the Region of the metrics service\n     * \n     * @param region\n     */\n    public void setRegion(Region region);\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/MetricsEmitterThrottledException.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.metrics;\n\npublic class MetricsEmitterThrottledException extends Exception {\n    public MetricsEmitterThrottledException() {\n        super();\n    }\n\n    public MetricsEmitterThrottledException(String message) {\n        super(message);\n    }\n\n    public MetricsEmitterThrottledException(Exception e) {\n        super(e);\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/processor/AggregatorProcessor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.processor;\n\nimport java.util.List;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.IStreamAggregator;\nimport com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException;\nimport com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException;\nimport com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException;\nimport com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor;\nimport com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer;\nimport com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason;\nimport com.amazonaws.services.kinesis.model.Record;\n\n/**\n * Container IRecordProcessor Application which can be used as a standalone KCL\n * application. 
Simply build an Aggregator using a Factory method or direct\n * configuration, and then create the IRecordProcessor within your KCL\n * application.\n */\npublic class AggregatorProcessor implements IRecordProcessor {\n\tprivate static final Log LOG = LogFactory.getLog(AggregatorProcessor.class);\n\n\tprivate final int NUM_RETRIES = 10;\n\n\tprivate final long BACKOFF_TIME_IN_MILLIS = 100L;\n\n\tprivate String kinesisShardId;\n\n\tprivate IStreamAggregator agg;\n\n\tpublic AggregatorProcessor(IStreamAggregator agg) {\n\t\tsuper();\n\t\tthis.agg = agg;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic void initialize(String shardId) {\n\t\tLOG.info(\"Initializing AggregatorProcessor for Shard: \" + shardId);\n\t\tthis.kinesisShardId = shardId;\n\t\ttry {\n\t\t\tthis.agg.initialize(shardId);\n\t\t} catch (Exception e) {\n\t\t\te.printStackTrace();\n\t\t}\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic void processRecords(List<Record> records,\n\t\t\tIRecordProcessorCheckpointer checkpointer) {\n\t\tLOG.info(\"Aggregating \" + records.size()\n\t\t\t\t+ \" records for Kinesis Shard \" + kinesisShardId);\n\t\ttry {\n\t\t\t// run data into the aggregator\n\t\t\tagg.aggregate(records);\n\n\t\t\t// checkpoint the aggregator and kcl\n\t\t\tagg.checkpoint();\n\t\t\tcheckpointer.checkpoint(records.get(records.size() - 1));\n\n\t\t\tLOG.debug(\"Kinesis Checkpoint for Shard \" + kinesisShardId\n\t\t\t\t\t+ \" Complete\");\n\t\t} catch (Exception e) {\n\t\t\te.printStackTrace();\n\t\t\tLOG.error(e);\n\t\t\tshutdown(checkpointer, ShutdownReason.ZOMBIE);\n\t\t}\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic void shutdown(IRecordProcessorCheckpointer checkpointer,\n\t\t\tShutdownReason reason) {\n\t\tLOG.info(\"Shutting down record processor for shard: \" + kinesisShardId);\n\n\t\t// Important to checkpoint after reaching end of shard, so we can start\n\t\t// processing data from child shards.\n\t\tif (reason == 
ShutdownReason.TERMINATE) {\n\t\t\ttry {\n\t\t\t\tagg.shutdown(true);\n\t\t\t\tcheckpoint(checkpointer);\n\t\t\t} catch (Exception e) {\n\t\t\t\te.printStackTrace();\n\t\t\t}\n\t\t} else {\n\t\t\t// shutdown the aggregator without flushing state\n\t\t\ttry {\n\t\t\t\tagg.shutdown(false);\n\t\t\t} catch (Exception e) {\n\t\t\t\te.printStackTrace();\n\t\t\t}\n\t\t}\n\t}\n\n\t/**\n\t * Checkpoint with retries.\n\t * \n\t * @param checkpointer\n\t */\n\tprivate void checkpoint(IRecordProcessorCheckpointer checkpointer) {\n\t\tLOG.info(\"Checkpointing shard \" + kinesisShardId);\n\t\tfor (int i = 0; i < NUM_RETRIES; i++) {\n\t\t\ttry {\n\t\t\t\tcheckpointer.checkpoint();\n\t\t\t\tbreak;\n\t\t\t} catch (ShutdownException se) {\n\t\t\t\t// Ignore checkpoint if the processor instance has been shutdown\n\t\t\t\t// (fail over).\n\t\t\t\tLOG.info(\"Caught shutdown exception, skipping checkpoint.\", se);\n\t\t\t\tbreak;\n\t\t\t} catch (ThrottlingException e) {\n\t\t\t\t// Backoff and re-attempt checkpoint upon transient failures\n\t\t\t\tif (i >= (NUM_RETRIES - 1)) {\n\t\t\t\t\tLOG.error(\"Checkpoint failed after \" + (i + 1)\n\t\t\t\t\t\t\t+ \"attempts.\", e);\n\t\t\t\t\tbreak;\n\t\t\t\t} else {\n\t\t\t\t\tLOG.info(\"Transient issue when checkpointing - attempt \"\n\t\t\t\t\t\t\t+ (i + 1) + \" of \" + NUM_RETRIES, e);\n\t\t\t\t}\n\t\t\t} catch (InvalidStateException e) {\n\t\t\t\t// This indicates an issue with the DynamoDB table (check for\n\t\t\t\t// table, provisioned IOPS).\n\t\t\t\tLOG.error(\n\t\t\t\t\t\t\"Cannot save checkpoint to the DynamoDB table used by the KinesisClientLibrary.\",\n\t\t\t\t\t\te);\n\t\t\t\tbreak;\n\t\t\t}\n\t\t\ttry {\n\t\t\t\tThread.sleep(BACKOFF_TIME_IN_MILLIS);\n\t\t\t} catch (InterruptedException e) {\n\t\t\t\tLOG.debug(\"Interrupted sleep\", e);\n\t\t\t}\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/processor/AggregatorProcessorFactory.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.processor;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregatorGroup;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor;\nimport com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory;\n\n/**\n * Simple factory class to generate a standalone Kinesis Aggregator\n * IRecordProcessor for the application\n */\npublic class AggregatorProcessorFactory implements IRecordProcessorFactory {\n    private AggregatorGroup aggregators;\n\n    private final Log LOG = LogFactory.getLog(AggregatorProcessorFactory.class);\n\n    private AggregatorProcessorFactory() {\n    }\n\n    /**\n     * Create a Processor Factory that will create an Aggregator Processor which\n     * wraps the indicated Aggregator\n     * \n     * @param agg\n     */\n    public AggregatorProcessorFactory(StreamAggregator agg) {\n        this.aggregators = new AggregatorGroup();\n        this.aggregators.registerAggregator(agg);\n    }\n\n    public AggregatorProcessorFactory(AggregatorGroup group) {\n        this.aggregators = group;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    public 
IRecordProcessor createProcessor() {\n        try {\n            // every time we create a new processor instance, we have to embed a\n            // new instance of the AggregatorGroup, to eliminate any thread\n            // contention\n            return new AggregatorProcessor(new AggregatorGroup(this.aggregators));\n        } catch (Exception e) {\n            LOG.error(e);\n            return null;\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryCalculation.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.summary;\n\nimport com.amazonaws.services.dynamodbv2.model.ComparisonOperator;\nimport com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore.DynamoSummaryUpdateMethod;\n\npublic enum SummaryCalculation {\n    /**\n     * SUM Calculations simply always increase the aggregate value based upon\n     * the value observed on the stream\n     */\n    SUM(null, DynamoSummaryUpdateMethod.ADD) {\n        @Override\n        public Double apply(Double currentValue, Double newValue) {\n            // add the values including dealing with nulls\n            return nvl(currentValue) + nvl(newValue);\n        }\n    },\n\n    /**\n     * FIRST Calculations always return the first value observed, without\n     * considering the newest\n     */\n    FIRST(null, DynamoSummaryUpdateMethod.CONDITIONAL) {\n        @Override\n        public Double apply(Double currentValue, Double newValue) {\n            // always return the current value unless its null, then overwrite\n            // with the first value\n            return currentValue == null ? 
newValue : currentValue;\n        }\n    },\n\n    /**\n     * LAST Calculations always return the latest value observed, without\n     * considering the previous\n     */\n    LAST(null, DynamoSummaryUpdateMethod.PUT) {\n        @Override\n        public Double apply(Double currentValue, Double newValue) {\n            // always return the latest value\n            return nvl(newValue);\n        }\n    },\n\n    /**\n     * The Min calculation seeks to always record only the lowest value ever\n     * observed for a data value in the specified time horizon\n     */\n    MIN(ComparisonOperator.GT, DynamoSummaryUpdateMethod.CONDITIONAL) {\n        // The comparison operator is compared to the existing values. So to\n        // apply a\n        // minimum value, the existing value should be greater than the new\n        // value\n        @Override\n        public Double apply(Double currentValue, Double newValue) {\n            // the lower value wins, or 0 if values have not yet been\n            // initialised\n            if (currentValue == null)\n                return nvl(newValue);\n\n            if (newValue == null)\n                return nvl(currentValue);\n\n            Double output = nvl(newValue) < currentValue ? 
nvl(newValue) : currentValue;\n\n            return output;\n        }\n    },\n\n    /**\n     * The Max calculation will store only the maximum value observed on the\n     * stream for the time period\n     */\n    MAX(ComparisonOperator.LT, DynamoSummaryUpdateMethod.CONDITIONAL) {\n        // apply a new value only if the existing value is less than the new\n        // value\n        @Override\n        public Double apply(Double currentValue, Double newValue) {\n            // the greater value wins, or 0 if values have not yet been\n            // initialised\n            if (currentValue == null)\n                return nvl(newValue);\n\n            if (newValue == null)\n                return nvl(currentValue);\n\n            return nvl(newValue) > currentValue ? nvl(newValue) : currentValue;\n        }\n    };\n\n    private ComparisonOperator comparisonOperator;\n\n    private DynamoSummaryUpdateMethod updateMethod;\n\n    private SummaryCalculation(ComparisonOperator c, DynamoSummaryUpdateMethod updateMethod) {\n        this.comparisonOperator = c;\n        this.updateMethod = updateMethod;\n    }\n\n    private SummaryCalculation() {\n    }\n\n    /**\n     * Apply the calculation to the values provided to the interface\n     * \n     * @param currentValue The current aggregate value being managed by the\n     *        {@link com.amazonaws.services.kinesis.aggregators.cache.AggregateCache}\n     * @param newValue The new value from the stream to be applied to the\n     *        calculation\n     * @return\n     */\n    public abstract Double apply(Double currentValue, Double newValue);\n\n    /**\n     * Return the\n     * {@link com.amazonaws.services.dynamodbv2.model.ComparisonOperator} which\n     * will be applied when this calculation is written to the database\n     * \n     * @return\n     */\n    public ComparisonOperator getDynamoComparisonOperator() {\n        return this.comparisonOperator;\n    }\n\n    public DynamoSummaryUpdateMethod 
getSummaryUpdateMethod() {\n        return this.updateMethod;\n    }\n\n    private static double nvl(Double val) {\n        return val == null ? 0D : val;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryConfiguration.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.summary;\n\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Set;\n\nimport com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException;\n\n/**\n * The Summary Configuration object contains the required calculations to be\n * performed against summary items extracted from a Kinesis Data Stream. For\n * each item listed as summary value to be extracted from the stream, the\n * Summary configuration will store a list of the calculations against the base\n * item being calculated. 
For example, if the summary expression was:\n * sum(value_a), max(value_a), min(value_b) then the SummaryConfiguration would\n * be: \"key\" :[list of\n * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}]\n * --------- ------------ \"value_a\":[sum,max] \"value_b\":[min]\n */\npublic class SummaryConfiguration {\n    private Map<String, List<SummaryElement>> config = new HashMap<>();\n\n    /* closure over the map which contains the items to list */\n    final class ConfigWriter {\n        public void write(String s, SummaryElement e) {\n            List<SummaryElement> calculations = config.get(s);\n\n            // setup the list\n            if (calculations == null) {\n                calculations = new ArrayList<>();\n            }\n\n            calculations.add(e);\n\n            config.put(s, calculations);\n        }\n    }\n\n    private ConfigWriter writer = new ConfigWriter();\n\n    public SummaryConfiguration() {\n    }\n\n    public SummaryConfiguration(List<String> summaries) throws UnsupportedCalculationException {\n        for (String s : summaries) {\n            addConfig(s);\n        }\n    }\n\n    /**\n     * Add a calculation for a base attribute into the list of all calculations\n     * to be done\n     * \n     * @param value\n     * @param e\n     */\n    public void add(String value, SummaryElement e) {\n        writer.write(value, e);\n    }\n\n    private void addConfig(String summary) throws UnsupportedCalculationException {\n        SummaryElement e = new SummaryElement(summary);\n        add(e.getStreamDataElement(), e);\n    }\n\n    /**\n     * Add a fully formed expression to the list of all calculations. 
This uses\n     * the SummaryElement(String) constructor to parse the expression into its\n     * component parts.\n     * \n     * @param summary The expression to add\n     * @throws UnsupportedCalculationException\n     */\n    public SummaryConfiguration withConfigItem(String summary)\n            throws UnsupportedCalculationException {\n        addConfig(summary);\n        return this;\n    }\n\n    /**\n     * Get all\n     * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}s\n     * for an attribute item\n     * \n     * @param s The attribute of the stream to get the list of calculations for\n     * @return\n     */\n    public List<SummaryElement> getRequestedCalculations(String s) {\n        return this.config.get(s);\n    }\n\n    /**\n     * Get all attributes which this summary configuration is stored against\n     * \n     * @return\n     */\n    public Set<String> getItemSet() {\n        return this.config.keySet();\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryElement.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.aggregators.summary;\n\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException;\n\npublic class SummaryElement {\n    private String streamDataElement, attributeAlias;\n\n    private SummaryCalculation calculation;\n\n    public SummaryElement(String streamDataElement, SummaryCalculation calculation) {\n        this(streamDataElement, calculation, makeStoreAttributeName(streamDataElement, calculation));\n\n    }\n\n    public SummaryElement(String streamDataElement, SummaryCalculation calculation,\n            String attributeAlias) {\n        this.streamDataElement = streamDataElement;\n        this.calculation = calculation;\n        if (attributeAlias != null) {\n            this.attributeAlias = attributeAlias;\n        } else {\n            this.attributeAlias = makeStoreAttributeName(streamDataElement, calculation);\n        }\n    }\n\n    /**\n     * Parse a summary calculation expression to a Pair of the base item name,\n     * and the SummaryCalculation to be applied to that base item. 
The\n     * expression must take the form of:\n     * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}\n     * (attribute of the data stream)\n     * \n     * @param s\n     * @return\n     * @throws UnsupportedCalculationException\n     */\n    public SummaryElement(String s) throws UnsupportedCalculationException {\n        if (!s.contains(\"(\")) {\n            this.streamDataElement = s;\n            this.calculation = SummaryCalculation.SUM;\n            this.attributeAlias = makeStoreAttributeName(s, this.calculation);\n        } else {\n            if (!s.contains(\")\"))\n                throw new UnsupportedCalculationException(String.format(\n                        \"\\\"%s\\\" is not a valid summary calculation\", s));\n\n            String[] tokens = s.split(\"\\\\(\");\n            String requested = tokens[0].replaceAll(\" \", \"\").toUpperCase();\n\n            try {\n                SummaryCalculation c = SummaryCalculation.valueOf(requested);\n                String[] onItems = tokens[1].split(\"\\\\)\");\n                this.streamDataElement = onItems[0].replaceAll(\" \", \"\");\n                this.calculation = c;\n                if (onItems.length > 1 && onItems[1] != null) {\n                    this.attributeAlias = onItems[1].replaceAll(\" \", \"\");\n                } else {\n                    this.attributeAlias = makeStoreAttributeName(this.streamDataElement,\n                            this.calculation);\n                }\n            } catch (Exception e) {\n                throw new UnsupportedCalculationException(String.format(\n                        \"Unsupported Calculation %s\", requested));\n            }\n        }\n    }\n\n    public static String makeStoreAttributeName(String attribute, SummaryCalculation calculation) {\n        return String.format(\"%s-%s\", StreamAggregatorUtils.methodToColumn(attribute),\n                calculation.name());\n    }\n\n    public String 
getStreamDataElement() {\n        return streamDataElement;\n    }\n\n    public String getAttributeAlias() {\n        return attributeAlias;\n    }\n\n    public SummaryCalculation getCalculation() {\n        return calculation;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/AbstractDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\n\n/**\n * Abstract class which provides some helper methods for creating IDataExtractor\n * classes.\n */\npublic abstract class AbstractDataExtractor implements IDataExtractor {\n    protected AggregatorType aggregatorType = AggregatorType.COUNT;\n\n    protected SummaryConfiguration summaryConfig = new SummaryConfiguration();\n\n    public abstract String getAggregateLabelName();\n\n    public abstract String getDateValueName();\n\n    public abstract List<AggregateData> getData(InputEvent event) throws SerializationException;\n\n    public abstract void validate() throws Exception;\n\n    public AggregatorType getAggregatorType() {\n        return this.aggregatorType;\n    }\n\n    public void setAggregatorType(AggregatorType type) {\n        this.aggregatorType = type;\n    }\n\n    public SummaryConfiguration getSummaryConfig() {\n        return this.summaryConfig;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/CsvDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators Copyright 2014, Amazon.com, Inc. or its\n * affiliates. All Rights Reserved. Licensed under the Amazon Software License\n * (the \"License\"). You may not use this file except in compliance with the\n * License. A copy of the License is located at http://aws.amazon.com/asl/ or in\n * the \"license\" file accompanying this file. This file is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express\n * or implied. See the License for the specific language governing permissions\n * and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.io.serializer.CsvSerializer;\n\n/**\n * IDataExtractor implementation which allows for extraction of data from\n * Streams formatted as Character Separated Values. Also optionally allows for\n * regular expression based filtering of the stream prior to aggregation.\n */\npublic class CsvDataExtractor extends StringDataExtractor<CsvDataExtractor> implements\n        IDataExtractor {\n    private static String delimiter = \",\";\n\n    private static String itemTerminator = \"\\n\";\n\n    private CsvSerializer serialiser;\n\n    /**\n     * Create a new data extractor using the indicated index for the label value\n     * to be aggregated on, and the delimiter for tokenising the data value.\n     * \n     * @param labelIndex Index (base 0) of where in the CSV stream the label\n     *        value occurs\n     * @param delimiter The character delimiter separating items in the stream\n     *        data.\n     */\n    public CsvDataExtractor(List<Integer> labelIndicies) {\n        super.labelIndicies = labelIndicies;\n        this.serialiser = new CsvSerializer().withFieldDelimiter(delimiter).withItemTerminator(\n                itemTerminator);\n        super.serialiser = serialiser;\n    }\n\n    public CsvDataExtractor(List<Integer> labelIndicies, 
String labelAttributeAlias,\n            int dateValueIndex, String dateAttributeAlias, String fieldDelimiter,\n            CsvSerializer serialiser) {\n        super.labelIndicies = labelIndicies;\n        super.labelAttributeAlias = labelAttributeAlias;\n        super.dateValueIndex = dateValueIndex;\n        super.dateAttributeAlias = dateAttributeAlias;\n        this.serialiser = serialiser;\n        super.serialiser = serialiser;\n    }\n\n    /**\n     * Add a regular expression filter to this data extractor. When configured,\n     * only string values which match the regular expression will be\n     * deserialised and have data extracted from it.\n     * \n     * @param filterRegex Regular expression which must match in order for data\n     *        to be subject to data extraction.\n     * @return\n     */\n    public CsvDataExtractor withRegexFilter(String filterRegex) {\n        if (filterRegex != null) {\n            this.serialiser.withFilterRegex(filterRegex);\n            super.serialiser = this.serialiser;\n        }\n        return this;\n    }\n\n    /**\n     * Add a non default field delimiter. The default is \",\"\n     * \n     * @param delimiter The characters used for delimiting items within a line\n     * @return\n     */\n    public CsvDataExtractor withDelimiter(String delimiter) {\n        if (delimiter != null) {\n            this.serialiser.withFieldDelimiter(delimiter);\n            super.serialiser = this.serialiser;\n        }\n        return this;\n    }\n\n    /**\n     * Add a non default item terminator. 
The default is \"\\n\"\n     * \n     * @param lineTerminator The characters used for delimiting lines of text\n     * @return\n     */\n    public CsvDataExtractor withItemTerminator(String lineTerminator) {\n        if (lineTerminator != null) {\n            this.serialiser.withItemTerminator(lineTerminator);\n            super.serialiser = this.serialiser;\n        }\n        return this;\n    }\n\n    /**\n     * Add a custom configured serialiser\n     * \n     * @param serialiser\n     * @return\n     */\n    public CsvDataExtractor withSerialiser(CsvSerializer serialiser) {\n        this.serialiser = serialiser;\n        super.serialiser = serialiser;\n        return this;\n    }\n\n    /**\n     * Builder method for adding an index to the extraction configuration which\n     * indicates where the date item to be used for aggregation can be found.\n     * \n     * @param dateValueIndex The index value (base 0) in the CSV stream which\n     *        contains the date value.\n     * @return\n     */\n    public CsvDataExtractor withDateValueIndex(Integer dateValueIndex) {\n        if (dateValueIndex != null) {\n            this.dateValueIndex = dateValueIndex;\n        }\n        return this;\n    }\n\n    @Override\n    public IDataExtractor copy() throws Exception {\n        return new CsvDataExtractor(this.labelIndicies, super.labelAttributeAlias,\n                this.dateValueIndex, super.dateAttributeAlias, this.delimiter, this.serialiser).withSummaryIndicies(this.getOriginalSummaryExpressions());\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/IDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\n\n/**\n * Interface which allows for pluggable data extractors for different types of\n * stream data. 
Aggregators use IDataExtractor to interoperate between the\n * stream data format and the internal format required for Aggregation.\n * IDataExtractors likely use IKinesisSerializers to read and write to and from\n * the stream\n */\npublic interface IDataExtractor {\n    /**\n     * Get the name of the element which represents the unique ID for the event,\n     * if there is one\n     */\n    public String getUniqueIdName();\n\n    /**\n     * Get the name of the label value to be extracted.\n     * \n     * @return\n     */\n    public String getAggregateLabelName();\n\n    /**\n     * Get the name of the date value to be extracted.\n     * \n     * @return\n     */\n    public String getDateValueName();\n\n    /**\n     * Extract one or more aggregatable items from a Kinesis Record.\n     * \n     * @param event The Kinesis Record data from which we want to extract data.\n     * @return A list of AggregateData elements which have been resolved from\n     *         the input data.\n     * @throws SerializationException\n     */\n    public List<AggregateData> getData(InputEvent event) throws SerializationException;\n\n    /**\n     * Set the type of aggregator which contains this IDataExtractor. Used to\n     * boost efficiency in that the Extractor will not extract summary items for\n     * COUNT based Aggregator integration.\n     * \n     * @param type\n     */\n    public void setAggregatorType(AggregatorType type);\n\n    /**\n     * Validate that the extractor is well formed.\n     * \n     * @throws Exception\n     */\n    public void validate() throws Exception;\n\n    /**\n     * Get the summary configuration that is driving data extraction against the\n     * data stream.\n     * \n     * @return\n     */\n    public SummaryConfiguration getSummaryConfig();\n\n    public IDataExtractor copy() throws Exception;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/JsonDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.text.SimpleDateFormat;\nimport java.util.ArrayList;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\nimport com.amazonaws.services.kinesis.io.serializer.JsonSerializer;\nimport com.fasterxml.jackson.databind.JsonNode;\n\npublic class JsonDataExtractor extends AbstractDataExtractor implements\n\t\tIDataExtractor {\n\tprivate List<String> labelAttributes;\n\n\tprivate String labelName, dateFormat, 
uniqueIdAttribute,\n\t\t\tdateValueAttribute;\n\n\tprivate SimpleDateFormat dateFormatter;\n\n\tprivate List<String> summaryAttributes;\n\n\tprivate final Log LOG = LogFactory.getLog(JsonDataExtractor.class);\n\n\tprivate Map<String, Double> sumUpdates = new HashMap<>();\n\n\tprivate JsonSerializer serialiser = new JsonSerializer();\n\n\tprivate JsonDataExtractor() {\n\t}\n\n\tpublic JsonDataExtractor(List<String> labelAttributes) {\n\t\tthis.labelAttributes = labelAttributes;\n\t\tthis.labelName = LabelSet.fromStringKeys(labelAttributes).getName();\n\t}\n\n\tpublic JsonDataExtractor(List<String> labelAttributes,\n\t\t\tJsonSerializer serialiser) {\n\t\tthis(labelAttributes);\n\t\tthis.serialiser = serialiser;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic List<AggregateData> getData(InputEvent event)\n\t\t\tthrows SerializationException {\n\t\ttry {\n\t\t\tList<AggregateData> aggregateData = new ArrayList<>();\n\t\t\tDate dateValue = null;\n\t\t\tJsonNode jsonContent = null;\n\t\t\tString dateString, summary = null;\n\t\t\tsumUpdates = new HashMap<>();\n\n\t\t\tList<String> items = (List<String>) serialiser.toClass(event);\n\n\t\t\t// log a warning if we didn't get anything back from the serialiser\n\t\t\t// - this could be OK, but probably isn't\n\t\t\tif (items == null || items.size() == 0)\n\t\t\t\tLOG.warn(String\n\t\t\t\t\t\t.format(\"Failed to deserialise any content for Record (Partition Key %s, Sequence %s\",\n\t\t\t\t\t\t\t\tevent.getPartitionKey(),\n\t\t\t\t\t\t\t\tevent.getSequenceNumber()));\n\n\t\t\t// process all the items returned by the serialiser\n\t\t\tfor (String item : items) {\n\t\t\t\t// Convert the string to a Jackson JsonNode for navigation\n\t\t\t\tjsonContent = StreamAggregatorUtils.asJsonNode(item);\n\n\t\t\t\tLabelSet labels = new LabelSet();\n\t\t\t\tfor (String key : this.labelAttributes) {\n\t\t\t\t\tlabels.put(key, StreamAggregatorUtils.readValueAsString(\n\t\t\t\t\t\t\tjsonContent, key));\n\t\t\t\t}\n\n\t\t\t\t// 
get the unique ID for the event\n\t\t\t\tString uniqueId = null;\n\t\t\t\tif (this.uniqueIdAttribute != null) {\n\t\t\t\t\tswitch (this.uniqueIdAttribute) {\n\t\t\t\t\tcase StreamAggregator.REF_PARTITION_KEY:\n\t\t\t\t\t\tuniqueId = event.getPartitionKey();\n\t\t\t\t\t\tbreak;\n\t\t\t\t\tcase StreamAggregator.REF_SEQUENCE:\n\t\t\t\t\t\tuniqueId = event.getSequenceNumber();\n\t\t\t\t\t\tbreak;\n\t\t\t\t\tdefault:\n\t\t\t\t\t\tuniqueId = StreamAggregatorUtils.readValueAsString(\n\t\t\t\t\t\t\t\tjsonContent, uniqueIdAttribute);\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// get the date value from the line\n\t\t\t\tif (dateValueAttribute != null) {\n\t\t\t\t\tdateString = StreamAggregatorUtils.readValueAsString(\n\t\t\t\t\t\t\tjsonContent, dateValueAttribute);\n\n\t\t\t\t\t// bail on no date returned\n\t\t\t\t\tif (dateString == null || dateString.equals(\"\"))\n\t\t\t\t\t\tthrow new SerializationException(\n\t\t\t\t\t\t\t\tString.format(\n\t\t\t\t\t\t\t\t\t\t\"Unable to read date value attribute %s from JSON Content %s\",\n\t\t\t\t\t\t\t\t\t\tdateValueAttribute, item));\n\n\t\t\t\t\t// turn date as long or string into Date\n\t\t\t\t\tif (this.dateFormat != null) {\n\t\t\t\t\t\tdateValue = dateFormatter.parse(dateString);\n\t\t\t\t\t} else {\n\t\t\t\t\t\t// no formatter, so treat as epoch seconds\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\tdateValue = new Date(Long.parseLong(dateString));\n\t\t\t\t\t\t} catch (Exception e) {\n\t\t\t\t\t\t\tLOG.error(String\n\t\t\t\t\t\t\t\t\t.format(\"Unable to create Date Value element from item '%s' due to invalid format as Epoch Seconds\",\n\t\t\t\t\t\t\t\t\t\t\tdateValueAttribute));\n\t\t\t\t\t\t\tthrow new SerializationException(e);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\t\t\t// no date value attribute configured, so use now\n\t\t\t\t\tdateValue = new Date(System.currentTimeMillis());\n\t\t\t\t}\n\n\t\t\t\t// get the summed values\n\t\t\t\tif (this.aggregatorType.equals(AggregatorType.SUM)) {\n\t\t\t\t\t// get the 
positional sum items\n\t\t\t\t\tfor (String s : summaryConfig.getItemSet()) {\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\tsummary = StreamAggregatorUtils.readValueAsString(\n\t\t\t\t\t\t\t\t\tjsonContent, s);\n\n\t\t\t\t\t\t\t// if a summary is not found in the data element,\n\t\t\t\t\t\t\t// then we simply continue without it\n\t\t\t\t\t\t\t// StreamAggregatorUtils.readValueAsString returns\n\t\t\t\t\t\t\t// \"\" if\n\t\t\t\t\t\t\t// attribute is not found.\n\t\t\t\t\t\t\tif (summary != null && !summary.equals(\"\")) {\n\t\t\t\t\t\t\t\tsumUpdates.put(s, Double.parseDouble(summary));\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t} catch (NumberFormatException nfe) {\n\t\t\t\t\t\t\tLOG.error(String\n\t\t\t\t\t\t\t\t\t.format(\"Unable to deserialise Summary '%s' due to NumberFormatException\",\n\t\t\t\t\t\t\t\t\t\t\ts));\n\t\t\t\t\t\t\tthrow new SerializationException(nfe);\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\taggregateData.add(new AggregateData(uniqueId, labels,\n\t\t\t\t\t\tdateValue, sumUpdates));\n\t\t\t}\n\n\t\t\treturn aggregateData;\n\t\t} catch (Exception e) {\n\t\t\tthrow new SerializationException(e);\n\t\t}\n\t}\n\n\t/** Builder method to add the attribute which is the event unique id */\n\tpublic JsonDataExtractor withUniqueIdAttribute(String uniqueIdAttribute) {\n\t\tthis.uniqueIdAttribute = uniqueIdAttribute;\n\n\t\treturn this;\n\t}\n\n\t/**\n\t * Builder Method to add the attribute name which contains the date value\n\t * for the stream item.\n\t * \n\t * @param dateValueAttribute\n\t *            The attribute name which contains the date item.\n\t * @return\n\t */\n\tpublic JsonDataExtractor withDateValueAttribute(String dateValueAttribute) {\n\t\tif (dateValueAttribute != null)\n\t\t\tthis.dateValueAttribute = dateValueAttribute;\n\t\treturn this;\n\t}\n\n\t/**\n\t * Builder method which allows adding a date format string which can be used\n\t * to convert the data value in the dateValueAttribute, if the value is a\n\t * string.\n\t * \n\t * @param 
dateFormat\n\t *            Date Format in {@link java.text.SimpleDateFormat} form.\n\t * @return\n\t */\n\tpublic JsonDataExtractor withDateFormat(String dateFormat) {\n\t\tif (dateFormat != null && !dateFormat.equals(\"\")) {\n\t\t\tthis.dateFormat = dateFormat;\n\t\t\tthis.dateFormatter = new SimpleDateFormat(dateFormat);\n\t\t}\n\t\treturn this;\n\t}\n\n\tpublic JsonDataExtractor withSerialiser(JsonSerializer serialiser) {\n\t\tthis.serialiser = serialiser;\n\t\treturn this;\n\t}\n\n\t/**\n\t * Builder method which allows for setting a list of summary attribute names\n\t * or expressions on the data extractor.\n\t * \n\t * @param summaryAttributes\n\t *            List of summary attribute names or expressions which should be\n\t *            extracted from the data\n\t * @return\n\t * @throws UnsupportedCalculationException\n\t */\n\tpublic JsonDataExtractor withSummaryAttributes(\n\t\t\tList<String> summaryAttributes)\n\t\t\tthrows UnsupportedCalculationException {\n\t\tif (summaryAttributes != null) {\n\t\t\tthis.aggregatorType = AggregatorType.SUM;\n\t\t\tthis.summaryAttributes = summaryAttributes;\n\t\t\tthis.summaryConfig = new SummaryConfiguration(summaryAttributes);\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * Add a regular expression filter to this data extractor. When configured,\n\t * only string values which match the regular expression will be\n\t * deserialised and have data extracted from it.\n\t * \n\t * @param filterRegex\n\t *            Regular expression which must match in order for data to be\n\t *            subject to data extraction.\n\t * @return\n\t */\n\tpublic JsonDataExtractor withRegexFilter(String filterRegex) {\n\t\tif (filterRegex != null) {\n\t\t\tthis.serialiser.withFilterRegex(filterRegex);\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * Add a non default item terminator. 
The default is \"\\n\"\n\t * \n\t * @param lineTerminator\n\t *            The characters used for delimiting lines of text\n\t * @return\n\t */\n\tpublic JsonDataExtractor withItemTerminator(String lineTerminator) {\n\t\tif (lineTerminator != null) {\n\t\t\tthis.serialiser.withItemTerminator(lineTerminator);\n\t\t}\n\t\treturn this;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic String getAggregateLabelName() {\n\t\treturn this.labelName;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic String getDateValueName() {\n\t\treturn this.dateValueAttribute == null ? StreamAggregator.DEFAULT_DATE_VALUE\n\t\t\t\t: this.dateValueAttribute;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic String getUniqueIdName() {\n\t\treturn this.uniqueIdAttribute;\n\t}\n\n\t/**\n\t * {@inheritDoc}\n\t */\n\t@Override\n\tpublic void validate() throws Exception {\n\t\tif (this.serialiser == null) {\n\t\t\tthrow new InvalidConfigurationException(\n\t\t\t\t\t\"Cannot create instance of JsonDataExtractor without an IKinesisSerialiser\");\n\t\t}\n\n\t\tif (this.aggregatorType.equals(AggregatorType.SUM)\n\t\t\t\t&& this.summaryAttributes == null) {\n\t\t\tthrow new InvalidConfigurationException(\n\t\t\t\t\t\"Summary aggregators require configuration of Summary Attributes\");\n\t\t}\n\t}\n\n\tpublic IDataExtractor copy() throws Exception {\n\t\treturn new JsonDataExtractor(this.labelAttributes, this.serialiser)\n\t\t\t\t.withDateFormat(this.dateFormat)\n\t\t\t\t.withDateValueAttribute(this.dateValueAttribute)\n\t\t\t\t.withSummaryAttributes(this.summaryAttributes);\n\t}\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/ObjectExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.lang.reflect.Method;\nimport java.util.ArrayList;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.LinkedHashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryElement;\nimport com.amazonaws.services.kinesis.io.serializer.IKinesisSerializer;\nimport 
com.amazonaws.services.kinesis.io.serializer.JsonSerializer;\n\n/**\n * IDataExtractor which supports extracting data from Objects via reflected\n * method signatures.\n */\npublic class ObjectExtractor extends AbstractDataExtractor implements IDataExtractor {\n    @SuppressWarnings(\"rawtypes\")\n    private Class clazz;\n\n    private String uniqueIdMethodName;\n\n    private List<String> aggregateLabelMethods;\n\n    private Map<String, Method> aggregateLabelMethodMap = new LinkedHashMap<>();\n\n    private String aggregateLabelColumn, dateValueColumn, dateMethodName;\n\n    private Method dateMethod, uniqueIdMethod;\n\n    private Object eventDate;\n\n    private Map<String, Method> sumValueMap;\n\n    private Object summaryValue;\n\n    private final Log LOG = LogFactory.getLog(ObjectExtractor.class);\n\n    private Date dateValue;\n\n    private Map<String, Double> sumUpdates = new HashMap<>();\n\n    private IKinesisSerializer<Object, byte[]> serialiser;\n\n    private List<AggregateData> data;\n\n    private boolean validated = false;\n\n    private ObjectExtractor() {\n    }\n\n    public ObjectExtractor(Class clazz) throws Exception {\n        AnnotationProcessor p = new AnnotationProcessor(clazz);\n        this.aggregateLabelMethods = p.getLabelMethodNames();\n\n        for (String s : p.getLabelMethodNames()) {\n            this.aggregateLabelMethodMap.put(s, p.getLabelMethods().get(s));\n        }\n\n        LabelSet labels = LabelSet.fromStringKeys(this.aggregateLabelMethods);\n        this.aggregateLabelColumn = StreamAggregatorUtils.methodToColumn(labels.getName());\n        this.dateMethodName = p.getDateMethodName();\n        this.dateValueColumn = StreamAggregatorUtils.methodToColumn(p.getDateMethodName());\n        this.dateMethod = p.getDateMethod();\n        this.sumValueMap = p.getSummaryMethods();\n        this.summaryConfig = p.getSummaryConfig();\n\n        this.clazz = clazz;\n        this.serialiser = new JsonSerializer(clazz);\n    
}\n\n    /**\n     * Create an Object Extractor using Default serialisation for the class.\n     * \n     * @param aggregateLabelMethod The method to be used as the label for\n     *        aggregation.\n     * @param clazz The base class used for deserialisation and accessed using\n     *        configured accessor methods.\n     */\n    public ObjectExtractor(List<String> aggregateLabelMethods, Class clazz) throws Exception {\n        this(aggregateLabelMethods, clazz, null);\n\n    }\n\n    /**\n     * Create an Object Extractor using indicated serialisation for the class.\n     * \n     * @param aggregateLabelMethod The method to be used as the label for\n     *        aggregation.\n     * @param clazz The base class used for deserialisation and accessed using\n     *        configured accessor methods.\n     * @param serialiser Instance of an ITransformer which converts between the\n     *        binary Kinesis format and the required Object format indicated by\n     *        the base class.\n     */\n    public ObjectExtractor(List<String> aggregateLabelMethodNames, Class clazz,\n            IKinesisSerializer<Object, byte[]> serialiser) throws Exception {\n        this.clazz = clazz;\n\n        if (serialiser == null) {\n            this.serialiser = new JsonSerializer(clazz);\n        } else {\n            this.serialiser = serialiser;\n        }\n\n        if (aggregateLabelMethodNames == null || aggregateLabelMethodNames.size() == 0) {\n            throw new InvalidConfigurationException(\n                    \"Cannot Aggregate an Object without a Label Method\");\n        } else {\n            this.aggregateLabelMethods = aggregateLabelMethodNames;\n\n            for (String s : aggregateLabelMethodNames) {\n                Method m = clazz.getDeclaredMethod(s);\n                m.setAccessible(true);\n\n                this.aggregateLabelMethodMap.put(s, m);\n            }\n        }\n\n        LabelSet labels = 
LabelSet.fromStringKeys(this.aggregateLabelMethods);\n        this.aggregateLabelColumn = labels.getName();\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public void validate() throws Exception {\n        if (!validated) {\n            // validate sum config\n            if ((this.aggregatorType.equals(AggregatorType.SUM)) && this.sumValueMap == null) {\n                throw new Exception(\n                        \"Summary Aggregators require both a Label Field and a Value Field Set\");\n            }\n\n            if (this.aggregatorType.equals(AggregatorType.SUM)) {\n                for (String s : this.sumValueMap.keySet()) {\n                    try {\n                        Method m = clazz.getDeclaredMethod(s);\n                        m.setAccessible(true);\n                        this.sumValueMap.put(s, m);\n                    } catch (NoSuchMethodException e) {\n                        LOG.error(e);\n                        throw e;\n                    }\n                }\n            }\n\n            LOG.info(String.format(\"Object Extractor Configuration\\n\" + \"Class: %s\\n\"\n                    + \"Date Method: %s\\n\", this.clazz.getSimpleName(), this.dateMethodName));\n\n            validated = true;\n        }\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public List<AggregateData> getData(InputEvent event) throws SerializationException {\n        if (!validated) {\n            try {\n                validate();\n            } catch (Exception e) {\n                throw new SerializationException(e);\n            }\n        }\n\n        try {\n            List<AggregateData> data = new ArrayList<>();\n\n            Object o = serialiser.toClass(event);\n\n            // get the value of the reflected labels\n            LabelSet labels = new LabelSet();\n            for (String key : this.aggregateLabelMethods) {\n                labels.put(key, 
aggregateLabelMethodMap.get(key).invoke(o).toString());\n            }\n\n            // get the unique ID value from the object\n            String uniqueId = null;\n            if (this.uniqueIdMethodName != null) {\n                switch (this.uniqueIdMethodName) {\n                    case StreamAggregator.REF_PARTITION_KEY:\n                        uniqueId = event.getPartitionKey();\n                        break;\n                    case StreamAggregator.REF_SEQUENCE:\n                        uniqueId = event.getSequenceNumber();\n                        break;\n                    default:\n                        Object id = uniqueIdMethod.invoke(o);\n                        if (id != null) {\n                            uniqueId = id.toString();\n                        }\n                        break;\n                }\n            }\n\n            // get the date value from the object\n            if (this.dateMethod != null) {\n                eventDate = dateMethod.invoke(o);\n\n                if (eventDate == null) {\n                    dateValue = new Date(System.currentTimeMillis());\n                } else {\n                    if (eventDate instanceof Date) {\n                        dateValue = (Date) eventDate;\n                    } else if (eventDate instanceof Long) {\n                        dateValue = new Date((Long) eventDate);\n                    } else {\n                        throw new Exception(String.format(\n                                \"Cannot use data type %s for date value on event\",\n                                eventDate.getClass().getSimpleName()));\n                    }\n                }\n            }\n\n            // extract all summed values from the serialised object\n            if (this.aggregatorType.equals(AggregatorType.SUM)) {\n                // lift out the aggregated method value\n                for (String s : this.sumValueMap.keySet()) {\n                    summaryValue = 
this.sumValueMap.get(s).invoke(o);\n\n                    if (summaryValue != null) {\n                        if (summaryValue instanceof Double) {\n                            sumUpdates.put(s, (Double) summaryValue);\n                        } else if (summaryValue instanceof Long) {\n                            sumUpdates.put(s, ((Long) summaryValue).doubleValue());\n                        } else if (summaryValue instanceof Integer) {\n                            sumUpdates.put(s, ((Integer) summaryValue).doubleValue());\n                        } else {\n                            String msg = String.format(\n                                    \"Unable to access  Summary %s due to NumberFormatException\", s);\n                            LOG.error(msg);\n                            throw new SerializationException(msg);\n                        }\n                    }\n                }\n            }\n\n            data.add(new AggregateData(uniqueId, labels, dateValue, sumUpdates));\n\n            return data;\n        } catch (Exception e) {\n            throw new SerializationException(e);\n        }\n    }\n\n    /**\n     * Builder which allows for configuration a date method to be used as the\n     * date item for aggregation.\n     * \n     * @param dateMethodName The name of the method which returns the date for\n     *        the event.\n     * @return\n     */\n    public ObjectExtractor withDateMethod(String dateMethodName) throws NoSuchMethodException {\n        this.dateMethodName = dateMethodName;\n        this.dateValueColumn = StreamAggregatorUtils.methodToColumn(dateMethodName);\n        this.dateMethod = this.clazz.getDeclaredMethod(dateMethodName);\n        this.dateMethod.setAccessible(true);\n        return this;\n    }\n\n    public ObjectExtractor withUniqueIdMethod(String uniqueIdMethodName)\n            throws NoSuchMethodException {\n        this.uniqueIdMethodName = uniqueIdMethodName;\n\n        switch (this.uniqueIdMethodName) 
{\n            case StreamAggregator.REF_PARTITION_KEY:\n                break;\n            case StreamAggregator.REF_SEQUENCE:\n                break;\n            default:\n                this.uniqueIdMethod = this.clazz.getDeclaredMethod(this.uniqueIdMethodName);\n                break;\n        }\n\n        return this;\n    }\n\n    /**\n     * Builder which allows associating a set of method names or expressions\n     * against methods for use as summary aggregate values.\n     * \n     * @param summaryMethodName The method name or an expression against the\n     *        method name which will be used as summary aggregate values. For\n     *        instance, when an expression is used against a method, the format\n     *        is SummaryCalculation(methodName), for example:\n     *        sum(getObjectValue)\n     * @return\n     * @throws UnsupportedCalculationException\n     */\n    public ObjectExtractor withSummaryMethods(List<String> summaryMethodName)\n            throws UnsupportedCalculationException {\n        if (summaryMethodName != null) {\n            this.aggregatorType = AggregatorType.SUM;\n\n            if (this.sumValueMap == null)\n                this.sumValueMap = new HashMap<>();\n\n            for (String s : summaryMethodName) {\n                this.summaryConfig.withConfigItem(s);\n                // parse the requested summary method name into a calculation\n                // and name, as we require the method name directly\n                SummaryElement e = new SummaryElement(s);\n                this.sumValueMap.put(e.getStreamDataElement(), null);\n            }\n        }\n\n        return this;\n    }\n\n    public ObjectExtractor withSummaryConfig(SummaryConfiguration config) {\n        this.summaryConfig = config;\n\n        if (this.sumValueMap == null)\n            this.sumValueMap = new HashMap<>();\n\n        for (String s : this.summaryConfig.getItemSet()) {\n            this.sumValueMap.put(s, null);\n        
}\n\n        return this;\n    }\n\n    /**\n     * Get the class which this object can extract data from\n     * \n     * @return\n     */\n    @SuppressWarnings(\"rawtypes\")\n    public Class getClazz() {\n        return this.clazz;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public String getAggregateLabelName() {\n        return this.aggregateLabelColumn;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public String getDateValueName() {\n        return this.dateValueColumn == null ? StreamAggregator.DEFAULT_DATE_VALUE\n                : this.dateValueColumn;\n    }\n\n    public IDataExtractor copy() throws Exception {\n        throw new UnsupportedOperationException();\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public String getUniqueIdName() {\n        if (this.uniqueIdMethod != null) {\n            return StreamAggregatorUtils.methodToColumn(this.uniqueIdMethodName);\n        } else {\n            return null;\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/RegexDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.util.List;\n\nimport com.amazonaws.services.kinesis.io.serializer.CsvSerializer;\nimport com.amazonaws.services.kinesis.io.serializer.RegexSerializer;\n\n/**\n * IDataExtractor implementation which allows for extraction of data from\n * Streams formatted as Character Separated Values. Also optionally allows for\n * regular expression based filtering of the stream prior to aggregation.\n */\npublic class RegexDataExtractor extends StringDataExtractor<RegexDataExtractor> implements\n        IDataExtractor {\n    private String regex;\n\n    private RegexSerializer serialiser;\n\n    /**\n     * Create a new data extractor using the indicated index for the label value\n     * to be aggregated on, and the regular expression used for data extraction\n     * \n     * @param labelIndex Index (base 0) of where in the CSV stream the label\n     *        value occurs\n     * @param delimiter The character delimiter separating items in the stream\n     *        data.\n     */\n    public RegexDataExtractor(String regex, List<Integer> labelIndicies) {\n        this(regex, labelIndicies, null, -1, null, null);\n    }\n\n    public RegexDataExtractor(String regex, List<Integer> labelIndicies, int dateValueIndex) {\n        this(regex, labelIndicies, null, dateValueIndex, null, null);\n    
}\n\n    public RegexDataExtractor(String regex, List<Integer> labelIndicies,\n            String labelAttributeAlias, int dateValueIndex, String dateAttributeAlias,\n            RegexSerializer serialiser) {\n        this.regex = regex;\n        super.labelIndicies = labelIndicies;\n        super.labelAttributeAlias = labelAttributeAlias;\n        super.dateAttributeAlias = dateAttributeAlias;\n\n        if (dateValueIndex != -1)\n            super.dateValueIndex = dateValueIndex;\n        if (serialiser != null) {\n            super.serialiser = serialiser;\n        } else {\n            super.serialiser = new RegexSerializer(regex);\n        }\n    }\n\n    /**\n     * Add a non default item terminator. The default is \"\\n\"\n     * \n     * @param lineTerminator The characters used for delimiting lines of text\n     * @return\n     */\n    public RegexDataExtractor withItemTerminator(String lineTerminator) {\n        if (lineTerminator != null) {\n            this.serialiser.withItemTerminator(lineTerminator);\n            super.serialiser = this.serialiser;\n        }\n        return this;\n    }\n\n    /**\n     * Add a custom configured serialiser\n     * \n     * @param serialiser\n     * @return\n     */\n    public RegexDataExtractor withSerialiser(CsvSerializer serialiser) {\n        super.serialiser = serialiser;\n        return this;\n    }\n\n    /**\n     * Builder method for adding a index to the extraction configuration which\n     * indicates where the date item to be used for aggregation can be found.\n     * \n     * @param dateValueIndex The index value (base 0) in the CSV stream which\n     *        contains the date value.\n     * @return\n     */\n    public RegexDataExtractor withDateValueIndex(Integer dateValueIndex) {\n        if (dateValueIndex != null) {\n            this.dateValueIndex = dateValueIndex;\n        }\n        return this;\n    }\n\n    @Override\n    public IDataExtractor copy() throws Exception {\n        
RegexDataExtractor dataExtractor = new RegexDataExtractor(this.regex, this.labelIndicies,\n                super.labelAttributeAlias, this.dateValueIndex, super.dateAttributeAlias,\n                this.serialiser).withSummaryIndicies(this.getOriginalSummaryExpressions());\n\n        return dataExtractor;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/StringDataExtractor.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io;\n\nimport java.text.SimpleDateFormat;\nimport java.util.ArrayList;\nimport java.util.Date;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport com.amazonaws.services.kinesis.aggregators.AggregateData;\nimport com.amazonaws.services.kinesis.aggregators.AggregatorType;\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.amazonaws.services.kinesis.aggregators.LabelSet;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregator;\nimport com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;\nimport com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.SerializationException;\nimport com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException;\nimport com.amazonaws.services.kinesis.aggregators.summary.SummaryElement;\nimport com.amazonaws.services.kinesis.io.serializer.IKinesisSerializer;\n\n/**\n * IDataExtractor implementation which allows for extraction of data from\n * Streams formatted as Character Separated Values. 
Also optionally allows for\n * regular expression based filtering of the stream prior to aggregation.\n */\npublic class StringDataExtractor<T extends StringDataExtractor<T>> extends AbstractDataExtractor\n        implements IDataExtractor {\n    protected List<Integer> labelIndicies = new ArrayList<>();\n\n    private LabelSet labelSet;\n\n    protected String labelAttributeAlias, dateAttributeAlias;\n\n    private boolean usePartitionKeyForUnique = false;\n\n    private boolean useSequenceForUnique = false;\n\n    private int uniqueIdIndex = -1;\n\n    protected int dateValueIndex = -1;\n\n    private String dateFormat;\n\n    private SimpleDateFormat dateFormatter;\n\n    protected List<Object> originalSummaryExpressions = new ArrayList<>();\n\n    protected List<Integer> summaryIndicies = new ArrayList<>();\n\n    protected Map<String, Double> sumUpdates;\n\n    protected IKinesisSerializer<List<List<String>>, byte[]> serialiser;\n\n    private final Log LOG = LogFactory.getLog(StringDataExtractor.class);\n\n    protected StringDataExtractor() {\n    }\n\n    /**\n     * Validate that the Data Extractor is correctly configured.\n     */\n    @Override\n    public void validate() throws Exception {\n        if (this.serialiser == null) {\n            throw new InvalidConfigurationException(\n                    \"Unable to create instance of StringDataExtractor without an IKinesisSerialiser\");\n        }\n\n        if (aggregatorType.equals(AggregatorType.SUM)\n                && (this.summaryIndicies == null || this.summaryIndicies.size() == 0)) {\n            throw new InvalidConfigurationException(\n                    \"Summary type String Aggregators require a list of Summary Indicies\");\n        }\n\n        this.labelSet = LabelSet.fromIntegerKeys(this.labelIndicies);\n\n        if (this.labelAttributeAlias != null) {\n            this.labelSet.withAlias(this.labelAttributeAlias);\n        }\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    
@Override\n    public List<AggregateData> getData(InputEvent event) throws SerializationException {\n        try {\n            int summaryIndex = -1;\n            String dateString;\n            Date dateValue;\n            List<AggregateData> data = new ArrayList<>();\n\n            List<List<String>> content = serialiser.toClass(event);\n            for (List<String> line : content) {\n                if (line != null) {\n                    LabelSet labels = new LabelSet();\n                    labels.withAlias(this.labelAttributeAlias);\n\n                    for (Integer key : this.labelIndicies) {\n                        labels.put(\"\" + key, line.get(key));\n                    }\n\n                    // get the unique index\n                    String uniqueId = null;\n                    if (this.usePartitionKeyForUnique) {\n                        uniqueId = event.getPartitionKey();\n                    } else if (this.useSequenceForUnique) {\n                        uniqueId = event.getSequenceNumber();\n                    } else {\n                        if (this.uniqueIdIndex != -1) {\n                            uniqueId = line.get(this.uniqueIdIndex);\n                        }\n                    }\n\n                    // get the date value from the line\n                    if (this.dateValueIndex != -1) {\n                        dateString = line.get(dateValueIndex);\n                        if (this.dateFormat != null) {\n                            dateValue = dateFormatter.parse(dateString);\n                        } else {\n                            // no formatter, so treat as epoch seconds\n                            try {\n                                dateValue = new Date(Long.parseLong(dateString));\n                            } catch (Exception e) {\n                                LOG.error(String.format(\n                                        \"Unable to create Date Value element from item '%s' due to invalid format 
as Epoch Seconds\",\n                                        dateValueIndex));\n                                throw new SerializationException(e);\n                            }\n                        }\n                    } else {\n                        dateValue = new Date(System.currentTimeMillis());\n                    }\n\n                    // get the summed values\n                    if (this.aggregatorType.equals(AggregatorType.SUM)) {\n                        sumUpdates = new HashMap<>();\n\n                        // get the positional sum items\n                        for (int i = 0; i < summaryIndicies.size(); i++) {\n                            summaryIndex = summaryIndicies.get(i);\n                            try {\n                                sumUpdates.put(\"\" + summaryIndex,\n                                        Double.parseDouble(line.get(summaryIndex)));\n                            } catch (NumberFormatException nfe) {\n                                LOG.error(String.format(\n                                        \"Unable to deserialise Summary '%s' due to NumberFormatException\",\n                                        i));\n                                throw new SerializationException(nfe);\n                            }\n                        }\n                    }\n\n                    data.add(new AggregateData(uniqueId, labels, dateValue, sumUpdates));\n                }\n            }\n\n            return data;\n        } catch (Exception e) {\n            throw new SerializationException(e);\n        }\n\n    }\n\n    /**\n     * Builder method to add a date format (based on\n     * {@link java.text.SimpleDateFormat} when the dateValueIndex item is a\n     * string.\n     * \n     * @param dateFormat\n     * @return\n     */\n    @SuppressWarnings(\"unchecked\")\n    public T withDateFormat(String dateFormat) {\n        if (dateFormat != null && !dateFormat.equals(\"\")) {\n            this.dateFormat = 
dateFormat;\n            this.dateFormatter = new SimpleDateFormat(dateFormat);\n        }\n        return (T) this;\n    }\n\n    /**\n     * Builder method to add a set of summary indicies or expressions to the\n     * aggregation configuration.\n     * \n     * @param summaryIndicies List of integer values indicating positions in the\n     *        stream for summary values, or a list of strings indicating\n     *        expressions around positions which contain summary values to be\n     *        aggregated. If expressions using\n     *        {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}\n     *        are used, then the format is SummaryCalculation(index), for\n     *        example the sum of position 4 would be 'sum(4)'\n     * @return\n     * @throws UnsupportedCalculationException\n     */\n    @SuppressWarnings(\"unchecked\")\n    public T withSummaryIndicies(List<Object> summaryIndicies)\n            throws UnsupportedCalculationException {\n        if (summaryIndicies != null) {\n            for (Object o : summaryIndicies) {\n                if (o instanceof Integer) {\n                    Integer i = (Integer) o;\n                    withSummaryIndex(i);\n                } else if (o instanceof String) {\n                    String s = (String) o;\n                    withSummaryIndex(s);\n                } else {\n                    throw new UnsupportedCalculationException(String.format(\n                            \"Unable to generate calculation for %s Datatype\",\n                            o.getClass().getSimpleName()));\n                }\n            }\n        }\n\n        return (T) this;\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    public T withStringSummaryIndicies(List<String> summaryIndicies)\n            throws UnsupportedCalculationException {\n        if (summaryIndicies != null) {\n            for (String s : summaryIndicies) {\n                withSummaryIndex(s);\n            }\n        
}\n\n        return (T) this;\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    public T withIntegerSummaryIndicies(List<Integer> summaryIndicies)\n            throws UnsupportedCalculationException {\n        if (summaryIndicies != null) {\n            for (Integer i : summaryIndicies) {\n                withSummaryIndex(i);\n            }\n        }\n\n        return (T) this;\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    public T withSummaryIndex(Integer index) {\n        this.aggregatorType = AggregatorType.SUM;\n\n        this.summaryIndicies.add(index);\n        this.originalSummaryExpressions.add(index);\n        try {\n            this.summaryConfig.withConfigItem(String.format(\"sum(%s)\", index));\n        } catch (UnsupportedCalculationException e) {\n        }\n\n        return (T) this;\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    public T withSummaryIndex(String expression) throws UnsupportedCalculationException {\n        this.aggregatorType = AggregatorType.SUM;\n\n        if (this.summaryIndicies == null) {\n            this.summaryIndicies = new ArrayList<>();\n        }\n\n        SummaryElement e = new SummaryElement(expression);\n\n        this.originalSummaryExpressions.add(expression);\n        this.summaryIndicies.add(Integer.parseInt(e.getStreamDataElement()));\n        this.summaryConfig.withConfigItem(expression);\n\n        return (T) this;\n    }\n\n    public T withLabelAttributeAlias(String alias) {\n        this.labelAttributeAlias = alias;\n\n        return (T) this;\n    }\n\n    public T withUniqueIdIndex(String index) {\n        switch (index) {\n            case StreamAggregator.REF_PARTITION_KEY:\n                this.usePartitionKeyForUnique = true;\n                break;\n            case StreamAggregator.REF_SEQUENCE:\n                this.useSequenceForUnique = true;\n                break;\n            default:\n                this.uniqueIdIndex = Integer.parseInt(index);\n\n                break;\n        
}\n\n        return (T) this;\n    }\n\n    public T withDateAttributeAlias(String alias) {\n        this.dateAttributeAlias = alias;\n\n        return (T) this;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    public String getAggregateLabelName() {\n        return this.labelSet.getName();\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    @Override\n    public String getUniqueIdName() {\n        return \"\" + this.uniqueIdIndex;\n    }\n\n    /**\n     * {@inheritDoc}\n     */\n    public String getDateValueName() {\n        return this.dateAttributeAlias != null ? this.dateAttributeAlias : \"\" + this.dateValueIndex;\n    }\n\n    public List<Object> getOriginalSummaryExpressions() {\n        return this.originalSummaryExpressions;\n    }\n\n    public IDataExtractor copy() throws Exception {\n        throw new UnsupportedOperationException();\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/CsvSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.Arrays;\nimport java.util.List;\nimport java.util.regex.Pattern;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\n\npublic class CsvSerializer extends StringSerializer<CsvSerializer> implements\n        IKinesisSerializer<List<List<String>>, byte[]> {\n    private String delimiter = \",\";\n\n    private String itemTerminator = \"\\n\";\n\n    private String filterRegex;\n\n    private String charset = \"UTF-8\";\n\n    private Pattern p;\n\n    /**\n     * Convert a Kinesis record into one or more String lists by tokenising the\n     * parsed item by the delimiter\n     */\n    public List<List<String>> toClass(InputEvent event) throws IOException {\n        List<List<String>> outputData = new ArrayList<>();\n        List<String> item = new ArrayList<>();\n\n        try {\n            String[] lines;\n\n            lines = super.getItems(event);\n\n            // apply filters and tokenise by delimiter\n            for (String line : lines) {\n                if ((filterRegex != null && p.matcher(line).matches()) || filterRegex == null) {\n                    item = Arrays.asList(line.split(delimiter));\n                    outputData.add(item);\n                }\n            }\n\n     
       return outputData;\n        } catch (Exception e) {\n            throw new IOException(e);\n        }\n    }\n\n    /**\n     * Generate a byte stream in the supplied character set using the String\n     * list of CSV items\n     */\n    public byte[] fromClass(List<List<String>> csv) throws IOException {\n        StringBuffer ret = new StringBuffer();\n        StringBuffer sb = new StringBuffer();\n        for (List<String> item : csv) {\n            for (String s : item) {\n                sb.append(s + this.delimiter);\n            }\n            ret.append(sb.substring(0, sb.length() - 1) + this.itemTerminator);\n            sb = new StringBuffer();\n        }\n\n        return SerializationUtils.safeReturnData(ret.substring(0, ret.length() - 1).getBytes(\n                this.charset));\n    }\n\n    /**\n     * Builder method to apply a non-default field delimiter (default ',')\n     * \n     * @param delimiter\n     * @return\n     */\n    public CsvSerializer withFieldDelimiter(String delimiter) {\n        this.delimiter = delimiter;\n        return this;\n    }\n\n    /**\n     * Builder method to apply a filtering regular expression to text based\n     * serialisation operations\n     * \n     * @param regex\n     * @return\n     */\n    public CsvSerializer withFilterRegex(String regex) {\n        this.filterRegex = regex;\n        p = Pattern.compile(this.filterRegex);\n\n        return this;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/IKinesisSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.IOException;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\n\n/**\n * IKinesisSerializer is used to transform data from a Record (byte array) to the data\n * model class (T) for processing in the application and from the data model\n * class to the output type (U) for the emitter.\n * \n * @param <T> the data type stored in the record\n */\npublic interface IKinesisSerializer<T, U> {\n    /**\n     * Transform record into an object of its original class.\n     * \n     * @param event raw input event from the Kinesis stream\n     * @return data as its original class\n     * @throws IOException could not convert the record to a T\n     */\n    public T toClass(InputEvent event) throws IOException;\n\n    /**\n     * Transform record from its original class to byte array.\n     * \n     * @param record data as its original class\n     * @return data byte array\n     */\n    public U fromClass(T record) throws IOException;\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/JavaSerializationSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.ByteArrayInputStream;\nimport java.io.ByteArrayOutputStream;\nimport java.io.IOException;\nimport java.io.ObjectInput;\nimport java.io.ObjectInputStream;\nimport java.io.ObjectOutput;\nimport java.io.ObjectOutputStream;\nimport java.io.Serializable;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\n\npublic class JavaSerializationSerializer implements IKinesisSerializer<Object, byte[]>,\n        Serializable {\n    private static final long serialVersionUID = 2837410982374019823L;\n\n    public Object toClass(InputEvent event) throws IOException {\n        ByteArrayInputStream bis = new ByteArrayInputStream(event.getData());\n        ObjectInput in = null;\n        try {\n            in = new ObjectInputStream(bis);\n            return in.readObject();\n        } catch (ClassNotFoundException e) {\n            throw new IOException(e);\n        } finally {\n            try {\n                bis.close();\n            } catch (IOException ex) {\n                ;\n            }\n            try {\n                if (in != null) {\n                    in.close();\n                }\n            } catch (IOException ex) {\n            }\n        }\n    }\n\n    public byte[] fromClass(Object o) throws IOException {\n        ByteArrayOutputStream bos 
= new ByteArrayOutputStream();\n        ObjectOutput out = null;\n        try {\n            out = new ObjectOutputStream(bos);\n            out.writeObject(o);\n            return SerializationUtils.safeReturnData(bos.toByteArray());\n        } finally {\n            try {\n                if (out != null) {\n                    out.close();\n                }\n            } catch (IOException ex) {\n                // ignore close exception\n            }\n            try {\n                bos.close();\n            } catch (IOException ex) {\n                // ignore close exception\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/JsonSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.IOException;\nimport java.nio.charset.Charset;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.regex.Pattern;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\nimport com.fasterxml.jackson.databind.DeserializationFeature;\nimport com.fasterxml.jackson.databind.ObjectMapper;\n\n@SuppressWarnings(\"rawtypes\")\n/**\n * Class which handles serialising Object payloads using Jackson marshalling, or converts to string format if configured to support text based payloads\n */\npublic class JsonSerializer implements IKinesisSerializer<Object, byte[]> {\n    ObjectMapper mapper = new ObjectMapper();\n\n    String itemTerminator = null;\n\n    Class clazz;\n\n    private String filterRegex;\n\n    private Pattern p;\n\n    private String charset = \"UTF-8\";\n\n    /**\n     * Construct a basic json data serialiser\n     */\n    public JsonSerializer() {\n    }\n\n    /**\n     * Construct a serialiser that is based on a densely packed recordset list\n     * of items\n     * \n     * @param itemTerminator\n     */\n    public JsonSerializer(String itemTerminator) {\n        this.itemTerminator = itemTerminator;\n    }\n\n    /**\n     * Construct a Serialiser which is class based\n     * \n     * @param clazz\n     */\n    public 
JsonSerializer(Class clazz) {\n        this.clazz = clazz;\n        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    /**\n     * Method to generate either a class instance from a Kinesis Record, or a String which will be converted to JsonMap if we are serialising text based payloads\n     */\n    public Object toClass(final InputEvent event) throws IOException {\n        // Return a class object from the json, or if we have no class then\n        // return a String list\n        List<String> jsonStringList = new ArrayList<>();\n\n        if (this.clazz == null) {\n            if (this.itemTerminator != null) {\n                // break up the json items as separate lines\n                String[] items = new String(event.getData(), this.charset).split(this.itemTerminator);\n\n                for (String item : items) {\n                    if (filterRegex == null || (filterRegex != null && p.matcher(item).matches())) {\n                        jsonStringList.add(item);\n                    }\n                }\n\n                return jsonStringList;\n            } else {\n                // single json object per record\n                String item = new String(event.getData(), this.charset);\n\n                if (filterRegex == null || (filterRegex != null && p.matcher(item).matches())) {\n                    jsonStringList.add(item);\n                }\n\n                return jsonStringList;\n            }\n        } else {\n            // use jackson to serialise a class instance\n            return mapper.readValue(event.getData(), clazz);\n        }\n    }\n\n    /**\n     * Convert a given object into the required binary representation, based\n     * upon the serialiser config as either an object serialiser or a string\n     * serialiser\n     */\n    public byte[] fromClass(final Object o) throws IOException {\n        if (this.clazz == null) {\n            return 
SerializationUtils.safeReturnData(((String) o).getBytes(this.charset));\n        } else {\n            return SerializationUtils.safeReturnData(mapper.writeValueAsBytes(o));\n        }\n    }\n\n    /**\n     * Builder method to apply a filtering regular expression to text based\n     * serialisation operations\n     * \n     * @param regex\n     * @return\n     */\n    public JsonSerializer withFilterRegex(String regex) {\n        this.filterRegex = regex;\n        p = Pattern.compile(this.filterRegex);\n\n        return this;\n    }\n\n    /**\n     * Builder method to apply a non-default character set to text based\n     * serialisation operations (default UTF-8)\n     * \n     * @param charset\n     * @return\n     */\n    public JsonSerializer withCharset(String charset) {\n        // test that this is a valid character set\n        Charset test = Charset.forName(charset);\n\n        // use it\n        this.charset = charset;\n\n        return this;\n    }\n\n    /**\n     * Build method to apply a non-default item terminator (default \\n)\n     * \n     * @param itemTerminator\n     * @return\n     */\n    public JsonSerializer withItemTerminator(String itemTerminator) {\n        this.itemTerminator = itemTerminator;\n        return this;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/RegexSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.regex.Matcher;\nimport java.util.regex.Pattern;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\n\npublic class RegexSerializer extends StringSerializer<RegexSerializer> implements\n        IKinesisSerializer<List<List<String>>, byte[]> {\n    private String regexPattern;\n\n    private Pattern p;\n\n    private Matcher m;\n\n    public RegexSerializer(String regexPattern) {\n        this.regexPattern = regexPattern;\n        p = Pattern.compile(this.regexPattern);\n    }\n\n    public List<List<String>> toClass(InputEvent event) throws IOException {\n        List<List<String>> output = new ArrayList<>();\n        String[] items;\n        try {\n            items = super.getItems(event);\n\n            for (String s : items) {\n                List<String> elements = new ArrayList<>();\n\n                if (m == null) {\n                    m = p.matcher(s);\n                } else {\n                    m.reset(s);\n                }\n                if (m.find() && m.groupCount() > 0) {\n                    for (int i = 1; i < m.groupCount() + 1; i++) {\n                        elements.add(m.group(i));\n                    }\n\n                    
output.add(elements);\n                }\n            }\n\n            return output;\n        } catch (Exception e) {\n            throw new IOException(e);\n        }\n    }\n\n    public byte[] fromClass(List<List<String>> content) throws IOException {\n        // Can't reverse engineer the original regex from a string list, so don't\n        // try\n        throw new IOException(new UnsupportedOperationException());\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/SerializationUtils.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.io.IOException;\n\n/**\n * Helper methods for managing Kinesis Serialisation\n * \n * @author meyersi\n */\npublic class SerializationUtils {\n    /**\n     * Ensure that the generated binary representation will conform to Kinesis\n     * wire format requirements\n     * \n     * @param check\n     * @return\n     * @throws Exception\n     */\n    // Kinesis Maximum Byte Length is 50KB\n    public static final int maxObjectSize = 50 * 1024;\n\n    public static byte[] safeReturnData(byte[] check) throws IOException {\n        if (check.length > maxObjectSize) {\n            throw new IOException(String.format(\n                    \"Serialised byte length exceeds maximum length of %s\", maxObjectSize));\n        }\n\n        return check;\n    }\n}\n"
  },
  {
    "path": "src/main/java/com/amazonaws/services/kinesis/io/serializer/StringSerializer.java",
    "content": "/**\n * Amazon Kinesis Aggregators\n *\n * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.\n *\n * Licensed under the Amazon Software License (the \"License\").\n * You may not use this file except in compliance with the License.\n * A copy of the License is located at\n *\n *  http://aws.amazon.com/asl/\n *\n * or in the \"license\" file accompanying this file. This file is distributed\n * on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either\n * express or implied. See the License for the specific language governing\n * permissions and limitations under the License.\n */\npackage com.amazonaws.services.kinesis.io.serializer;\n\nimport java.nio.charset.Charset;\n\nimport com.amazonaws.services.kinesis.aggregators.InputEvent;\n\npublic abstract class StringSerializer<T extends StringSerializer<T>> {\n    protected String charset = \"UTF-8\";\n\n    protected String itemTerminator = \"\\n\";\n\n    /**\n     * Builder method to apply a non-default character set to text based\n     * serialisation operations (default UTF-8)\n     * \n     * @param charset\n     * @return\n     */\n    @SuppressWarnings(\"unchecked\")\n    public T withCharset(String charset) {\n        // test that this is a valid character set\n        Charset test = Charset.forName(charset);\n\n        // use it\n        this.charset = charset;\n\n        return (T) this;\n    }\n\n    /**\n     * Build method to apply a non-default item terminator (default \\n)\n     * \n     * @param itemTerminator\n     * @return\n     */\n    @SuppressWarnings(\"unchecked\")\n    public T withItemTerminator(String terminator) {\n        this.itemTerminator = terminator;\n        return (T) this;\n    }\n\n    protected String[] getItems(InputEvent event) throws Exception {\n        // convert the content to a string in the supplied character set\n        String content = new String(event.getData(), this.charset);\n\n        // break into items using 
line terminator\n        return content.split(this.itemTerminator);\n    }\n}\n"
  }
]