Repository: awslabs/amazon-kinesis-aggregators Branch: master Commit: a9e2f60bfdb3 Files: 100 Total size: 29.7 MB Directory structure: gitextract_tmef7pg6/ ├── .gitignore ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── assembly.xml ├── dist/ │ ├── AmazonKinesisAggregators.jar-complete.jar │ ├── AmazonKinesisAggregators.war │ ├── amazon-kinesis-aggregators-.9.2.8.jar │ ├── amazon-kinesis-aggregators-.9.2.9-sources.jar │ └── amazon-kinesis-aggregators-.9.2.9.jar ├── pom.xml ├── sample/ │ ├── bin/ │ │ └── run-producer.sh │ ├── java/ │ │ ├── model/ │ │ │ ├── SensorReading.java │ │ │ └── SensorState.java │ │ └── producer/ │ │ └── SensorReadingProducer.java │ └── resources/ │ ├── BySegment-CSV.json │ ├── BySegment-Json.json │ └── BySegment-Regex.json └── src/ ├── .gitkeep ├── log4j.properties └── main/ ├── WebContent/ │ ├── .ebextensions/ │ │ └── as.config │ ├── META-INF/ │ │ └── MANIFEST.MF │ ├── WEB-INF/ │ │ └── web.xml │ ├── index.html │ └── styles/ │ └── styles.css └── java/ └── com/ └── amazonaws/ └── services/ └── kinesis/ ├── aggregators/ │ ├── AggregateData.java │ ├── AggregatorGroup.java │ ├── AggregatorType.java │ ├── AggregatorsConstants.java │ ├── EnvironmentType.java │ ├── IStreamAggregator.java │ ├── InputEvent.java │ ├── InventoryModel.java │ ├── InventoryStatus.java │ ├── LabelSet.java │ ├── StreamAggregator.java │ ├── StreamAggregatorUtils.java │ ├── TableKeyStructure.java │ ├── TimeHorizon.java │ ├── annotations/ │ │ ├── Aggregate.java │ │ ├── AnnotationProcessor.java │ │ ├── DateValue.java │ │ ├── Label.java │ │ └── Summary.java │ ├── app/ │ │ ├── AbstractQueryServlet.java │ │ ├── AggregatorsBeanstalkApp.java │ │ ├── DateQueryServlet.java │ │ ├── FetchConfigurationServlet.java │ │ ├── ListAggregateKeysServlet.java │ │ ├── QueryByLabelServlet.java │ │ ├── ShowConfigFileServlet.java │ │ └── ShowConfigurationServlet.java │ ├── cache/ │ │ ├── AggregateCache.java │ │ ├── UpdateKey.java │ │ └── UpdateValue.java │ ├── cli/ │ │ └── AggregatorsCli.java │ ├── configuration/ │ │ ├── ConfigFileUtils.java │ │ ├── DataExtractor.java │ │ ├── ExternalConfigurationModel.java │ │ └── json.schema │ ├── consumer/ │ │ └── AggregatorConsumer.java │ ├── datastore/ │ │ ├── AggregateAttributeModification.java │ │ ├── DevNullDataStore.java │ │ ├── DynamoDataStore.java │ │ ├── DynamoQueryEngine.java │ │ ├── DynamoUtils.java │ │ └── IDataStore.java │ ├── exception/ │ │ ├── ClassNotAnnotatedException.java │ │ ├── InvalidConfigurationException.java │ │ ├── SerializationException.java │ │ └── UnsupportedCalculationException.java │ ├── factory/ │ │ ├── CSVAggregatorFactory.java │ │ ├── ExternallyConfiguredAggregatorFactory.java │ │ ├── JsonAggregatorFactory.java │ │ ├── ObjectAggregatorFactory.java │ │ └── RegexAggregatorFactory.java │ ├── idempotency/ │ │ ├── DefaultIdempotencyCheck.java │ │ └── IIdempotencyCheck.java │ ├── metrics/ │ │ ├── CloudWatchMetricsEmitter.java │ │ ├── IMetricsEmitter.java │ │ └── MetricsEmitterThrottledException.java │ ├── processor/ │ │ ├── AggregatorProcessor.java │ │ └── AggregatorProcessorFactory.java │ └── summary/ │ ├── SummaryCalculation.java │ ├── SummaryConfiguration.java │ └── SummaryElement.java └── io/ ├── AbstractDataExtractor.java ├── CsvDataExtractor.java ├── IDataExtractor.java ├── JsonDataExtractor.java ├── ObjectExtractor.java ├── RegexDataExtractor.java ├── StringDataExtractor.java └── serializer/ ├── CsvSerializer.java ├── IKinesisSerializer.java ├── JavaSerializationSerializer.java ├── JsonSerializer.java ├── RegexSerializer.java ├── SerializationUtils.java └── StringSerializer.java ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ /bin/ /target/ .classpath .project .settings/ ================================================ FILE: LICENSE.txt ================================================ http://www.apache.org/licenses/LICENSE-2.0.html Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: You must give any other recipients of the Work or Derivative Works a copy of this License; and You must cause any modified files to carry prominent notices stating that You changed the files; and You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. Note: Other license terms may apply to certain, identified software files contained within or distributed with the accompanying software if such terms are included in the directory containing the accompanying software. Such other license terms will then apply in lieu of the terms of the software license above. END OF TERMS AND CONDITIONS ================================================ FILE: NOTICE.txt ================================================ amazon-kinesis-aggregators Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Amazon Kinesis Aggregators ---- *This project is now deprecated, and only updates for security vulnerabilities in dependencies will be made. We advise the use of Apache Flink on Amazon Kinesis Analytics instead.* ---- Amazon Kinesis Aggregators is a Java framework that enables the automatic creation of real-time aggregated time series data from Amazon Kinesis streams. You can use this data to answer questions such as ‘how many times per second has ‘x’ occurred’ or ‘what was the breakdown by hour over the day of the streamed data containing ‘y'. Using this framework, you simply describe the format of the data on your stream (CSV, JSON, and so on), the granularity of times series that you require (seconds, minutes, hours, and so on), and how the data elements that are streamed should be grouped; the framework handles all the time series calculations and data persistence. You then simply consume the time series aggregates in your application using Amazon DynamoDB, or interact with the time series using Amazon CloudWatch or the Web Query API. You can also analyze the data using Hive on Amazon Elastic MapReduce, or bulk import it to Amazon Redshift. The process runs as a standalone Amazon Kinesis-enabled application which only requires configuration, or can be integrated into existing Amazon Kinesis applications. The data is stored in a time series based on how you aggregate it. A dataset aggregating Telecoms Call Data Records in DynamoDB might look like this: ![Dynamo Real Time Aggregate Table](https://s3.amazonaws.com/amazon-kinesis-aggregators/img/DynamoTable.png) The corresponding data in CloudWatch would look like this: ![CloudWatch Dashboard View](https://s3.amazonaws.com/amazon-kinesis-aggregators/img/CloudWatch.png) ## Building Aggregators Amazon Kinesis Aggregators is built using Apache Maven. To build, simply run Maven from the amazon-kinesis-aggregators directory. The target directory contains the following build artifacts: * **amazon-kinesis-aggregators-.9.2.7.4.jar** - Includes no compiled dependencies * **AmazonKinesisAggregators.jar-complete.jar** - Includes all required dependencies * **AmazonKinesisAggregator.war** - The web application archive file ## Running Aggregators Amazon Kinesis Aggregators ships with several deployment options, which should enable you to run with minimal operational overhead while also accommodating advanced deployment use cases. You can run Amazon Kinesis Aggregators as: * A fully-managed Elastic Beanstalk application. All you need to do is deploy the KinesisAggregators.war file, and provide a configuration file that is accessible using HTTP. * A managed Java client, running through the host orchestration of your choice. For example, you can deploy this managed Java client as part of an Amazon EC2 fleet that uses Auto Scaling. * As part of an existing Amazon Kinesis-enabled application. This enables an existing application to 'sideload' aggregator processing, as an augmentation to an already-established application. ### Running Amazon Kinesis Aggregators Using Elastic Beanstalk Amazon Kinesis Aggregators compiles a web application archive (WAR) file, which enables easy deployment on Java application servers, such as Apache Tomcat, using Elastic Beanstalk (http://aws.amazon.com/elasticbeanstalk). Amazon Kinesis Aggregators also includes configuration options that instruct Elastic Beanstalk to scale the application on CPU load, which is typically the bottleneck for applications as they scale up. This is the recommended deployment method. To deploy Amazon Kinesis Aggregators as an Elastic Beanstalk application, start by creating a new Elastic Beanstalk web server application with the pre-configured Tomcat stack. When prompted by the AWS Management Console, upload the KinesisAggregators.war file from your local build. Select an instance type that is suitable for the type of aggregation that you are running (specifically, the higher the granularity of label items and the more fine-grained the TimeHorizon value, the larger the instance type you require). After deployment, click the URL for the application environment; the following message is displayed: ```OK - Kinesis Aggregators Managed Application hosted in Elastic Beanstalk Online ``` Furthermore, if you request a log snapshot from the Elastic Beanstalk console, you see a log line indicating the following: ```No Aggregators Configuration File found in Beanstalk Configuration config-file-url. Application is Idle``` This indicates that the application is deployed but not configured. To configure the application, add these Elastic Beanstalk configuration parameters as required: * **stream-name** - The name of the stream. * **application-name** - The name of the Amazon Kinesis application. * **failures-tolerated** - The number of worker exceptions allowed before the worker terminates. * **position-in-stream** - The position in the stream to start consuming data from. The possible values are 'LATEST' and 'TRIM_HORIZON'. * **max-records** - The maximum number of records to consume from a stream in a single cycle. You can set this value if your stream processing (in addition to aggregation) is slow. * **region** - The region to use for the stream, the DynamoDB lease tables, and the CloudWatch and aggregate data stores. Amazon Kinesis Aggregators does not currently support cross-region deployment. * **environment** - The name of the environment. This ensures that all DynamoDB tables are prefixed with the environment, enabling you to keep data sets separate for test and production (for example). * **config-file-url** - The URL for the configuration file. This is typically done by adding `-D` flags to the JVM command line options. Then, choose 'Save' and Elastic Beanstalk applies the changes to the environment. Wait a minute or so, and then snapshot logs to confirm that Amazon Kinesis Aggregators is running. ### Running the Managed Java Client Application This is a great option if you have data in Amazon Kinesis, but don’t want to use Elastic Beanstalk. You can start the application from a server using the following command: ```java -cp AmazonKinesisAggregators.jar-complete.jar -Dconfig-file-path= -Dstream-name= -Dapplication-name= -Dregion= com.amazonaws.services.kinesis.aggregators.consumer.AggregatorConsumer``` In addition to the configuration items outlined in the Elastic Beanstalk section, use the following configuration item: * **config-file-path** - The path to the configuration file. We recommend that you run your servers in an Auto Scaling group to ensure fault tolerance if the host fails. ### Configuration You can use the configuration file to create one or more aggregations against the same stream. It is a JSON file that creates a set of aggregator objects managed by the framework. Create one aggregator for each distinct label that you want to aggregate on. Each aggregator can then have its own properties of time granularity, aggregator type, and so on. The core structure of the configuration file is an array of aggregator objects. For example, the following configuration creates two aggregators: ```[{aggregatorDef1}, {aggregatorDef2}]``` Note that aggregatorDef*N* is an aggregator configuration. An aggregator configuration must include the following attributes: * **namespace** (String) - Enables you to create separate time series data stores. This namespace is used with the application name and environment to create the underlying data tables for the time series, as well as the namespace for custom CloudWatch metrics. Use something that's meaningful based upon the label and time granularity. * **labelItems** (array<String>) - Includes a list of the elements of the data stream to aggregate on. The data stored in the time series is aggregated by the unique values from the stream for these attributes, and by time. For instance, to aggregate data for searches made against a car website, you might have a label item set of ["Make","Model","Year"]. If you are using CSV data, then this same configuration might be positional based on the fields in the line, such as [0,3,5]. * **labelAttributeAlias** (String) - Enables you to name the target database attribute for the label. This is particularly useful when you are using CSV or regex-extracted data, and would otherwise end up with a label attribute named the same as the label attribute index. * **type** (enum) - The type of aggregation to run. The available types are 'COUNT' and 'SUM'. Counting aggregators simply counts the instances of unique values in Label Items by time. Using the previous example, it would generate a count of searches by configured time period for each unique combination of Make, Model, and Year. Building on this 'SUM' type, aggregators also calculate summaries of other numeric values on the stream. For more information, see the configuration option **'summaryItems**'. * **timeHorizons** (array<enum>) - Because the data is captured as a time series, you must tell the aggregator which definition of time you require. To have the data on the stream aggregated by minute, specify 'MINUTE'. To put data into buckets of 5 minutes duration, specify MINUTES_GROUPED(5). You can specify multiple timeHorizon values, and the aggregator automatically maintains the time series data at that granularity. A common configuration might be ["SECOND","HOUR","FOREVER"], which gives per-second aggregates, a rollup by Hour, and a simple data set to view everything that ever occurred in a single value. The possible values are: * SECOND * MINUTE * MINUTES\_GROUPED(int minutePeriod) - Groups data into time buckets using a minute period. For 4 buckets per hour, use '15', or use '5' for buckets of aggregation that are 5 minutes long. * HOUR * DAY * MONTH * YEAR * FOREVER - Rolls up everything that occurred in a single value '*'. * **dataExtractor** (enum) - Tells the aggregator how to parse and extract the Label Items from your stream. Currently, the following data formats are supported for external configurations using the configuration file: * **CSV** - Character-separated UTF-8 data. The default delimiter is a comma. To override the delimiter, set the configuration option 'delimiter' to the character value to use as the field terminator. Also, note that all data extractors support multi-value events. This means that you can have many CSV 'lines' within a single event, which are extracted with a line terminator of "\n". To override the line terminator on any data extractor that is text-based, set the configuration option 'lineTerminator' to the character to use as the line terminator. When this data extractor is used, indicate the Label Items using zero-index position values of the fields. * **JSON** - UTF-8 encoded JSON data. This data can either reside in a JSON array on the event (for example [{object1},{object2},{object3}]) or can be a single object per 'line' (for example {object1}\n {object2}). To control the object delimiter, use the configuration option 'lineTerminator'. * **REGEX** - UTF-8 encoded strings of arbitrary data. With this configuration option, you must include the 'regularExpression' configuration option. This data extraction method also uses zero-indexed positional values for Label Items. * **OBJECT** - Serialized objects using Jackson JSON binary data. With this configuration option, you must include the 'class' configuration option. Using this data extraction method, an event can include only 1 serialized object. * **dateItem** (String) - The attribute or field index that defines when the event occurred. This is used to generate the aggregate for the correct time period for the event. This can be formatted as a long value of epoch seconds, or a String value. If you provide a String value in the event, you must also set the configuration option 'dateFormat'. If it is omitted, then the timestamp of the event is set to the timestamp of the server instance when it processes the item. * **dateAttributeAlias** (String) - Similar to labelAttributeAlias, this enables you to set the name of the date attribute in the aggregated data table. * **dateFormat** (String) - The date format of the dateItem, using date format strings as specified at ```http://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html```. You can also include the following options in the configuration: * **summaryItems** (array<String>) - If the aggregator 'type' is SUM, then the aggregator automatically performs a time series aggregation on the summary items configured. These summary items must be numeric values that you want aggregated for the indicated time period and values of Label Items. For example, if your stream includes call data records, you might want to sum the duration of all calls made by mobile network by hour. Along with configuration of the Label Items and time, you would include a summary item of 'callDuration'. As with the configuration of the Label Items, summary items are zero-index positional values for CSV and regex data extractions, attribute names for JSON, and method names for OBJECT. For more information, see the Summary Items Mini-Language section. * **filterRegex** (String) - Filters the stream data using a filtering regular expression. If provided, only the data that conforms to the regular expression is passed in for subsequent parsing. Note this step is applied on the raw underlying stream data as String values, and is not available for object serialization. * **failOnDataExtraction** (boolean) - By default, the aggregator fails if it can't understand the data on the stream, to ensure that all events are properly accounted for. If you have a data stream that contains internally inconsistent data, and you want to perform a simple aggregation whenever you can successfully parse the data stream, set this value to 'true'. Alternatively, consider writing a **filterRegex** expression that extracts only the data that fits the configuration of Label, Date, and Summary Items. * **tableName** (String) - Sets the name of the underlying time series data table in the data store. * **environment** (String) - Runs an aggregator with a specified environment type. This enables you to separate the underlying data stores used for the time series data into production and test, for example. * **readIOPS** (int) - Use with the default DynamoDB IDataStore to set up the number of read I/O operations per second (IOPS) you want on the time series data store. * **writeIOPS** (int) - Use with the default DynamoDB IDataStore to set up how many write IOPS you want on the time series data store. * **IDataStore** (String) - Configures alternative backing data stores other than DynamoDB. If you have written your own data store implementation, specify the full class name, including the package, to have this data store used. You can also specify the internal alternate data store 'com.amazonaws.services.kinesis.aggregators.datastore.DevNullDataStore', which does NOT store the time series data, and is useful only to consume the time series from CloudWatch. * **emitMetrics** (boolean) - Emits the time series aggregated data as a custom CloudWatch domain of metrics. Set this value to 'true' to create a custom CloudWatch metric for the application name and namespace of the aggregator, with dimensions on the label and summary items. ### Summary Items Mini-Language You can configure summary items and the type of summary using a miniature specification language, and navigate complex document structures in JSON data. You can apply the following type of summary transformations: * **SUM** - Applies the default summary if you do not specify a summary type. This sums up all values seen for label and time values. * **MIN** - Calculates the minimum value observed for the time period and label values. * **MAX** - Calculates the maximum value observed for the time period and label values. * **FIRST** - Stores the first observed value for the time period and label values. * **LAST** - Is equal to the latest value for the time period and label values. Summary items can have aliases applied, as in SQL, to control the name of the generated attribute in the data store you write to. You simply add the name of the item you require to the definition of the summary item, including functions. You can also navigate an entity structure in a JSON-formatted stream data using dot notation; for example, given the following object, you can access the calculated duration using a summary item of 'timeValues.durations.calculated': ``` { "name": "Object To Be Aggregated", "timeValues": { "durations": { "calculated": 60, "recorded": 58 }, "endTime": "01/01/1970 01:00:00", "startTime": "01/01/1970 00:00:00" } } ``` These concepts can be combined into a mini-specification: Example 1 - Calculate the min, max, and sum of value 7 in a CSV stream, giving them friendly names - ```["min(7) min-purchase-price","max(7) max-purchase-price","sum(7) total-sales]"``` Example 2 - Calculate the sum and maximum value of the calculated duration in the JSON stream - ```["sum(timeValues.durations.calculated)","max(timeValues.durations.calculated)"]``` ### Sample Configurations * **JSON** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/json-aggregator.json * **CSV** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/csv-aggregator.json * **Regular Expression** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/regex-aggregator.json * **Object Serialized Data** - http://amazon-kinesis-aggregators.s3.amazonaws.com/sample/object-aggregator.json ### Aggregator Data Structure The data structure for aggregated data is arranged as a hash/range table in DynamoDB on the Label attributes and Date attribute at the configured granularity of time. Every table also includes the following: * **eventCount** - The number of events consumed during the period. * **lastWriteSeq** - The last sequence value from the Amazon Kinesis stream that generated an update to the time period and aggregate label. * **lastWriteTime** - The time on the consumer application when the update was made to the aggregate data. * **scatterPrefix** - A random number between 0 and 99 used to ensure that there are no write bottlenecks on global secondary indexes for the time period and last write sequence. Of course, the table also includes any summary values that were added to the aggregator configuration. The format of these summary attributes in DynamoDB follow the pattern <attribute>-<summary type>, or use the alias provided. * For JSON streams, the attribute is the attribute name configured. * For object-serialized streams, the attribute is the summary method converted to a user-friendly name. For example 'getComputedValue' is written to the data store as 'computedValue'. * For CSV and String data parsed using regular expressions, the attribute value is the position in the stream, indexed from 0. * The summary type is one of the following values: MIN, MAX, SUM, FIRST, or LAST. #### Indexes All aggregator data stores have global secondary indexes (logically) on the date value and on lastWriteSeq. To ensure adequate write performance, these indexes are structured as hash/range on the scatterPrefix (a random number between 0 and 99) and the value is indexed. #### Web-based Query API The Amazon Kinesis Aggregators web application also provides several query API operations, which return data in the JSON format. When deployed, you can make an HTTP request to a variety of endpoints to retrieve different types of data. Currently, there is no security offered for the Web API operations, so you must ensure that they are only accessible from within your VPC using security group rules or similar. Do NOT make these endpoints publicly accessible. ##### Viewing the Running Configuration You can view the configuration of your aggregators at the URL ```/configuration```, which returns an object such as: ``` { "application-name": "EnergyRealTimeDataConsumer", "config-file-url": "s3://mybucket/kinesis/sensor-consumer-regex.json", "environment": null, "failures-tolerated": null, "max-records": "2500", "position-in-stream": "LATEST", "region": "eu-west-1", "stream-name": "EnergyPipelineSensors", "version": ".9.2.7.4" } ``` ##### Date-based Queries Use the Date query to find data that has been aggregated on the basis of the stream timestamp value. For example, use this interface to periodically retrieve all new data that has been processed, or to pull data for specific time ranges for comparative analysis. The URL is: ``` /dateQuery?params ``` Parameters: * **namespace** - The namespace for the aggregator configuration. * **operator** - The condition to query for, from the DynamoDB ComparisonOperator enum: EQ, GT, GE, and so on. Note that BETWEEN is not yet supported. * **granularity** – The granularity of time required, from the TimeHorizon enum: SECOND, MINUTE, HOUR, and so on. * **date–value** – The date value to query relative to, in yyyy-MM-dd+hh:mm:ss format (for example, 2014–09–01+18:00:00). This returns all data from the aggregated table for the date period specified. You can also use the internal Java API: ``` public List> queryByDate(Date dateValue, TimeHorizon h, ComparisonOperator comp, int threads) throws Exception ``` This method queries by the Date, TimeHorizon, and ComparisonOperator values you select. For example, to find all hourly aggregates after 3pm, use: ``` dateValue=Date('2014-01-01 15:00:00'), TimeHorizon.HOUR, ComparisonOperator.GT ``` The Threads parameter is the number of threads used to do the query. This is due to the index being organized on hash/range of scatterPrefix/DateValue. ##### Query for Label/Date Values To query the application to find the unique set of labels and date values that have been aggregated, use the following URL: ```/keyQuery?params``` Parameters: * **namespace** - The namespace for the aggregator configuration. * **scope** - Use 'HashKey' to get just the unique aggregate label values or 'HashAndRangeKey' to get both the label and date values. This returns a unique list of all keys from the aggregated table. You can also use the internal Java API: ``` public Map queryValue(String label, Date dateValue, TimeHorizon h) throws Exception ``` This method takes the label you are interested in, as well as a date for the date value. If you have multiple TimeHorizon values configured on the aggregator, it generates the correct dateValue to query the underlying table with. You are likely to use this interface to query across aggregator data stores looking for related time-based values. ## Integrating Aggregators into Existing Java Applications In addition to running aggregators as stand-alone Amazon Kinesis applications, you can integrate them into existing Amazon Kinesis applications. You can: * Run the managed consumer from an existing control environment * Inject a set of aggregators into a managed IRecordProcessorFactory * Use an existing IRecordProcessor to send data to one or more aggregators ### Managed IRecordProcessorFactory To build your Amazon Kinesis worker and configure it explicitly, you can still use aggregators to create IRecordProcessorFactory. In this case, simply create a new instance of com.amazonaws.services.kinesis.aggregators.processor.AggregatorProcessorFactory with the configured aggregators. ### Integration with Existing IRecordProcessors If you have an existing worker application and you simply want to add the aggregation capability, you can directly integrate with one or more aggregators. To do this, simply construct the aggregators using a configuration file, or using a pure Java configuration. Then, to inject new data into the aggregator, simply call: ```void aggregate(List records)``` This causes the time series calculations to be done based upon the configuration of the aggregators. Then, when your worker normally calls checkpoint(), also call: ```void checkpoint()``` This flushes the in-memory time series state to the backing data store. You must ensure that the aggregators are initialized correctly against the shard for the worker by calling this method in the existing KCL Application IRecordProcessor initialize() method: ```void initialize(String shardId)``` You must also ensure that if the shutdown() method is invoked on your Amazon Kinesis application, you call: ```void shutdown(boolean flushState)``` If the shutdown reason specified in the shutdown method for IRecordProcessor is ShutdownReason.ZOMBIE, set flushState to 'false' to allow the data to be re-aggregated by another worker. However, if the value is ShutdownReason.TERMINATE, you should flush the aggregator state on termination. ### Configuring Aggregators in Existing Applications There are a variety of ways to configure aggregators when you are integrating into existing applications. You might use a factory to create one or more aggregators from a simple set of arguments, or you can configure each aggregator directly and manage it as part of an aggregator group. #### Aggregator Factories There are a variety of aggregator factories available in the com.amazonaws.services.kinesis.aggregators.factory package, which generally map to the configuration types found in the configuration file. In fact, you can use configuration files to configure aggregators from Java using the following: ``` ExternallyConfiguredAggregatorFactory.buildFromConfig( String streamName, String applicationName, KinesisClientLibConfiguration config, String configFile) ``` You can also take advantage of aggregators that are specific to the type of data to be aggregated: ##### JSON Data ``` JsonAggregatorFactory.newInstance(String streamName , String appName , KinesisClientLibConfiguration config , String namespace , TimeHorizon timeHorizon , AggregatorType aggregatorType , List labelAttributes , String dateAttribute , String dateFormat , List summaryAttributes) ``` ##### CSV Data ``` CsvAggregatorFactory.newInstance(String streamName , String appName , KinesisClientLibConfiguration config , String namespace , TimeHorizon timeHorizon , AggregatorType aggregatorType , String delimiter , List labelIndicies , int dateIndex , String dateFormat , List summaryIndicies) ``` ##### String Data parsed with Regular Expressions ``` RegexAggregatorFactory.newInstance(String streamName , String appName , KinesisClientLibConfiguration config , String namespace , List timeHorizons , AggregatorType aggregatorType , String regularExpression , List labelIndicies , int dateIndex , String dateFormat , List summaryIndicies) ``` ##### Object Serialized Data You can generate aggregators for object-serialized data using annotations: ``` ObjectAggregatorFactory.newInstance(String streamName , String appName , KinesisClientLibConfiguration config , Class clazz) ``` Note that 'clazz' is a class that has been configured using annotations found in the com.amazonaws.services.kinesis.aggregators.annotations package. This factory method throws an error if the class is not annotated. Alternatively, you can configure the aggregator directly: ``` ObjectAggregatorFactory.newInstance(String streamName , String appName , KinesisClientLibConfiguration config , String namespace , List timeHorizons , AggregatorType aggregatorType , Class clazz , List labelMethods , String dateMethod , List summaryMethods) ``` #### Direct Configuration If you want even more control over the configuration of a given set of aggregators, then you can configure them directly. To effectively do this, you must understand how aggregators work. Aggregators are built around several subsystems that their factory methods configure automatically. When you build aggregators directly, you must construct an aggregator from its constituent subsystems. For more information, see the 'Extending Aggregators' section of this document. To configure an aggregator directly, you must configure two of the subsystems: the aggregator and the IDataExtractor that extracts the data from the stream. ##### IDataExtractor When you create an aggregator directly, you must specify the IDataExtractor to get data out of the stream for aggregation. There are IDataExtractors in the com.amazonaws.services.kinesis.aggregators.io package. Each of these map to the supported data formats, and provide relevant configuration options, including label, Date, and summary items. IDataExtractors use fluent builders for all optional configurations. For example, creating a JsonDataExtractor looks like this: ``` new JsonDataExtractor(labelAttributes) .withDateValueAttribute(dateAttribute) .withSummaryAttributes(summaryAttributes) .withDateFormat(dateFormat); ``` ##### Aggregator You then create the aggregator with the options that are specific to it, including KinesisClientLibConfiguration, required TimeHorizon values, and options for emitting metrics. For example, using the example JsonDataExtractor, you might configure the aggregator as follows: ``` return new StreamAggregator(streamName, appName, namespace, config, dataExtractor) .withTimeHorizon(timeHorizons) .withAggregatorType(aggregatorType) .withCloudWatchMetrics(true); ``` ## Extending Aggregators You might want to extend aggregators for a variety of reasons. The use cases that we know of today that will require extension include supporting data on a stream that is compressed, encrypted, and uses an object serialization format other than Jackson/JSON, or implementing large objects. We designed aggregators with extensibility in mind. You can extend the framework at the following integration points. ### Data Format & Handling The ability to support CSV, JSON, arbitrary string data and object serialization is provided by the IDataExtractor and IKinesisSerializer interfaces, residing at com.amazonaws.services.kinesis.aggregators.io and io.Serializer. #### IKinesisSerializer This interface interoperates between the internal data format used by IDataExtractors, and byte arrays are used on the stream. You implement IKinesisSerializer to support compressed stream data or if your data is encrypted, for example. The implementation would conform to the following interface, which is identical to the Amazon Kinesis Connector ITransformer class: ``` /** * Transforms data from a Record (byte array) to the data * model class (T) for processing in the application and from the data model * class to the output type (U) for the emitter. * * @param the data type stored in the record */ public interface IKinesisSerializer { /** * Transform the record into an object of its original class. * * @param record raw record from the stream * @return data using its original class * @throws IOException if it could not convert the record to a T */ public T toClass(InputEvent event) throws IOException; /** * Transform the record from its original class to a byte array. * * @param record data as its original class * @return a data byte array */ public U fromClass(T record) throws IOException; } ``` #### IDataExtractor IDataExtractors take the deserialized data and extract the relevant Label, Date, and Summary items. They also typically do any filtering that is exposed by the IDataExtractor. Implement a new IDataExtractor if the type of data returned by a custom IKinesisSerializer implementation is not compatible with the existing IDataExtractors in the io package. This new IDataExtractor would conform to: ``` /** * Enables pluggable data extractors for different types of * stream data. Aggregators use IDataExtractor to interoperate between the * stream data format and the internal format required for aggregation. * IDataExtractors likely use IKinesisSerializers to read and write to and from * the stream */ public interface IDataExtractor { /** * Gets the name of the label value to be extracted. * * @return */ public String getAggregateLabelName(); /** * Gets the name of the date value to be extracted. * * @return */ public String getDateValueName(); /** * Extracts one or more aggregatable items from a Amazon Kinesis record. * * @param event The Amazon Kinesis record from which we want to extract data. * @return A list of ExtractedData elements that have been resolved from * the input data. * @throws SerializationException */ public List getData(InputEvent event) throws SerialisationException; /** * Sets the type of aggregator that contains this IDataExtractor. Used to * boost efficiency in that the extractor will not extract summary items for * COUNT-based aggregator integration. * * @param type */ public void setAggregatorType(AggregatorType type); /** * Validates that the extractor is well formed. * * @throws Exception */ public void validate() throws Exception; /** * Gets the summary configuration that is driving data extraction against the * data stream. * * @return */ public SummaryConfiguration getSummaryConfig(); public IDataExtractor copy() throws Exception; } ``` Also note that an IDataExtractor returns multiple aggregatable objects from the stream. If you had a requirement to support M:N Kinesis Events to Aggregatable Events, an IDataExtractor could do the job using local state. Note that the IDataExtractor is STATEFUL for the life of an aggregator running on a shard, and contains the configuration of the data that is to be extracted. Because a new IDataExtractor is generated when a new aggregator is initialized on a shard, you must ensure that it is thread-safe and implement the copy() interface correctly to ensure that multiple instances can operate within a single JVM. ### Data Store The Amazon Kinesis Aggregators framework backs its data onto DynamoDB, and takes advantage of powerful DynamoDB features such as hash/range keys, atomic increment, and conditional updates. It also implements a defensive flush mechanism, which means that at any provisioned I/O rate, the aggregator can flush its state to DynamoDB without timing out. To extend aggregators with support for an alternate backing store, such as a relational database or Redis, implement com.amazonaws.services.kinesis.aggregators.datastore.IDataStore. This implementation must meet the following service levels: * Flushes all internal state to the data store in 5 minutes or less (this is due to the Amazon Kinesis worker timeout) * Supports a composite primary key for all label values and date value * Performs an atomic, transactional increment operation * Conditionally updates a discrete value in the table The implementation of a new IDataStore must conform to the following: ``` /** * Enables the in-memory cached aggregates * to be saved to a persistent store */ public interface IDataStore { /** * Writes a set of Update key/value pairs to the backing store * * @param data The input dataset to be updated * @return A data structure that maps a set of * AggregateAttributeModifications to the values that were * affected on the underlying data store, by UpdateKey * @throws Exception */ public Map> write( Map data) throws Exception; /** * Method called on creation of the IDataStore * * @throws Exception */ public void initialize() throws Exception; /** * Method that is periodically invoked to allow the IDataStore to * refresh tolerated limits for how often write() should be called * * @return * @throws Exception */ public long refreshForceCheckpointThresholds() throws Exception; /** * Sets the region for the IDataStore * * @param region */ public void setRegion(Region region); } ``` ### Metrics Service By default, Amazon Kinesis Aggregators integrates with CloudWatch for the purpose of metrics dashboards and alerts. However, you might want to push metrics to platforms such as Ganglia or New Relic. In these cases, you would provide an implementation of the com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter. This implementation would conform to the following: ``` /** * Provides classes that can write to metrics services. * Receives the output of the IDataStore modifications, and applies the data to * the metrics service. */ public interface IMetricsEmitter { /** * Emits a new set of metrics to the metrics service * * @param metricData Input Data to be intrumented * @throws Exception */ public void emit(Map> metricData) throws Exception; /** * Sets the region of the metrics service * * @param region */ public void setRegion(Region region); } ``` ---- Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/asl/ ================================================ FILE: assembly.xml ================================================ complete jar false / true runtime ================================================ FILE: dist/AmazonKinesisAggregators.jar-complete.jar ================================================ [File too large to display: 14.6 MB] ================================================ FILE: dist/AmazonKinesisAggregators.war ================================================ [File too large to display: 14.7 MB] ================================================ FILE: pom.xml ================================================ 4.0.0 com.amazonaws amazon-kinesis-aggregators .9.2.9 1.11.745 clean source:jar install assembly:assembly war:war src/main/java src/main/java **/*.properties sample/java sample/resources **/*.properties tst tst false **/*.java **/*.properties **/*.json maven-compiler-plugin 3.1 1.7 1.7 maven-war-plugin 2.3 src/main/WebContent true AmazonKinesisAggregators org.codehaus.mojo build-helper-maven-plugin 1.8 add-extra-source generate-sources add-source sample maven-assembly-plugin 2.1 AmazonKinesisAggregators.jar assembly.xml com.mycila.maven-license-plugin maven-license-plugin 1.8.0
com/amazonaws/services/kinesis/aggregators/license.txt
Ian Meyers 2014 meyersi@amazon.co.uk **/README **/license.txt src/test/resources/** src/main/resources/**
org.apache.maven.plugins maven-surefire-plugin 2.17 false 1 org.apache.maven.plugins maven-javadoc-plugin 2.9.1 private true
com.amazonaws aws-java-sdk-core ${sdk-version} com.amazonaws aws-java-sdk-dynamodb ${sdk-version} com.amazonaws aws-java-sdk-kinesis ${sdk-version} com.amazonaws aws-java-sdk-cloudwatch ${sdk-version} com.amazonaws aws-java-sdk-cloudwatchmetrics ${sdk-version} com.amazonaws amazon-kinesis-client 1.7.0 aws-java-sdk com.amazonaws commons-logging commons-logging 1.1.1 commons-httpclient commons-httpclient 3.1 commons-collections commons-collections 20040616 joda-time joda-time 2.2 javax.servlet javax.servlet-api 3.0.1 provided commons-io commons-io [2.7,) com.fasterxml.jackson.core jackson-core [2.9.10.1,) com.fasterxml.jackson.core jackson-databind [2.9.10.1,) Amazon Web Services UK Ltd
================================================ FILE: sample/bin/run-producer.sh ================================================ #!/bin/bash # # Amazon Kinesis Aggregators # # Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Amazon Software License (the "License"). # You may not use this file except in compliance with the License. # A copy of the License is located at # # http://aws.amazon.com/asl/ # # or in the "license" file accompanying this file. This file is distributed # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. # # Number of messages for this producer to create num_messages=$1 # Format should be one of 'json','csv', or 'string' format=$2 # Stream to write messages to stream=$3 # AWS Region Name to use, such as 'us-east-1' or 'eu-west=1'. US East is Default region=$4 java -cp ../../target/AmazonKinesisAggregators.jar-complete.jar producer.SensorReadingProducer $num_messages $format $stream $region ================================================ FILE: sample/java/model/SensorReading.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package model; import com.fasterxml.jackson.databind.ObjectMapper; public class SensorReading { private static ObjectMapper mapper = new ObjectMapper(); public enum OutputFormat { json, csv, string; } private OutputFormat outputAs = OutputFormat.json; private String id; private long captureTs; private String segment; private double lat; private double lng; private double pressure; private double temperature; private double flowRate; private double corrosionIndex; private double segmentIncline; private SensorReading() { } public SensorReading(String id, String segment, long captureTs, double lat, double lng, double pressure, double temperature, double flowRate, double corrosionIndex, double segmentIncline) { this.id = id; this.segment = segment; this.captureTs = captureTs; this.lat = lat; this.lng = lng; this.pressure = pressure; this.temperature = temperature; this.flowRate = flowRate; this.corrosionIndex = corrosionIndex; this.segmentIncline = segmentIncline; } public String getId() { return this.id; } public String getSegment() { return this.segment; } public long getCaptureTs() { return this.captureTs; } public double getLat() { return this.lat; } public double getLng() { return this.lng; } public double getPressure() { return this.pressure; } public double getTemp() { return this.temperature; } public double getFlowRate() { return this.flowRate; } public double getCorrosionIndex() { return this.corrosionIndex; } public double getSegmentIncline() { return this.segmentIncline; } public SensorReading withOutputFormat(OutputFormat format) { this.outputAs = format; return this; } public String asJson() throws Exception { return mapper.writeValueAsString(this); } public String asString() throws Exception { return String.format("%s (%s) ts-%s %sx%s %s at %s T:%s c:%10f deg%10f", this.id, this.segment, this.captureTs, this.lat, this.lng, this.pressure, this.flowRate, this.temperature, this.corrosionIndex, this.segmentIncline); } public String asCSV() throws Exception { return String.format("%s|%s|%s|%s|%s|%s|%s|%s|%10f|%10f", this.id, this.segment, this.captureTs, this.lat, this.lng, this.pressure, this.temperature, this.flowRate, this.corrosionIndex, this.segmentIncline); } @Override public String toString() { try { switch (this.outputAs) { case string: return this.asString(); case csv: return this.asCSV(); default: return this.asJson(); } } catch (Exception e) { e.printStackTrace(); return null; } } } ================================================ FILE: sample/java/model/SensorState.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package model; public class SensorState { private String segment; private double lat; private double lng; private double pressure; private double flowRate; private double temp; private double corrosion; private double incline; public SensorState(String segment, double lat, double lng, double pressure, double flowRate, double temp, double corrosion, double incline) { this.segment = segment; this.lat = lat; this.lng = lng; this.pressure = pressure; this.flowRate = flowRate; this.temp = temp; this.corrosion = corrosion; this.incline = incline; } public String getSegment() { return segment; } public double getLat() { return lat; } public double getLng() { return lng; } public double getPressure() { return pressure; } public double getFlowRate() { return flowRate; } public double getTemp() { return temp; } public double getCorrosion() { return corrosion; } public double getIncline() { return incline; } } ================================================ FILE: sample/java/producer/SensorReadingProducer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package producer; import java.nio.ByteBuffer; import java.security.SecureRandom; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import model.SensorReading; import model.SensorReading.OutputFormat; import model.SensorState; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.kinesis.AmazonKinesis; import com.amazonaws.services.kinesis.AmazonKinesisClient; import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException; import com.amazonaws.services.kinesis.model.PutRecordRequest; public class SensorReadingProducer { private final Random rand = new SecureRandom(); private final int ID_SPACE_SIZE = 1000; private final int NUM_SEGMENTS = 40; private final double PRESSURE_BASE = 108D; private final double PRESSURE_VOLATILITY = 3D; private final double FLOW_BASE = 1D; private final double FLOW_VOLATILITY = 1D; private final double TEMP_BASE = 16D; private final double TEMP_VOLATILITY = .2D; private final double INCLINE_BASE = 0D; private final double CORROSION_BASE = .0000234D; private final int BATCH_SIZE = 20; private final int BACKOFF = 5; private int sensorsGenerated = 0; private Map sensorCache = new HashMap<>(); final double londonLat = 51.50722D; final double londonLng = -0.12750D; final double aberdeenLat = 57.1436900D; final double aberdeenLng = -2.0981400D; final double lineATan = Math.atan((aberdeenLng - londonLng) / (aberdeenLat - londonLat)); final double lineLength = Math.sqrt((Math.pow(aberdeenLng - londonLng, 2)) + (Math.pow(aberdeenLat - londonLat, 2))); final double lineIncrement = lineLength / NUM_SEGMENTS; public SensorReadingProducer() { } public double[] getLinePoint() { // random distance double dist = Math.random() * lineLength; // calculate which segment the point is in double seg = Math.floor(dist / lineIncrement) + 1; // derive lat/lng double lat = (dist / Math.cos(lineATan)) + londonLat; double lng = (dist * Math.sin(lineATan)) + londonLng; return new double[] { lat, lng, seg }; } public SensorReading nextSensorReading(final OutputFormat format) { return nextSensorReading(format, rand.nextInt(ID_SPACE_SIZE)); } public SensorReading nextSensorReading(final OutputFormat format, int position) { String id = Integer.toHexString(position); SensorState sensorState = sensorCache.get(id); if (sensorState == null) { sensorsGenerated++; System.out.println(String.format("Generating Sensor %s", sensorsGenerated)); double[] location = getLinePoint(); String segment = Integer.toHexString(new Double(location[2]) .intValue()); double pressure = PRESSURE_BASE + (PRESSURE_VOLATILITY * rand.nextDouble()); double flow = FLOW_BASE + (FLOW_VOLATILITY * rand.nextDouble()); double temp = TEMP_BASE + (TEMP_VOLATILITY * rand.nextDouble()); double corrosion = CORROSION_BASE + (rand.nextDouble() / 1000); double incline = INCLINE_BASE + (rand.nextDouble() / 1_000_000); sensorState = new SensorState(segment, location[0], location[1], pressure, flow, temp, corrosion, incline); sensorCache.put(id, sensorState); } double pressure = sensorState.getPressure() + (PRESSURE_VOLATILITY * rand.nextDouble()); double temp = sensorState.getTemp() + (TEMP_VOLATILITY * rand.nextDouble()); double flow = sensorState.getFlowRate() + (FLOW_VOLATILITY * rand.nextDouble()); double corrosion = sensorState.getCorrosion() + (rand.nextDouble() / 1000); double incline = sensorState.getIncline() + (rand.nextDouble() / 1_000_000); SensorReading reading = new SensorReading(id, sensorState.getSegment(), System.currentTimeMillis(), sensorState.getLat(), sensorState.getLng(), pressure, temp, flow, corrosion, incline); reading.withOutputFormat(format); return reading; } private void run(final int events, final OutputFormat format, final String streamName, final String region) throws Exception { AmazonKinesis kinesisClient = new AmazonKinesisClient( new DefaultAWSCredentialsProviderChain()); kinesisClient.setRegion(Region.getRegion(Regions.fromName(region))); int count = 0; SensorReading r = null; do { r = nextSensorReading(format); try { PutRecordRequest req = new PutRecordRequest() .withPartitionKey("" + rand.nextLong()) .withStreamName(streamName) .withData(ByteBuffer.wrap(r.toString().getBytes())); kinesisClient.putRecord(req); } catch (ProvisionedThroughputExceededException e) { Thread.sleep(BACKOFF); } System.out.println(r); count++; } while (count < events); } public static void main(String[] args) throws Exception { Integer i = Integer.parseInt(args[0]); OutputFormat format = OutputFormat.valueOf(args[1]); String streamName = args[2]; String region = args[3]; new SensorReadingProducer().run(i, format, streamName, region); } } ================================================ FILE: sample/resources/BySegment-CSV.json ================================================ [{"dataExtractor": "CSV", "dateFormat": "", "dateItem": 2, "dateAttributeAlias":"sensorTS", "labelItems": [ 1 ], "labelAttributeAlias":"segment", "namespace": "BySegment-CSV", "delimiter": "|", "summaryItems": [ "max(5) max-pressure", "max(6) max-flow", "max(9) max-corrosion", "min(3) lat", "min(4) lng" ], "timeHorizons": [ "SECOND" ], "type": "SUM", "emitMetrics":"true", "writeIOPS":100, "readIOPS":25 } ] ================================================ FILE: sample/resources/BySegment-Json.json ================================================ [{"dataExtractor": "JSON", "dateFormat": "", "dateAttribute": "captureTs", "labelItems": [ "segment" ], "namespace": "BySegment-Json", "summaryItems": [ "max(pressure)", "max(flowRate)", "max(corrosionIndex)", "min(lat) lat", "min(lng) lng" ], "timeHorizons": [ "SECOND" ], "type": "SUM", "emitMetrics":"true", "writeIOPS":100, "readIOPS":25 } ] ================================================ FILE: sample/resources/BySegment-Regex.json ================================================ [{"dataExtractor": "REGEX", "dateFormat": "", "dateItem": 2, "dateAttributeAlias":"sensorTS", "labelItems": [ 1 ], "labelAttributeAlias":"segment", "namespace": "BySegment-Regex", "regularExpression": "^(.*) \\((.*)\\) ts-(\\d+) (\\d+\\.\\d+)x(\\-\\d+\\.\\d+) (\\d+.\\d+) at (\\d+.\\d+) T:(\\d+\\.\\d+) c:\\ +(\\d+\\.\\d+) deg\\ +(\\d+\\.\\d+)$", "summaryItems": [ "max(5) max-pressure", "max(6) max-flow", "max(9) max-corrosion", "min(3) lat", "min(4) lng" ], "timeHorizons": [ "SECOND" ], "type": "SUM", "emitMetrics":"true", "writeIOPS":100, "readIOPS":25 } ] ================================================ FILE: src/.gitkeep ================================================ Feel free to delete this file as soon as actual Java code is added to this directory. ================================================ FILE: src/log4j.properties ================================================ # Root logger option log4j.rootLogger=INFO, stdout # Direct log messages to stdout log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n ================================================ FILE: src/main/WebContent/.ebextensions/as.config ================================================ option_settings: - namespace: aws:autoscaling:asg option_name: MinSize value: 2 - namespace: aws:autoscaling:trigger option_name: MeasureName value: CPUUtilization - namespace: aws:autoscaling:trigger option_name: LowerThreshold value: 40 - namespace: aws:autoscaling:trigger option_name: UpperThreshold value: 90 ================================================ FILE: src/main/WebContent/META-INF/MANIFEST.MF ================================================ Manifest-Version: 1.0 Class-Path: ================================================ FILE: src/main/WebContent/WEB-INF/web.xml ================================================ KinesisAggregatorsBeanstalkApplication index.html com.amazonaws.services.kinesis.aggregators.app.AggregatorsBeanstalkApp DateQuery com.amazonaws.services.kinesis.aggregators.app.DateQueryServlet DateQuery /dateQuery KeyQuery com.amazonaws.services.kinesis.aggregators.app.ListAggregateKeysServlet KeyQuery /keyQuery LabelQuery com.amazonaws.services.kinesis.aggregators.app.QueryByLabelServlet LabelQuery /labelQuery ConfigParams com.amazonaws.services.kinesis.aggregators.app.FetchConfigurationServlet ConfigParams /configParams Configuration com.amazonaws.services.kinesis.aggregators.app.ShowConfigurationServlet Configuration /configuration ConfigFile com.amazonaws.services.kinesis.aggregators.app.ShowConfigFileServlet ConfigFile /configFile ================================================ FILE: src/main/WebContent/index.html ================================================ Kinesis Aggregators Managed Application in Elastic Beanstalk OK - Kinesis Aggregators Managed Application hosted in Elastic Beanstalk Online ================================================ FILE: src/main/WebContent/styles/styles.css ================================================ /************************************* GENERAL *************************************/ body { margin: 0; padding: 0; font: 12px/1.4em "Lucida Grande", Verdana, sans-serif; color: #333; overflow-y: scroll; text-rendering: optimizeLegibility; background-color: #d5e9ed; } h2 { font-size: 1.3em; line-height: 1.5em; font-weight: bold; margin: 20px 0 0 0; padding: 0; border-bottom: 3px solid #eee; /* icon setup */ padding: 0.2em 1em 0.2em 30px; background-position: 0 50%; background-repeat: no-repeat; } /************************************* SECTIONS *************************************/ div#content { margin: 30px auto; padding: 0 30px 15px 30px; background-color: #fff; width: 940px; /* box-shadow */ -moz-box-shadow: 0 5px 10px #aaa; -webkit-box-shadow: 0 5px 10px #aaa; box-shadow: 0 5px 10px #aaa; /* bottom corners */ -webkit-border-bottom-right-radius: 7px; -webkit-border-bottom-left-radius: 7px; -moz-border-radius-bottomright: 7px; -moz-border-radius-bottomleft: 7px; border-bottom-right-radius: 7px; border-bottom-left-radius: 7px; } /*div#content div.section {}*/ div#content div.section ul { margin: 0; padding: 1em 0 0 2em; overflow: hidden; } div#content div.section ul li { list-style-type: square; white-space: nowrap; line-height: 1.5em; } /* Section titles */ div#content div.section.s3 h2 { background-image: url(../images/drive.png); } div#content div.section.ec2 h2 { background-image: url(../images/server.png); } div#content div.section.sdb h2 { background-image: url(../images/database.png); } /************************************* CONTAINERS *************************************/ .container { zoom: 1; } .container:after { content: "."; display: block; height: 0; clear: both; visibility: hidden; } /************************************* GRIDS *************************************/ .grid { float: left; margin-right: 20px; } .gridlast { margin-right: 0; } .grid5 { width: 300px; } .grid15 { width: 940px; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/AggregateData.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.Date; import java.util.Map; public class AggregateData { private String uniqueId; private LabelSet labels; private Date date; private Map summaries; public AggregateData(String uniqueId, LabelSet labels, Date date, Map summaries) { this.uniqueId = uniqueId; this.labels = labels; this.date = date; this.summaries = summaries; } public String getUniqueId() { return this.uniqueId; } public String getLabel() { return this.labels.valuesAsString(); } public LabelSet getLabels() { return this.labels; } public Date getDate() { return this.date; } public Map getSummaries() { return this.summaries; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorGroup.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.ArrayList; import java.util.List; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.kinesis.model.Record; /** * Class which provides a simple automation around a number of aggregators. * Register any number of aggregators with the container, and then call all of * the registered aggregators aggregate and checkpoint methods through this * simple proxy */ public class AggregatorGroup implements IStreamAggregator { List aggregators = new ArrayList<>(); public AggregatorGroup() { } public AggregatorGroup(AggregatorGroup template) throws Exception { // create a new aggregator group from all of the aggregators this one // encapsulates, by instantiating new aggregators with their copy // constructors for (StreamAggregator agg : template.aggregators) { this.registerAggregator(new StreamAggregator(agg)); } } public void registerAggregator(StreamAggregator agg) { this.aggregators.add(agg); } public List getAggregators() { return this.aggregators; } /** * {@inheritDoc} */ @Override public void aggregate(List records) throws Exception { for (IStreamAggregator agg : aggregators) { agg.aggregate(records); } } public void aggregateEvents(List events) throws Exception { for (IStreamAggregator agg : aggregators) { agg.aggregateEvents(events); } } /** * {@inheritDoc} */ @Override public void checkpoint() throws Exception { for (IStreamAggregator agg : aggregators) { agg.checkpoint(); } } @Override public void initialize(String shardId) throws Exception { for (IStreamAggregator agg : aggregators) { agg.initialize(shardId); } } /** * {@inheritDoc} */ @Override public void shutdown(boolean flushState) throws Exception { for (IStreamAggregator agg : aggregators) { agg.shutdown(flushState); } } /** * N/A - use getTableNames() */ @Override public String getTableName() { return null; } public List getTableNames() { List out = new ArrayList<>(this.aggregators.size()); for (IStreamAggregator i : this.aggregators) { out.add(i.getTableName()); } return out; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorType.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; /** * Types of Aggregators supported by the Kinesis Aggregator Framework. */ public enum AggregatorType { /** * Count Aggregators maintain only an Event Count observed for the indicated * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} */ COUNT, /** * Sum Aggregators maintain an Event Count, plus a set of summary values for * data indicated on the stream as being a summary value. Summary Values can * be any of * {@link com.amazonaws.services.kinesis.aggregators.SummaryCalculation} */ SUM; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/AggregatorsConstants.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; public class AggregatorsConstants { public static final String CONFIG_URL_PARAM = "config-file-url"; public static final String CONFIG_PATH_PARAM = "config-file-path"; public static final String STREAM_NAME_PARAM = "stream-name"; public static final String NAMESPACE_PARAM = "namespace"; public static final String APP_NAME_PARAM = "application-name"; public static final String REGION_PARAM = "region"; public static final String STREAM_POSITION_PARAM = "position-in-stream"; public static final String MAX_RECORDS_PARAM = "max-records"; public static final String ENVIRONMENT_PARAM = "environment"; public static final String FAILURES_TOLERATED_PARAM = "failures-tolerated"; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/EnvironmentType.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; public enum EnvironmentType { DEV, TEST, INT, PERF, PROD; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/IStreamAggregator.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.List; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.kinesis.model.Record; /** * Stream aggregators give end users the ability to dynamically aggregate * Kinesis data in Dynamo DB. All a consumer must do is create an Aggregator and * then call aggregate within the processRecords method of an IRecordProcessor. * Please note all writes made within the aggregate context are durable. * * @author meyersi */ public interface IStreamAggregator { /** * Aggregate a set of records received from the Kinesis Client Library. * * @param records The set of Records received from a processRecords * invocation * @throws Exception */ public void aggregate(List records) throws Exception; public void aggregateEvents(List events) throws Exception; /** * Commit all aggregated data to the backing store. */ public void checkpoint() throws Exception; /** * Initialise the Aggregator on a shard. Should be called by * IRecordProcessor.initialize(). */ public void initialize(String shardId) throws Exception; /** * Terminate an Aggregator running, which will mark the process as offline * in the {@link InventoryModel} table. */ public void shutdown(boolean flushState) throws Exception; /** Get the underlying data store name for the aggregator. */ public String getTableName(); } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/InputEvent.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import com.amazonaws.services.kinesis.model.Record; public class InputEvent { private String sequenceNumber; private String partitionKey; private byte[] data; public InputEvent(Record record) { this.sequenceNumber = record.getSequenceNumber(); this.partitionKey = record.getPartitionKey(); this.data = record.getData().array(); } public InputEvent withSequence(String sequence) { this.sequenceNumber = sequence; return this; } public InputEvent withPartitionKey(String partitionKey) { this.partitionKey = partitionKey; return this; } public String getSequenceNumber() { return sequenceNumber; } public String getPartitionKey() { return partitionKey; } public byte[] getData() { return data; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/InventoryModel.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; import com.amazonaws.services.dynamodbv2.model.AttributeAction; import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.AttributeValueUpdate; import com.amazonaws.services.dynamodbv2.model.DeleteItemRequest; import com.amazonaws.services.dynamodbv2.model.GetItemResult; import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; import com.amazonaws.services.dynamodbv2.model.KeyType; import com.amazonaws.services.dynamodbv2.model.UpdateItemRequest; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils; /** * Class used to provide configuration and setup for the Worker Inventory table * in Dynamo DB. */ @SuppressWarnings("serial") public final class InventoryModel { private boolean online = false; /** * Name of the Table in Dynamo DB. */ public static final String TABLE_NAME = "KinesisAggregatorWorkerState"; /** * Column name used to store the Kinesis Stream Name for an Aggregator. */ public static final String AGGREGATOR = "aggregator"; /** * Column name used to store the Shard ID for an Aggregator. */ public static final String SHARD_ID = "shardId"; /** * Column name used to store the last time an Aggregator updated. */ public static final String LAST_WRITE_TIME = "lastWriteTime"; /** * Column name used to store the lowest sequence value updated in the last * flush of an Aggregator. */ public static final String LAST_LOW_SEQ = "lastLowSeq"; /** * Column name used to store the highest sequence value updated in the last * flush of an Aggregator. */ public static final String LAST_HIGH_SEQ = "lastHighSeq"; /** * Column name used to store the status of the running or stopped * Aggregator. */ public static final String STATUS = "status"; /** * Amount of read IOPS to provision for the Inventory table. */ public static final long READ_CAPACITY = 10L; /** * Amount of write IOPS to provision for the Inventory table. */ public static final long WRITE_CAPACITY = 10L; /** * Available states for an Aggregator to be in. */ public static enum STATE { STARTING, RUNNING, STOPPED, SERIALISATION_ERROR, UNKNOWN_ERROR; } private AmazonDynamoDB dynamoClient; public InventoryModel(AmazonDynamoDB dynamoClient) throws Exception { this.dynamoClient = dynamoClient; init(); } public InventoryModel(AWSCredentialsProvider credentials) throws Exception { this(new AmazonDynamoDBClient(credentials)); } protected void init() throws Exception { List attributes = new ArrayList() { { add(new AttributeDefinition().withAttributeName(InventoryModel.AGGREGATOR).withAttributeType( "S")); add(new AttributeDefinition().withAttributeName(InventoryModel.SHARD_ID).withAttributeType( "S")); } }; List key = new ArrayList() { { add(new KeySchemaElement().withAttributeName(InventoryModel.AGGREGATOR).withKeyType( KeyType.HASH)); add(new KeySchemaElement().withAttributeName(InventoryModel.SHARD_ID).withKeyType( KeyType.RANGE)); } }; DynamoUtils.initTable(dynamoClient, InventoryModel.TABLE_NAME, InventoryModel.READ_CAPACITY, InventoryModel.WRITE_CAPACITY, attributes, key, null); online = true; } private Map getKey(final String streamName, final String applicationName, final String namespace, final String shardId) { return new HashMap() { { put(InventoryModel.AGGREGATOR, new AttributeValue().withS(String.format("%s.%s.%s", streamName, applicationName, namespace))); put(InventoryModel.SHARD_ID, new AttributeValue().withS(shardId)); } }; } public void removeState(final String streamName, final String applicationName, final String namespace, final String shardId) throws Exception { DeleteItemRequest req = new DeleteItemRequest().withTableName(TABLE_NAME).withKey( getKey(streamName, applicationName, namespace, shardId)); dynamoClient.deleteItem(req); } /** * Update the Inventory table with the state of an Aggregator. * * @param streamName The Kinesis Stream being aggregated. * @param applicationName The application name running the aggregator. * @param workerId The worker ID which encapsulates an instance of an * Aggregator. * @param lastLowSeq The lowest sequence number observed in all records * which were flushed prior to this update. * @param lastHighSeq The highest sequence number for all records flushed in * this update. * @param lastWriteTime The write time of the data to Dynamo DB. * @param status The {@link STATE} of the Aggregator. * @throws Exception */ public void update(final String streamName, final String applicationName, final String namespace, final String shardId, final String lastLowSeq, final String lastHighSeq, final long lastWriteTime, final STATE status) throws Exception { // create the last write time value final String lastUpdateDateLabel = StreamAggregator.dateFormatter.format(new Date( lastWriteTime)); // generate the item update Map inventoryUpdate = new HashMap() { { put(InventoryModel.LAST_WRITE_TIME, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(lastUpdateDateLabel))); if (lastLowSeq != null) put(InventoryModel.LAST_LOW_SEQ, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(lastLowSeq))); if (lastHighSeq != null) put(InventoryModel.LAST_HIGH_SEQ, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(lastHighSeq))); if (status != null) put(InventoryModel.STATUS, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(status.name()))); } }; DynamoUtils.updateWithRetries( dynamoClient, new UpdateItemRequest().withTableName(InventoryModel.TABLE_NAME).withKey( getKey(streamName, applicationName, namespace, shardId)).withAttributeUpdates( inventoryUpdate)); } /** * Method which returns the update information for an Aggregator process. * * @param streamName The Stream name which is being aggregated. * @param applicationName The application which is hosting the aggregator. * @param workerId The worker ID which is running an aggregator instance. * @return Tuple of Last Write Time (String), Last Low Sequence, and Last * High Sequence */ public InventoryStatus getLastUpdate(final String streamName, final String applicationName, final String namespace, final String shardId) { GetItemResult response = dynamoClient.getItem(InventoryModel.TABLE_NAME, getKey(streamName, applicationName, namespace, shardId)); if (response.getItem() != null) { Map item = response.getItem(); AttributeValue lastTime, lowSeq, highSeq = null; lastTime = item.get(InventoryModel.LAST_WRITE_TIME); lowSeq = item.get(InventoryModel.LAST_LOW_SEQ); highSeq = item.get(InventoryModel.LAST_HIGH_SEQ); return new InventoryStatus(lastTime == null ? null : lastTime.getS(), lowSeq == null ? null : lowSeq.getS(), highSeq == null ? null : highSeq.getS()); } else { return null; } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/InventoryStatus.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; public class InventoryStatus { private String lastTime, lowSeq, highSeq; public InventoryStatus(String lastTime, String lowSeq, String highSeq) { super(); this.lastTime = lastTime; this.lowSeq = lowSeq; this.highSeq = highSeq; } public String getLastTime() { return lastTime; } public String getLowSeq() { return lowSeq; } public String getHighSeq() { return highSeq; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/LabelSet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.LinkedHashMap; import java.util.List; /** * Object which encapsulates all label values to be tracked and managed within * the Aggregator framework. Adheres to the properties of strict ordering by * insert sequence, equality on the basis of values as well as hash on the basis * of values, and a name property synthesized from the keyset, and a String * value synthesized from the value set */ public class LabelSet extends LinkedHashMap { private final String setDelimiter = "."; private String alias = null; public LabelSet() { super(); } public static LabelSet fromIntegerKeys(List keys) { LabelSet labels = new LabelSet(); for (Integer i : keys) { labels.put("" + i, null); } return labels; } public static LabelSet fromStringKeys(List keys) { LabelSet labels = new LabelSet(); for (String s : keys) { labels.put(s, null); } return labels; } @Override public String put(String key, String value) { // wrap general map put with internal pre-processing of names return super.put(StreamAggregatorUtils.methodToColumn(key), value); } public String valuesAsString() { StringBuffer sb = new StringBuffer(); for (String s : this.values()) { sb.append(s + setDelimiter); } return sb.substring(0, sb.length() - 1); } public String getName() { if (this.alias == null) { StringBuffer sb = new StringBuffer(); for (String s : this.keySet()) { sb.append(StreamAggregatorUtils.methodToColumn(s) + setDelimiter); } return sb.substring(0, sb.length() - 1); } else { return this.alias; } } public LabelSet withAlias(String alias) { this.alias = alias; return this; } @Override public boolean equals(Object o) { if (o == null) return false; if (!(o instanceof LabelSet)) return false; LabelSet other = (LabelSet) o; boolean matched = false; // match on keys for (String s : this.keySet()) { matched = false; for (String k : other.keySet()) { if (k.equals(s)) { matched = true; break; } } if (!matched) return false; } // must match on values for (String t : this.values()) { matched = false; for (String v : other.values()) { if (t.equals(v)) { matched = true; break; } } if (!matched) return false; } return true; } @Override public int hashCode() { int res = 17; for (String s : this.keySet()) { res = 31 * res + (s == null ? 0 : s.hashCode()); } for (String t : this.values()) { res = 31 * res + (t == null ? 0 : t.hashCode()); } return res; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/StreamAggregator.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.math.BigInteger; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.ClientConfiguration; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; import com.amazonaws.services.dynamodbv2.model.Condition; import com.amazonaws.services.kinesis.AmazonKinesisClient; import com.amazonaws.services.kinesis.aggregators.cache.AggregateCache; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoQueryEngine.QueryKeyScope; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.idempotency.DefaultIdempotencyCheck; import com.amazonaws.services.kinesis.aggregators.idempotency.IIdempotencyCheck; import com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter; import com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.IDataExtractor; import com.amazonaws.services.kinesis.model.Record; /** * StreamAggregator is the main implementation of the Kinesis Aggregators * framework. It provides the ability to create dynamic aggregations in Dynamo * DB for data being streamed through Kinesis. Objects are aggregated on the * basis of the unique values contained in the Aggregate Label by Date, storing * event counts. Additionally, by configuring a set of summary values on the * StreamAggregator with AggregatorType set to SUM, an additional data element * is aggregated tracking the total value observed for each element of the * stream.Data in DynamoDB is aggregated on the basis of the configured * TimeHorizon, from granularity of SECOND to FOREVER. * * @author meyersi */ public class StreamAggregator implements IStreamAggregator { public static final String AWSApplication = "AmazonKinesisAggregators"; public static final String version = ".9.2.7.3"; /** * The default column name for the aggregated value, if none is provided. */ public static final String DEFAULT_AGGREGATE_COLUMN = "aggregatedValue"; /** * The default attribute name for the date value of an aggregate, if none is * provided. */ public static final String DEFAULT_DATE_VALUE = "dateValue"; /** * The default attribute name for the count of events observed for an * aggregate value and date, if none is provided. */ public static final String EVENT_COUNT = "eventCount"; /** * The attribute name for the time horizon marker */ public static final String TIME_HORIZON_ATTR = "timeHorizonType"; /** * The attribute name used for the last write sequence value in the table. */ public static final String LAST_WRITE_SEQ = "lastWriteSeq"; /** * The attribute name used for the timestamp of the update of the aggregate. */ public static final String LAST_WRITE_TIME = "lastWriteTime"; /** * The attribute used to refer to the partition key */ public static final String REF_PARTITION_KEY = "__partition_key"; /** * The attribute used to refer to the event sequence number */ public static final String REF_SEQUENCE = "__sequence"; public static final SimpleDateFormat dateFormatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); protected String namespace; private KinesisClientLibConfiguration config; private String environment; private AmazonDynamoDB dynamoClient; private AmazonKinesisClient kinesisClient; private InventoryModel inventory; protected String tableName; protected boolean withTimeHierarchy = false; protected List timeHorizons = new ArrayList<>(); protected AggregatorType aggregatorType = AggregatorType.COUNT; protected long readCapacity; protected long writeCapacity; protected final String streamName; protected final String applicationName; protected String shardId = null; private boolean isFirstShardWorker = false; private final Log LOG = LogFactory.getLog(StreamAggregator.class); private Region region = null; protected AggregateCache cache; protected boolean online = false; protected String lowSeq; protected BigInteger highSeq = null; protected long start; private IDataExtractor dataExtractor; private IDataStore dataStore; private IIdempotencyCheck idempotencyCheck = new DefaultIdempotencyCheck(); private IMetricsEmitter metricsEmitter; private boolean raiseExceptionOnDataExtractionErrors = true; private int ignoredRecordsBelowHWM = 0; private boolean publishMetrics = false; /** * Copy Constructor * * @param template */ public StreamAggregator(StreamAggregator template) throws Exception { this.streamName = template.streamName; this.applicationName = template.applicationName; this.namespace = template.namespace; this.config = template.config; this.dataExtractor = template.dataExtractor.copy(); this.withDataStore(template.getDataStore()); this.withAggregatorType(template.aggregatorType); this.withRaiseExceptionOnDataExtractionErrors(template.raiseExceptionOnDataExtractionErrors); this.withStorageCapacity(template.readCapacity, template.writeCapacity); this.withTableName(template.tableName); this.withTimeHorizon(template.timeHorizons); this.withIdempotencyCheck(template.idempotencyCheck); if (template.publishMetrics) { this.publishMetrics = true; this.metricsEmitter = template.metricsEmitter; } } public StreamAggregator(String streamName, String applicationName, String namespace, KinesisClientLibConfiguration config, IDataExtractor dataExtractor) { this.streamName = streamName; this.applicationName = applicationName; this.namespace = namespace; this.config = config; this.dataExtractor = dataExtractor; } public void checkpoint() throws Exception { cache.flush(); lowSeq = null; // update the worker inventory showing progress to the last sequence // value inventory.update(this.streamName, this.applicationName, this.namespace, this.shardId, this.lowSeq, this.highSeq.toString(), System.currentTimeMillis(), InventoryModel.STATE.RUNNING); // warn and reset if there were any ignored records if (ignoredRecordsBelowHWM > 0) { logWarn(String .format("Processed %s records which were ignored due to being below the current processing HWM", ignoredRecordsBelowHWM)); ignoredRecordsBelowHWM = 0; } LOG.debug("Aggregator Checkpoint for Shard " + this.shardId + " Complete"); } /* * builder methods */ public StreamAggregator withStorageCapacity(Long readCapacity, Long writeCapacity) { if (readCapacity != null) this.readCapacity = readCapacity; if (writeCapacity != null) this.writeCapacity = writeCapacity; return this; } private void logInfo(String message) { LOG.info("[" + this.shardId + "] " + message); } private void logWarn(String message) { LOG.warn("[" + this.shardId + "] " + message); } private void logWarn(String message, Exception e) { LOG.warn("[" + this.shardId + "] " + message); LOG.error(e); } public void initialize(String shardId) throws Exception { // Set System properties to allow entity expansion of unlimited items in // response documents from AWS API // // see https://blogs.oracle.com/joew/entry/jdk_7u45_aws_issue_123 for // more information System.setProperty("entityExpansionLimit", "0"); System.setProperty("jdk.xml.entityExpansionLimit", "0"); this.shardId = shardId; // establish we are running on the lowest shard on the basis of hash // range AmazonKinesisClient kinesisClient = new AmazonKinesisClient( this.config.getKinesisCredentialsProvider()); if (this.config.getRegionName() != null) { region = Region.getRegion(Regions.fromName(this.config .getRegionName())); kinesisClient.setRegion(region); } try { if (this.shardId.equals(StreamAggregatorUtils.getFirstShardName( kinesisClient, this.config.getStreamName()))) { this.isFirstShardWorker = true; logInfo("Aggregator taking Primary Thread Responsibility"); } } catch (Exception e) { logWarn("Unable to establish if Worker Thread is Primary"); } validateConfig(); // set the default aggregator type if (this.aggregatorType == null) { this.aggregatorType = AggregatorType.COUNT; } if (this.dataExtractor == null) throw new InvalidConfigurationException( "Unable to create Aggregator Instance without a configured IDataStore"); // set the aggregator type on the data extractor this.dataExtractor.setAggregatorType(this.aggregatorType); this.dataExtractor.validate(); // create connections to dynamo and kinesis ClientConfiguration clientConfig = new ClientConfiguration() .withSocketTimeout(60000); this.dynamoClient = new AmazonDynamoDBAsyncClient( this.config.getDynamoDBCredentialsProvider(), clientConfig); if (region != null) this.dynamoClient.setRegion(region); this.kinesisClient = new AmazonKinesisClient( this.config.getKinesisCredentialsProvider()); if (region != null) this.kinesisClient.setRegion(region); inventory = new InventoryModel(this.dynamoClient); // get the latest sequence number checkpointed for this named aggregator // on this shard InventoryStatus lastUpdate = inventory.getLastUpdate(this.streamName, this.applicationName, this.namespace, this.shardId); if (lastUpdate != null && lastUpdate.getHighSeq() != null) { // set the current high sequence to the last high sequence this.highSeq = new BigInteger(lastUpdate.getHighSeq()); } // log that we are now starting up inventory.update(this.streamName, this.applicationName, this.namespace, this.shardId, null, null, System.currentTimeMillis(), InventoryModel.STATE.STARTING); // set the table name we will use for aggregated values if (this.tableName == null) { this.tableName = StreamAggregatorUtils.getTableName( config.getApplicationName(), this.getNamespace()); } if (this.environment != null && !this.environment.equals("")) this.tableName = String.format("%s.%s", this.environment, this.tableName); // resolve the basic data being aggregated String labelColumn = StreamAggregatorUtils.methodToColumn(dataExtractor .getAggregateLabelName()); String dateColumn = dataExtractor.getDateValueName() == null ? DEFAULT_DATE_VALUE : dataExtractor.getDateValueName(); // configure the default dynamo data store if (this.dataStore == null) { this.dataStore = new DynamoDataStore(this.dynamoClient, this.kinesisClient, this.aggregatorType, this.streamName, this.tableName, labelColumn, dateColumn) .withStorageCapacity(this.readCapacity, this.writeCapacity); this.dataStore.setRegion(region); } this.dataStore.initialise(); // configure the cache so it can do its work cache = new AggregateCache(this.shardId) .withCredentials(this.config.getKinesisCredentialsProvider()) .withAggregateType(this.aggregatorType) .withTableName(this.tableName).withLabelColumn(labelColumn) .withDateColumn(dateColumn).withDataStore(this.dataStore); // create a cloudwatch client for the cache to publish against if needed if (this.publishMetrics && this.metricsEmitter == null) { this.metricsEmitter = new CloudWatchMetricsEmitter(this.tableName, this.config.getCloudWatchCredentialsProvider()); } if (this.metricsEmitter != null) { if (this.config.getRegionName() != null) this.metricsEmitter.setRegion(region); } // add the metrics publisher to the cache if we are bound to the lowest // shard if (this.metricsEmitter != null) { cache.withMetricsEmitter(this.metricsEmitter); } cache.initialise(); // set the user agent StringBuilder userAgent = new StringBuilder( ClientConfiguration.DEFAULT_USER_AGENT); userAgent.append(" "); userAgent.append(this.AWSApplication); userAgent.append("/"); userAgent.append(this.version); this.config.getKinesisClientConfiguration().setUserAgent( userAgent.toString()); // log startup state StringBuffer sb = new StringBuffer(); for (TimeHorizon t : timeHorizons) { sb.append(String.format("%s,", t.name())); } sb.deleteCharAt(sb.length() - 1); logInfo(String .format("Amazon Kinesis Stream Aggregator Online\nStream: %s\nApplication: %s\nNamespace: %s\nWorker: %s\nGranularity: %s\nContent Extracted With: %s", streamName, applicationName, this.namespace, this.config.getWorkerIdentifier(), sb.toString(), dataExtractor.getClass().getName())); if (this.highSeq != null) logInfo(String.format("Processing Data from Seq: %s", this.highSeq)); online = true; } private void validateConfig() throws Exception { // this would only be null if the containing worker IRecordProcessor has // not called initialise() if (this.shardId == null) { throw new Exception( "Aggregator Not Online - Call Initialise to establish System State on Shard"); } // default to Hourly granularity if the customer has not configured it if (this.timeHorizons == null) { withTimeHorizon(TimeHorizon.HOUR); } } /** * Add a single * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} to the * configuration of the Aggregator * * @param horizon * TimeHorizon value to be used for aggregated data * @return */ public StreamAggregator withTimeHorizon(TimeHorizon horizon) { if (this.timeHorizons == null) this.timeHorizons = new ArrayList<>(); this.timeHorizons.add(horizon); return this; } /** * Add a set of * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} values to * the configuration of the Aggregator * * @param horizon * TimeHorizon value to be used for aggregated data * @return */ public StreamAggregator withTimeHorizon(List horizons) { if (this.timeHorizons == null) { this.timeHorizons = horizons; } else { this.timeHorizons.addAll(horizons); } return this; } /** * Add a set of * {@link com.amazonaws.services.kinesis.aggregators.TimeHorizon} values to * the configuration of the Aggregator * * @param horizon * TimeHorizon value to be used for aggregated data * @return */ public StreamAggregator withTimeHorizon(TimeHorizon... horizons) { if (this.timeHorizons == null) this.timeHorizons = new ArrayList<>(); for (TimeHorizon t : horizons) { this.timeHorizons.add(t); } return this; } /** * Set the name of the data store in Dynamo DB for the Aggregated Data * * @param tableName * The table name to use for data storage * @return */ public StreamAggregator withTableName(String tableName) { this.tableName = tableName; return this; } /** * Select an explicit * {@link com.amazonaws.servies.kinesis.aggregators.AggregatorType} for the * Aggregator. Default is COUNT * * @param t * The Aggregator Type to use * @return */ public StreamAggregator withAggregatorType(AggregatorType t) { if (t != null) { this.aggregatorType = t; } return this; } /** * Override the default behaviour of an Aggregator to fail when the data * stream cannot be deserialised. When setting this value to 'true', then * the Aggregator stream will be able to deal with bad data that cannot be * aggregated, and will simply continue working * * @param bool * Boolean indicating whether to fail when bad data is received * on the stream and cannot be deserialised * @return */ public StreamAggregator withRaiseExceptionOnDataExtractionErrors( boolean bool) { this.raiseExceptionOnDataExtractionErrors = bool; return this; } /** * Should we publish CloudWatch metrics for all captured data? * * @param bool * @return */ public StreamAggregator withCloudWatchMetrics() { this.publishMetrics = true; return this; } /** * Allow configuring a non-Default data store * * @param dataStore * @return */ public StreamAggregator withDataStore(IDataStore dataStore) { if (dataStore != null) { this.dataStore = dataStore; } return this; } /** * Allow configuring a non-Default metrics emitter * * @param metricsEmitter * @return */ public StreamAggregator withMetricsEmitter(IMetricsEmitter metricsEmitter) { if (metricsEmitter != null) { this.metricsEmitter = metricsEmitter; } return this; } /** * Allow configuring a non-Default idempotency check * * @param idempotencyCheck * @return */ public StreamAggregator withIdempotencyCheck( IIdempotencyCheck idempotencyCheck) { if (idempotencyCheck != null) { this.idempotencyCheck = idempotencyCheck; } return this; } public StreamAggregator withEnvironment(EnvironmentType environment) { this.environment = environment.name(); return this; } public StreamAggregator withEnvironment(String environment) { this.environment = environment; return this; } /* Simple property accessors */ public String getNamespace() { return this.namespace; } public IDataExtractor getDataExtractor() { return this.dataExtractor; } public IDataStore getDataStore() { return this.dataStore; } public String getTableName() { return this.tableName; } public String getLabelAttribute() { return this.dataExtractor.getAggregateLabelName(); } public String getDateAttribute() { return this.dataExtractor.getDateValueName(); } public AggregatorType getAggregatorType() { return this.aggregatorType; } public long getReadCapacity() { return this.readCapacity; } public long getWriteCapacity() { return this.writeCapacity; } public List getTimeHorizon() { return this.timeHorizons; } /** * Shut down an aggregator and mark its state as Stopped in the Inventory * Table * * @param flushState * Should the aggregator clear it's pending updates prior to * shutting down * @param withState * Final status for the aggregator * @throws Exception */ public void shutdown() throws Exception { shutdown(true); } public void shutdown(boolean flushState) throws Exception { shutdown(flushState, null); } public void shutdown(boolean flushState, InventoryModel.STATE withState) throws Exception { if (flushState) checkpoint(); if (inventory != null) inventory.update(this.streamName, this.applicationName, this.namespace, this.shardId, null, null, System .currentTimeMillis(), withState == null ? InventoryModel.STATE.STOPPED : withState); } /** * {@inheritDoc} */ public void aggregate(List records) throws Exception { List events = new ArrayList<>(); for (Record r : records) { events.add(new InputEvent(r)); } aggregateEvents(events); } /** * {@inheritDoc} */ public void aggregateEvents(List events) throws Exception { start = System.currentTimeMillis(); int aggregatedEventCount = 0; int aggregatedElementCount = 0; if (!online) { throw new Exception("Aggregator Not Initialised"); } BigInteger thisSequence; List extractedItems = null; Date eventDate = null; try { for (InputEvent event : events) { // reset extracted items extractedItems = null; if (event.getSequenceNumber() != null) { thisSequence = new BigInteger(event.getSequenceNumber()); // ignore any records which are going backward with regard // to // the current hwm if (highSeq != null && highSeq.compareTo(thisSequence) != -1) { ignoredRecordsBelowHWM++; continue; } } // set the low sequence if this is the first record received // after a flush if (lowSeq == null) lowSeq = event.getSequenceNumber(); // high sequence is always the latest value highSeq = new BigInteger(event.getSequenceNumber()); // extract the data from the input event try { extractedItems = dataExtractor.getData(event); } catch (SerializationException se) { // customer may have elected to suppress serialisation // errors if the stream is expected have heterogenous data // on it if (this.raiseExceptionOnDataExtractionErrors) { throw se; } else { logWarn(String.format( "Serialisation Exception Sequence %s Partition Key %s", event.getSequenceNumber(), event.getPartitionKey()), se); } } // data extractor may have returned multiple data elements, or // be empty if there were serialisation problems which are // suppressed if (extractedItems != null) { aggregatedEventCount++; for (AggregateData data : extractedItems) { // run the idempotency check if (!this.idempotencyCheck.doProcess( event.getPartitionKey(), event.getSequenceNumber(), data, event.getData())) { logInfo(String .format("Ignoring Event %s as it failed Idempotency Check", event.getPartitionKey())); continue; } aggregatedElementCount++; // if the data extractor didn't have a date value to // extract, then use the current time eventDate = data.getDate(); if (eventDate == null) { eventDate = new Date(System.currentTimeMillis()); } // generate the local updates, one per time horizon that // is requested for (TimeHorizon h : timeHorizons) { // atomically update the aggregate table with event // count or count + summaries cache.update( aggregatorType, data.getLabels(), (timeHorizons.size() > 1 ? h .getItemWithMultiValueFormat(eventDate) : h.getValue(eventDate)), h, event .getSequenceNumber(), 1, data .getSummaries(), dataExtractor .getSummaryConfig()); } } } } logInfo(String .format("Aggregation Complete - %s Records and %s Elements in %s ms", aggregatedEventCount, aggregatedElementCount, (System.currentTimeMillis() - start))); } catch (SerializationException se) { shutdown(true, InventoryModel.STATE.SERIALISATION_ERROR); LOG.error(se); throw se; } catch (Exception e) { shutdown(true, InventoryModel.STATE.UNKNOWN_ERROR); LOG.error(e); throw e; } } /** * Return the stored value for a label and date value at the configured time * granularity * * @param label * The Aggregated Label Value to get data for * @param dateValue * The Date Value to obtain data from * @param h * The Time Horizon to query * @return */ public List> queryValue(String label, Date dateValue, ComparisonOperator comp) throws Exception { if (!(this.dataStore instanceof DynamoDataStore)) { throw new Exception( "Unable to Query by Date unless Data Store is Dynamo DB"); } if (comp != null && comp.equals(ComparisonOperator.BETWEEN)) { throw new InvalidConfigurationException( "Between Operator Not Supported"); } return ((DynamoDataStore) this.dataStore).queryEngine().queryByKey( label, dateValue, comp); } /** * Query all data in the data store for a given range of date values and * time horizon * * @param dateValue * The date to search relative to * @param h * The Time Horizon to limit search to * @param comp * The Comparison Operator to be applied to the dateValue, such * as 'equal' EQ or 'greater than' GT * @return A list of data stored in Dynamo DB for the time range * @throws Exception */ public List> queryByDate(Date dateValue, TimeHorizon h, ComparisonOperator comp, int threads) throws Exception { if (!(this.dataStore instanceof DynamoDataStore)) { throw new Exception( "Unable to Query by Date unless Data Store is Dynamo DB"); } if (comp.equals(ComparisonOperator.BETWEEN)) { throw new InvalidConfigurationException( "Between Operator Not Supported"); } // resolve the query date based on if we are managing multiple time // values or a single String queryDate = null; if (this.timeHorizons.size() > 1) { queryDate = h.getItemWithMultiValueFormat(dateValue); } else { queryDate = h.getValue(dateValue); } // setup the query condition on date Map conditions = new HashMap<>(); Condition dateCondition = new Condition().withComparisonOperator(comp) .withAttributeValueList(new AttributeValue().withS(queryDate)); conditions.put(this.dataExtractor.getDateValueName(), dateCondition); List> items = ((DynamoDataStore) this.dataStore) .queryEngine().parallelQueryDate( this.dataExtractor.getDateValueName(), conditions, threads); return items; } public List parallelQueryKeys(QueryKeyScope scope, int threads) throws Exception { if (!(this.dataStore instanceof DynamoDataStore)) { throw new Exception( "Unable to Query Keys unless Data Store is Dynamo DB"); } logInfo(String .format("Executing Unique Key Scan on %s with Scope %s using %s Threads", this.tableName, scope.toString(), threads)); return ((DynamoDataStore) this.dataStore).queryEngine() .parallelQueryKeys(scope, threads); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/StreamAggregatorUtils.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.io.File; import java.math.BigInteger; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.GetItemRequest; import com.amazonaws.services.kinesis.AmazonKinesisClient; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils; import com.amazonaws.services.kinesis.model.LimitExceededException; import com.amazonaws.services.kinesis.model.ResourceNotFoundException; import com.amazonaws.services.kinesis.model.Shard; import com.amazonaws.services.kinesis.model.StreamDescription; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; /** * Utility methods used across the Amazon Kinesis Aggregators framework. */ public class StreamAggregatorUtils { private static final Log LOG = LogFactory .getLog(StreamAggregatorUtils.class); private static final String rsTimeformat = "yyyy-mm-dd hh:mi:ss"; private static final ObjectMapper mapper = new ObjectMapper(); static { mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true); mapper.configure(DeserializationFeature.WRAP_EXCEPTIONS, false); } private StreamAggregatorUtils() { } /** * Helper method which converts input values from Aggregator configurations * into names for attributes in Dynamo DB. In particular this supports * Object based Aggregators who will be configured using get methods. For * example, this methods will turn 'getValue' into 'value' and 'isSomething' * to 'isSomething'. * * @param methodName * The name of the method to be converted into an Attribute Name. * @return A string value to be used as the corresponding attribute name. */ public static String methodToColumn(String methodName) { if (methodName.startsWith("get")) { return methodName.substring(3, 4).toLowerCase() + methodName.substring(4); } else { return methodName.substring(0, 1).toLowerCase() + methodName.substring(1); } } /** * Returns a statement which can be used to create an External Table in Hive * which wraps the Aggregator Table indicated, using the required name in * Hive. * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param hiveTableName * The table name to generate for the Hive Table. * @param dynamoTable * The name of the aggregator table in Dynamo DB. * @return A CREATE EXTERNAL TABLE statement to be used in Hive * @throws Exception */ public static String getDynamoHiveWrapper(AmazonDynamoDB dynamoClient, String hiveTableName, String dynamoTable) throws Exception { LOG.info("Generating Hive Integration Statement"); StringBuffer sb = new StringBuffer(); sb.append(String.format("CREATE EXTERNAL TABLE %s(", hiveTableName)); // add the hive table spec List tableDefinition = DynamoUtils.getDictionaryEntry( dynamoClient, dynamoTable); for (String s : tableDefinition) { sb.append(String.format("%s string,", s)); } sb.replace(sb.length() - 1, sb.length(), ""); sb.append(String .format(") STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"%s\", \"dynamodb.column.mapping\" = \"", dynamoTable)); for (String s : tableDefinition) { sb.append(String.format("%s:%s,", s, s)); } sb.replace(sb.length() - 1, sb.length(), ""); sb.append("))"); return sb.toString(); } /** * Helper method to generate a Redshift CREATE TABLE command which matches * the structure of the aggregate table, and a COPY command which will load * the table data from an Aggregator table into a Redshift Table. * @param * dynamoClient Dynamo DB Client to use for connection to Dynamo DB. * * @param redshiftTableName * The table name to use in Redshift. * @param dynamoTable * The Aggregator table name in Dynamo DB. * @return A String which contains the create table and copy commands to be * issued against redshift. */ public static String getRedshiftCopyCommand( final AmazonDynamoDB dynamoClient, String redshiftTableName, String dynamoTable) throws Exception { LOG.info("Generating Redshift Copy Command"); StringBuffer sb = new StringBuffer(); // generate the create table statement sb.append(String.format("CREATE TABLE %S(\n", redshiftTableName)); int i = 0; List tableStructure = DynamoUtils.getDictionaryEntry( dynamoClient, dynamoTable); String columnSpec = null; String dataType = null; for (String s : tableStructure) { i++; switch (s) { case StreamAggregator.LAST_WRITE_SEQ: dataType = "BIGINT"; break; case StreamAggregator.LAST_WRITE_TIME: dataType = "TIMESTAMP"; break; case StreamAggregator.EVENT_COUNT: dataType = "INT"; break; default: if (s.contains("-SUM") || s.contains("-MIN") || s.contains("-MAX")) { dataType = "INT"; } else { dataType = "VARCHAR(1000)"; } break; } ; columnSpec = s + " " + dataType; if (i == tableStructure.size()) { sb.append(columnSpec); } else { sb.append(columnSpec + ","); } } sb.append(");\n\n"); // generate the copy command sb.append(String .format("copy %s from 'dynamodb://%s' credentials 'aws_access_key_id=;aws_secret_access_key=' readratio 50 timeformat 'yyyy-MM-dd hh:mi:ss';", redshiftTableName, dynamoTable, rsTimeformat)); return sb.toString(); } /** * Index name which should be used for the last write sequence GSI on a * table * * @param dynamoTable * The table name in Dynamo DB. * @return The name for the global secondary index on the table for last * write sequence. */ public static final String getLastWriteSeqIndexName(String dynamoTable) { return dynamoTable + "-seq"; } /** * Index name which should be used for the last write sequence GSI on a * table * * @param dynamoTable * The table name in Dynamo DB. * @return The name for the global secondary index on the table for last * write sequence. */ public static final String getDateDimensionIndexName(String dynamoTable, String dateAttribute) { return String.format("%s-%s", dynamoTable, dateAttribute); } /** * Method which will generate a correctly formatted primary key for a dynamo * table hosting aggregated data. * * @param updateKey * An {@link UpdateKey} which should be pivoted into a key. * @return */ public static Map getTableKey(UpdateKey updateKey) { return getTableKey(updateKey.getAggregateColumnName(), updateKey.getAggregatedValue(), updateKey.getDateValueColumnName(), updateKey.getDateValue()); } /** * Method which will generate a correctly formatted primary key for a dynamo * table hosting aggregated data. * * @param keyColumnName * The attribute name in the table to be used as the first part * of a hash key. * @param fieldValue * The value of the hash key to query for. * @param dateColumnName * The attribute name of the date column to be used as the range * key. * @param dateValue * The value of the range key value to query for. * @return */ protected static Map getTableKey( String keyColumnName, String fieldValue, String dateColumnName, String dateValue) { HashMap key = new HashMap<>(); key.put(keyColumnName, new AttributeValue().withS(fieldValue)); key.put(dateColumnName, new AttributeValue().withS(dateValue)); return key; } protected static Map getValue( final AmazonDynamoDB dynamoClient, final String tableName, final UpdateKey key) { GetItemRequest req = new GetItemRequest().withTableName(tableName) .withKey(getTableKey(key)); return dynamoClient.getItem(req).getItem(); } protected static String getTableName(final String applicationName, final String namespace) { return String.format("%s-%s", applicationName, namespace); } public static JsonNode asJsonNode(String s) throws Exception { return mapper.readTree(s); } public static JsonNode asJsonNode(File f) throws Exception { return mapper.readTree(f); } public static JsonNode readJsonValue(JsonNode json, String atPath) { if (!atPath.contains(".")) { return json.get(atPath); } else { String[] path = atPath.split("\\."); JsonNode node = json.get(path[0]); for (int i = 1; i < path.length; i++) { node = node.path(path[i]); } return node; } } public static String readValueAsString(JsonNode json, String atPath) { JsonNode node = readJsonValue(json, atPath); return node == null ? null : node.asText(); } /** * Get a list of all Open shards ordered by their start hash * * @param streamName * @return A Map of only Open Shards indexed by the Shard ID */ public static Map getOpenShards( AmazonKinesisClient kinesisClient, String streamName) throws Exception { Map shardMap = new LinkedHashMap<>(); final int BACKOFF_MILLIS = 10; final int MAX_DESCRIBE_ATTEMPTS = 10; int describeAttempts = 0; StreamDescription stream = null; try { do { try { stream = kinesisClient.describeStream(streamName) .getStreamDescription(); } catch (LimitExceededException e) { Thread.sleep(2 ^ describeAttempts * BACKOFF_MILLIS); describeAttempts++; } } while (stream == null && describeAttempts < MAX_DESCRIBE_ATTEMPTS); } catch (InterruptedException e) { LOG.error(e); throw e; } if (stream == null) { throw new Exception(String.format( "Unable to describe Stream after %s attempts", MAX_DESCRIBE_ATTEMPTS)); } Collection openShardNames = new ArrayList(); // load all the shards on the stream for (Shard shard : stream.getShards()) { openShardNames.add(shard.getShardId()); shardMap.put(shard.getShardId(), shard); // remove this shard's parents from the set of active shards - // we // can't do anything to them if (shard.getParentShardId() != null) { openShardNames.remove(shard.getParentShardId()); } if (shard.getAdjacentParentShardId() != null) { openShardNames.remove(shard.getAdjacentParentShardId()); } } // create a List of Open shards for sorting List shards = new ArrayList(); for (String s : openShardNames) { shards.add(shardMap.get(s)); } // sort the list into lowest start hash order Collections.sort(shards, new Comparator() { public int compare(Shard o1, Shard o2) { return new BigInteger(o1.getHashKeyRange().getStartingHashKey()) .compareTo(new BigInteger(o2.getHashKeyRange() .getStartingHashKey())); } }); // rebuild the shard map into the correct order shardMap.clear(); for (Shard s : shards) { shardMap.put(s.getShardId(), s); } return shardMap; } public static Shard getFirstShard(AmazonKinesisClient kinesisClient, String streamName) throws Exception { return getOpenShards(kinesisClient, streamName).values().iterator() .next(); } public static String getFirstShardName(AmazonKinesisClient kinesisClient, String streamName) throws Exception { return getFirstShard(kinesisClient, streamName).getShardId(); } public static int getShardCount(AmazonKinesisClient kinesisClient, String streamName) throws Exception { return getOpenShards(kinesisClient, streamName).keySet().size(); } /** * Strip the TimeHorizon abbreviation from a dateValueItem used in DynamoDB * with multi-value format */ public static String extractDateFromMultivalue(TimeHorizon t, String date) { if (date.startsWith(t.getAbbrev())) { return date.replaceAll("^" + t.getAbbrev() + "-", ""); } else { return date; } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/TableKeyStructure.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.util.HashSet; import java.util.Set; public class TableKeyStructure { private String labelAttributeName, labelAttributeValue, dateAttributeName; private Set dateValues; public TableKeyStructure() { } public TableKeyStructure(String labelAttributeName, String labelAttributeValue, String dateAttributeName) { this.labelAttributeName = labelAttributeName; this.labelAttributeValue = labelAttributeValue; this.dateAttributeName = dateAttributeName; } public TableKeyStructure(String labelAttributeName, String labelAttributeValue, String dateAttributeName, String dateAttributeValue) { this.labelAttributeName = labelAttributeName; this.labelAttributeValue = labelAttributeValue; this.dateAttributeName = dateAttributeName; this.dateValues = new HashSet<>(); this.dateValues.add(dateAttributeValue); } public TableKeyStructure withDateValue(String dateValue) { if (this.dateValues == null) { this.dateValues = new HashSet<>(); } this.dateValues.add(dateValue); return this; } public String getLabelAttributeName() { return this.labelAttributeName; } public String getLabelAttributeValue() { return this.labelAttributeValue; } public String getDateAttributeName() { return this.dateAttributeName; } public Set getDateValues() { return this.dateValues; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/TimeHorizon.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; public enum TimeHorizon { SECOND(0, "MM-dd HH:mm:ss", "s"), MINUTE(1, "MM-dd HH:mm:00", "m"), MINUTES_GROUPED(1, null, "mb") { private Calendar calendar = Calendar.getInstance(); private int scope; @Override public int getGranularity() { return this.scope; } @Override public void setGranularity(int bucketSize) { this.scope = bucketSize; } @Override public String getValue(Date forDate) { calendar.setTime(forDate); int minutes = calendar.get(Calendar.MINUTE); int bucket = new Double(Math.floor(minutes / scope) * scope).intValue(); return String.format("%s:%02d:00", new SimpleDateFormat("yyyy-MM-dd HH").format(forDate), bucket); } }, HOUR(2, "MM-dd HH:00:00", "H"), DAY(3, "MM-dd 00:00:00", "d"), WEEK(4, "ww", "W"), MONTH(5, "MM-01 00:00:00", "M"), YEAR( 6, "01-01 00:00:00", "Y"), FOREVER(999, "", "*") { /** * Override the getValue method, as TimeHorizon.FOREVER is for all * values regardless of time period. We'll set the value to '*' as * Dynamo wont allow an empty value */ @Override public String getValue(Date forDate) { return "*"; } }; private TimeHorizon(int placemark, String mask, String abbrev) { this.placemark = placemark; this.mask = mask; this.abbrev = abbrev; } private int placemark; private String mask; private String abbrev; private SimpleDateFormat getMask() { return new SimpleDateFormat("yyyy-" + this.mask); } public String getAbbrev() { return this.abbrev; } public String getItemWithMultiValueFormat(Date dateValue) { return getAbbrev() + "-" + getValue(dateValue); } public String getValue(Date forDate) { return getMask().format(forDate); } /** * Returns the full hierarchy of TimeHorizon values from this Horizon to * FOREVER * * @return */ public List getFullHierarchy() { return getHierarchyTo(TimeHorizon.FOREVER); } /** * Get a list of all TimeHorizons in decreasing granularity, to the * indicated Time Horizon. For example, if we requested * TimeHorizon.MINUTE.getHierarchyTo(TimeHorizon.MONTH), we would receive a * list of MINUTE, HOUR, DAY, MONTH * * @param t * @return */ public List getHierarchyTo(TimeHorizon t) { List hierarchy = new ArrayList<>(); for (TimeHorizon h : TimeHorizon.values()) { // don't include Minutes Group in automated hierarchies as they are // a peer to Minutes if (h.placemark >= this.placemark && h.placemark <= t.placemark && !h.equals(TimeHorizon.MINUTES_GROUPED)) { hierarchy.add(h); } } return hierarchy; } public int getGranularity() throws Exception { throw new Exception("Not Implemented"); } public void setGranularity(int scope) throws Exception { throw new Exception("Not Implemented"); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Aggregate.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.annotations; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore; import com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter; /** * Annotations to indicate that a Class contains an Aggregator Configuration */ @Target(ElementType.TYPE) @Retention(RetentionPolicy.RUNTIME) public @interface Aggregate { /** * The type of Aggregator to create. Default is COUNT. * * @return */ AggregatorType type() default AggregatorType.COUNT; /** The list of Time Horizons to Aggregate on */ TimeHorizon[] timeHorizons() default TimeHorizon.HOUR; int[] timeGranularity() default -1; /** * The namespace for the Aggregation Data. * * @return */ String namespace() default ""; /** * Should the Aggregator fail on errors in reading data from the stream for * Aggregation. * * @return */ boolean failOnDataExtractionErrors() default true; /** * Should the aggregator publish intrumentation metrics? The default metrics * emitter is CloudWatch * * @return */ boolean emitMetrics() default false; /** * Configure an IDataStore other than the default Dynamo DB Datastore * * @return */ Class dataStore() default DynamoDataStore.class; /** * Configure an IMetricsEmitter other than the default CloudWatch metrics * service */ Class metricsEmitter() default CloudWatchMetricsEmitter.class; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/AnnotationProcessor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.annotations; import java.lang.annotation.Annotation; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.exception.ClassNotAnnotatedException; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; import com.amazonaws.services.kinesis.aggregators.summary.SummaryElement; /** * AnnotationProcess provides a helper mechanism to extract information from an * Annotationed Class which will be used to configure an Object Serialisation * based Aggregation. See * {@link com.amazonaws.services.kinesis.aggregators.factory.ObjectAggregatorFactory} * . */ public class AnnotationProcessor { @SuppressWarnings("rawtypes") private Class clazz; private LabelSet labelSet = new LabelSet(); private List labelMethodNames = new ArrayList<>(); private Map labelMethodMap = new LinkedHashMap<>(); private String dateMethodName; private Method dateMethod; private Map summaryMethods = new HashMap<>(); private SummaryConfiguration summaryConfig = new SummaryConfiguration(); private AggregatorType type; private List timeHorizons; private boolean timeHierarchy; private String namespace; private boolean failOnDataExtractionErrors = true; private boolean emitMetrics = false; private Class dataStore; private Class metricsEmitter; private AnnotationProcessor() { } /** * Create a new Annotation Processor for an Annotated Class. * * @param clazz The Class to extract annotation information from. * @throws Exception */ public AnnotationProcessor(@SuppressWarnings("rawtypes") Class clazz) throws Exception { this.clazz = clazz; boolean isAnnotated = false; // get the class annotations for (Annotation a : this.clazz.getAnnotations()) { if (a.annotationType().equals(Aggregate.class)) { isAnnotated = true; Aggregate annotatedObject = (Aggregate) a; this.namespace = annotatedObject.namespace(); if (this.namespace.contains(" ")) throw new ClassNotAnnotatedException("Namespace may not contain spaces"); this.type = annotatedObject.type(); // process time horizon annotations int[] timeGranularities = annotatedObject.timeGranularity(); TimeHorizon[] horizons = annotatedObject.timeHorizons(); this.timeHorizons = new ArrayList<>(); int i = 0; for (TimeHorizon h : horizons) { if (h.equals(TimeHorizon.MINUTES_GROUPED)) { try { // prevent use of the default time granularity if (timeGranularities[i] == -1) { throw new ArrayIndexOutOfBoundsException(); } h.setGranularity(timeGranularities[i]); } catch (ArrayIndexOutOfBoundsException e) { throw new InvalidConfigurationException( "Unable to generate a MINUTES_GROUPED Time Horizon without configuration of timeGranularity"); } } this.timeHorizons.add(h); i++; } this.failOnDataExtractionErrors = annotatedObject.failOnDataExtractionErrors(); this.emitMetrics = annotatedObject.emitMetrics(); this.dataStore = annotatedObject.dataStore(); this.metricsEmitter = annotatedObject.metricsEmitter(); } } if (!isAnnotated) throw new ClassNotAnnotatedException( "Cannot get Aggregator Config from non-Annotated Class"); // process the method annotations if (isAnnotated) { for (Method m : this.clazz.getDeclaredMethods()) { // label method if (m.getAnnotation(Label.class) != null) { this.labelMethodNames.add(m.getName()); m.setAccessible(true); this.labelMethodMap.put(m.getName(), m); this.labelSet.put(m.getName(), null); } // date method if (m.getAnnotation(DateValue.class) != null) { this.dateMethodName = m.getName(); m.setAccessible(true); this.dateMethod = m; } // summary methods Annotation summary = m.getAnnotation(Summary.class); if (summary != null) { m.setAccessible(true); this.summaryMethods.put(m.getName(), m); // process the summary configuration SummaryCalculation[] requestedCalcs = ((Summary) summary).type(); if (requestedCalcs != null) { for (SummaryCalculation c : requestedCalcs) { this.summaryConfig.add(m.getName(), new SummaryElement(m.getName(), c)); } } else { this.summaryConfig.add(m.getName(), new SummaryElement(m.getName(), SummaryCalculation.SUM)); } } } } } public List getLabelMethodNames() { return this.labelMethodNames; } public Map getLabelMethods() { return this.labelMethodMap; } public String getDateMethodName() { return this.dateMethodName; } public Method getDateMethod() { return this.dateMethod; } public Map getSummaryMethods() { return this.summaryMethods; } public SummaryConfiguration getSummaryConfig() { return this.summaryConfig; } public AggregatorType getType() { return this.type; } public List getTimeHorizon() { return this.timeHorizons; } public boolean hasTimeHierarchy() { return this.timeHierarchy; } public boolean shouldFailOnDataExtractionErrors() { return this.failOnDataExtractionErrors; } public boolean shouldEmitMetrics() { return this.emitMetrics; } public Class getMetricsEmitter() { return this.metricsEmitter; } public Class getDataStore() { return this.dataStore; } public String getNamespace() { return this.namespace; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/DateValue.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.annotations; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** * Marker Annotation indicating that a method should be used as the date value * for Aggregation. */ @Target(ElementType.METHOD) @Retention(RetentionPolicy.RUNTIME) public @interface DateValue { } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Label.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.annotations; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** * Marker Annotation indicating that this indicated method is the label to be * used for Aggregation. */ @Target(ElementType.METHOD) @Retention(RetentionPolicy.RUNTIME) public @interface Label { } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/annotations/Summary.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.annotations; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; import java.util.Arrays; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; /** * Annotation which indicates that a method should be used as a summary * aggregation. If no type is indicated then it will be used as a * {@link com.amazonaws.services.kinesis.aggregators.SummaryCalculation.SUM}. */ @Target(ElementType.METHOD) @Retention(RetentionPolicy.RUNTIME) public @interface Summary { /** * The type of summary calculations to apply to the method. * * @return */ public SummaryCalculation[] type() default SummaryCalculation.SUM; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/AbstractQueryServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; public abstract class AbstractQueryServlet extends HttpServlet { public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doAction(request, response); } public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doAction(request, response); } protected abstract void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException; protected void doError(HttpServletResponse response, String message) throws ServletException { try { response.getWriter().print(message); response.setStatus(400); } catch (IOException e) { throw new ServletException(e); } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/AggregatorsBeanstalkApp.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.consumer.AggregatorConsumer; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; public class AggregatorsBeanstalkApp implements ServletContextListener { private static final Log LOG = LogFactory.getLog(AggregatorsBeanstalkApp.class); protected static final String AGGREGATOR_GROUP_PARAM = "aggregator-group"; private AggregatorConsumer consumer; private Thread t; @Override public void contextDestroyed(ServletContextEvent arg0) { try { consumer.shutdown(); t.interrupt(); } catch (Exception e) { LOG.error(e); } } @SuppressWarnings({ "unchecked" }) @Override public void contextInitialized(ServletContextEvent contextEvent) { String configPath = System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM); if (configPath != null && !configPath.equals("")) { LOG.info("Starting Managed Beanstalk Aggregators Worker"); String streamNameParam = System.getProperty(AggregatorsConstants.STREAM_NAME_PARAM); String appNameParam = System.getProperty(AggregatorsConstants.APP_NAME_PARAM); String regionNameParam = System.getProperty(AggregatorsConstants.REGION_PARAM); String streamPosParam = System.getProperty(AggregatorsConstants.STREAM_POSITION_PARAM); String maxRecordsParam = System.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM); String environmentParam = System.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM); String failuresToleratedParam = System.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM); if (streamNameParam == null || streamNameParam.equals("") || appNameParam == null || appNameParam.equals("")) { LOG.error(String.format( "Unable to run Beanstalk Managed Aggregator Consumer without Configuration of Parameters %s and %s. Application is Idle.", AggregatorsConstants.STREAM_NAME_PARAM, AggregatorsConstants.APP_NAME_PARAM)); return; } InitialPositionInStream initialPosition = null; if (streamPosParam != null) { try { initialPosition = InitialPositionInStream.valueOf(streamPosParam); LOG.info(String.format("Starting from %s Position in Stream", streamPosParam)); } catch (Exception e) { LOG.error(String.format("%s is an invalid Initial Position in Stream", streamPosParam)); return; } } try { AggregatorConsumer consumer = new AggregatorConsumer(streamNameParam, appNameParam, configPath); // add consumer parameters, if set from System Properties if (regionNameParam != null && !regionNameParam.equals("")) { consumer.withRegionName(regionNameParam); } if (initialPosition != null) { consumer.withInitialPositionInStream(initialPosition.name()); } if (maxRecordsParam != null && !maxRecordsParam.equals("")) { consumer.withMaxRecords(Integer.parseInt(maxRecordsParam)); } if (environmentParam != null && !environmentParam.equals("")) { consumer.withEnvironment(environmentParam); } if (failuresToleratedParam != null && !failuresToleratedParam.equals("")) { consumer.withToleratedWorkerFailures(Integer.parseInt(failuresToleratedParam)); } // configure the consumer so that the aggregators get // instantiated consumer.configure(); AggregatorGroup aggGroup = consumer.getAggregators(); // put the aggregator group reference and configureation // references into the application context contextEvent.getServletContext().setAttribute(AGGREGATOR_GROUP_PARAM, aggGroup); contextEvent.getServletContext().setAttribute( AggregatorsConstants.STREAM_NAME_PARAM, streamNameParam); LOG.info("Registered Stream and Aggregator Group with Servlet Context"); // start the consumer final class ConsumerRunner implements Runnable { final AggregatorConsumer consumer; public ConsumerRunner(AggregatorConsumer consumer) { this.consumer = consumer; } @Override public void run() { try { consumer.run(); } catch (Exception e) { e.printStackTrace(); LOG.error(e); } } } t = new Thread(new ConsumerRunner(consumer)); t.start(); } catch (Exception e) { LOG.error(e); } } else { LOG.warn(String.format( "No Aggregators Configuration File found in Beanstalk Configuration %s. Application is Idle", AggregatorsConstants.CONFIG_URL_PARAM)); } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/DateQueryServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.io.PrintWriter; import java.util.Date; import java.util.List; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; public class DateQueryServlet extends AbstractQueryServlet { public static final String NAMESPACE_PARAM = "namespace"; public static final String DATE_VALUE_PARAM = "date-value"; public static final String OPERATOR_PARAM = "operator"; public static final String GRANULARITY_PARAM = "granularity"; public static final int QUERY_THREADS = 10; private void respondWith(HttpServletResponse response, List> queryResult) throws IOException { response.setStatus(200); // cors grant response.setHeader("Access-Control-Allow-Origin", "*"); PrintWriter w = response.getWriter(); w.println("["); int i = 0; // write out the response values as json if (queryResult != null) { for (Map map : queryResult) { i++; int j = 0; w.print("{"); for (String s : map.keySet()) { j++; String toPrint = map.get(s).getS(); if (toPrint == null) { toPrint = map.get(s).getN(); } w.print(String.format("\"%s\":\"%s\"", s, toPrint)); if (j != map.keySet().size()) { w.println(","); } } w.print("}"); if (i != queryResult.size()) { w.println(","); } } } w.print("]"); } public void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String namespace = request.getParameter(NAMESPACE_PARAM); String dateValue = request.getParameter(DATE_VALUE_PARAM); String operator = request.getParameter(OPERATOR_PARAM); String granularity = request.getParameter(GRANULARITY_PARAM); // create the date item Date d = null; try { d = StreamAggregator.dateFormatter.parse(dateValue); } catch (Exception e) { doError(response, String.format("Date Parameter must be in format %s", StreamAggregator.dateFormatter.getDateFormatSymbols().toString())); return; } // create the ComparisonOperator for Dynamo from the argument ComparisonOperator c = null; try { c = ComparisonOperator.fromValue(operator); } catch (Exception e) { doError(response, String.format("%s is an invalid Comparison Operator", operator)); return; } // create the Time Horizon value from the argument TimeHorizon h = null; try { h = TimeHorizon.valueOf(granularity); } catch (Exception e) { doError(response, String.format("%s is an invalid Granularity", granularity)); return; } String streamName = (String) request.getServletContext().getAttribute( AggregatorsConstants.STREAM_NAME_PARAM); AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute( AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM); if (aggGroup == null) { doError(response, "Aggregator Application Not Initialised"); return; } // initialise the aggregator group onto shard 'none' for this operation // - it may already be initialised try { aggGroup.initialize("none"); } catch (Exception e) { throw new ServletException(e); } // put the initialised aggregator group back into the context request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM, aggGroup); // acquire the correct aggregator by namespace for (StreamAggregator agg : aggGroup.getAggregators()) { if (agg.getNamespace().equals(namespace)) { // run the query try { respondWith(response, agg.queryByDate(d, h, c, QUERY_THREADS)); return; } catch (Exception e) { throw new ServletException(e); } } } // shouldn't get here, so bail with a meaning error on namespace doError(response, String.format("Unable to acquire Aggregator with Namespace %s", namespace)); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/FetchConfigurationServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.configuration.ConfigFileUtils; public class FetchConfigurationServlet extends AbstractQueryServlet { private void respondWith(HttpServletResponse response, Map configItems) throws IOException { response.setStatus(200); // cors grant response.setHeader("Access-Control-Allow-Origin", "*"); PrintWriter w = response.getWriter(); int i = 0; // write out the response values as json w.println("{"); int resultCount = 0; for (String s : configItems.keySet()) { resultCount++; String value = configItems.get(s); w.print(String.format("\"%s\":%s", s, value == null ? "null" : String.format("\"%s\"", value))); if (resultCount != configItems.size()) { w.println(","); } } w.print("}"); } @Override protected void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { try { Map config = new HashMap<>(); // required items config.put(AggregatorsConstants.REGION_PARAM, System.getProperty(AggregatorsConstants.REGION_PARAM)); config.put(AggregatorsConstants.STREAM_NAME_PARAM, System.getProperty(AggregatorsConstants.STREAM_NAME_PARAM)); config.put(AggregatorsConstants.APP_NAME_PARAM, System.getProperty(AggregatorsConstants.APP_NAME_PARAM)); config.put(AggregatorsConstants.CONFIG_URL_PARAM, System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM)); config.put( "fetch-config-url", ConfigFileUtils.makeConfigFileURL(System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM))); // optional items config.put(AggregatorsConstants.ENVIRONMENT_PARAM, System.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM)); config.put(AggregatorsConstants.MAX_RECORDS_PARAM, System.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM)); config.put(AggregatorsConstants.FAILURES_TOLERATED_PARAM, System.getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM)); respondWith(response, config); } catch (Exception e) { throw new ServletException(e); } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/ListAggregateKeysServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.io.PrintWriter; import java.util.List; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TableKeyStructure; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoQueryEngine.QueryKeyScope; public class ListAggregateKeysServlet extends AbstractQueryServlet { public static final String NAMESPACE_PARAM = "namespace"; public static final String SCOPE_PARAM = "scope"; public static final int QUERY_THREADS = 3; private void respondWith(HttpServletResponse response, List queryResult) throws IOException { response.setStatus(200); // cors grant response.setHeader("Access-Control-Allow-Origin", "*"); PrintWriter w = response.getWriter(); w.println("{"); int i = 0; // write out the response values as json if (queryResult != null) { int result = 0; for (TableKeyStructure t : queryResult) { if (result == 0) { w.println(String.format("\"labelName\":\"%s\",", t.getLabelAttributeName())); w.println(String.format("\"dateName\":\"%s\",", t.getDateAttributeName())); w.println("\"values\":["); } // write the value as a struct w.print("{"); w.print(String.format("\"value\":\"%s\"", t.getLabelAttributeValue())); int dateItem = 0; if (t.getDateValues() != null) { if (dateItem == 0) { w.print(",\n\"dates\":["); } for (String s : t.getDateValues()) { // write the date value w.print(String.format("\"%s\"", s)); if (dateItem != t.getDateValues().size() - 1) { w.println(","); } else { w.print("]"); } dateItem++; } } w.print("}"); if (result != queryResult.size() - 1) { w.println(","); } result++; } } w.print("]}"); } @Override protected void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String namespace = request.getParameter(NAMESPACE_PARAM); String scope = request.getParameter(SCOPE_PARAM); // resolve the scope QueryKeyScope queryScope = null; try { queryScope = QueryKeyScope.valueOf(scope); } catch (Exception e) { doError(response, String.format("Invalid Query Scope %s", scope)); return; } String streamName = (String) request.getServletContext().getAttribute( AggregatorsConstants.STREAM_NAME_PARAM); AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute( AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM); if (aggGroup == null) { doError(response, "Aggregator Application Not Initialised"); return; } // initialise the aggregator group onto shard 'none' for this operation // - it may already be initialised try { aggGroup.initialize("none"); } catch (Exception e) { throw new ServletException(e); } // put the initialised aggregator group back into the context request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM, aggGroup); // acquire the correct aggregator by namespace for (StreamAggregator agg : aggGroup.getAggregators()) { if (agg.getNamespace().equals(namespace)) { // run the query try { respondWith(response, agg.parallelQueryKeys(queryScope, QUERY_THREADS)); return; } catch (Exception e) { throw new ServletException(e); } } } // shouldn't get here, so bail with a meaning error on namespace doError(response, String.format("Unable to acquire Aggregator with Namespace %s", namespace)); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/QueryByLabelServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.io.PrintWriter; import java.text.ParseException; import java.util.Date; import java.util.List; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; public class QueryByLabelServlet extends AbstractQueryServlet { public static final String NAMESPACE_PARAM = "namespace"; public static final String LABEL_VALUE_PARAM = "label-value"; public static final String DATE_VALUE_PARAM = "date-value"; public static final String OPERATOR_PARAM = "operator"; private void respondWith(HttpServletResponse response, List> queryResult) throws IOException { response.setStatus(200); // cors grant response.setHeader("Access-Control-Allow-Origin", "*"); PrintWriter w = response.getWriter(); w.println("["); int i = 0; // write out the response values as json if (queryResult != null) { int resultCount = 0; for (Map map : queryResult) { resultCount++; int mapCount = 0; if (map != null) { w.println("{"); for (String s : map.keySet()) { mapCount++; if (map.get(s).getN() == null) { w.print(String.format("\"%s\":\"%s\"", s, map.get(s).getS())); } else { w.print(String.format("\"%s\":%s", s, map.get(s).getN())); } if (mapCount != map.size()) { w.println(","); } } w.print("}"); if (resultCount != queryResult.size()) { w.println(","); } } } } w.print("]"); } @Override protected void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String namespace = request.getParameter(NAMESPACE_PARAM); String labelValue = request.getParameter(LABEL_VALUE_PARAM); String dateValue = request.getParameter(DATE_VALUE_PARAM); String operator = request.getParameter(OPERATOR_PARAM); // have to provide namespace and label if (namespace == null) { doError(response, String.format("Argument '%s' must not be null", NAMESPACE_PARAM)); return; } if (labelValue == null || labelValue.equals("")) { doError(response, String.format("Argument '%s' must not be null", LABEL_VALUE_PARAM)); return; } // if date value is provided, the so too must operator and granularity ComparisonOperator setOperator = null; if (dateValue != null && operator == null) { setOperator = ComparisonOperator.EQ; } if (operator != null) { try { setOperator = ComparisonOperator.fromValue(operator); } catch (Exception e) { doError(response, String.format("%s is an invalid Comparison Operator", operator)); return; } } String streamName = (String) request.getServletContext().getAttribute( AggregatorsConstants.STREAM_NAME_PARAM); AggregatorGroup aggGroup = (AggregatorGroup) request.getServletContext().getAttribute( AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM); if (aggGroup == null) { doError(response, "Aggregator Application Not Initialised"); return; } else { // initialise the aggregator group onto shard 'none' for this // operation // - it may already be initialised try { aggGroup.initialize("none"); } catch (Exception e) { throw new ServletException(e); } } Date dateValueAsDate = null; if (dateValue != null) { try { dateValueAsDate = StreamAggregator.dateFormatter.parse(dateValue); } catch (ParseException e1) { throw new ServletException(e1); } } // put the initialised aggregator group back into the context request.getServletContext().setAttribute(AggregatorsBeanstalkApp.AGGREGATOR_GROUP_PARAM, aggGroup); // acquire the correct aggregator by namespace for (StreamAggregator agg : aggGroup.getAggregators()) { if (agg.getNamespace().equals(namespace)) { // run the query try { respondWith(response, agg.queryValue(labelValue, dateValueAsDate, setOperator)); return; } catch (Exception e) { throw new ServletException(e); } } } // shouldn't get here, so bail with a meaning error on namespace doError(response, String.format("Unable to acquire Aggregator with Namespace %s", namespace)); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/ShowConfigFileServlet.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.net.URL; import java.util.Date; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.HttpMethod; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.configuration.ConfigFileUtils; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest; public class ShowConfigFileServlet extends AbstractQueryServlet { private static final Log LOG = LogFactory.getLog(ShowConfigFileServlet.class); @Override protected void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { try { String configUrl = System.getProperty(AggregatorsConstants.CONFIG_URL_PARAM); String url = null; if (configUrl == null) { response.setStatus(404); } else { url = ConfigFileUtils.makeConfigFileURL(configUrl); LOG.info(String.format("Sending Redirect for Config File to S3 Temporary URL %s", url)); response.setHeader("Access-Control-Allow-Origin", "*"); response.sendRedirect(url); } } catch (Exception e) { throw new ServletException(e); } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/app/ShowConfigurationServlet.java ================================================ package com.amazonaws.services.kinesis.aggregators.app; import java.io.IOException; import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.fasterxml.jackson.databind.ObjectMapper; public class ShowConfigurationServlet extends AbstractQueryServlet { @Override protected void doAction(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setStatus(200); // cors grant response.setHeader("Access-Control-Allow-Origin", "*"); ObjectMapper mapper = new ObjectMapper(); Map configMap = new HashMap<>(); configMap.put("version", StreamAggregator.version); configMap.put(AggregatorsConstants.STREAM_NAME_PARAM, System.getProperty(AggregatorsConstants.STREAM_NAME_PARAM)); configMap.put(AggregatorsConstants.APP_NAME_PARAM, System.getProperty(AggregatorsConstants.APP_NAME_PARAM)); configMap.put(AggregatorsConstants.REGION_PARAM, System.getProperty(AggregatorsConstants.REGION_PARAM)); configMap.put(AggregatorsConstants.STREAM_POSITION_PARAM, System.getProperty(AggregatorsConstants.STREAM_POSITION_PARAM)); configMap.put(AggregatorsConstants.MAX_RECORDS_PARAM, System.getProperty(AggregatorsConstants.MAX_RECORDS_PARAM)); configMap.put(AggregatorsConstants.ENVIRONMENT_PARAM, System.getProperty(AggregatorsConstants.ENVIRONMENT_PARAM)); configMap.put(AggregatorsConstants.FAILURES_TOLERATED_PARAM, System .getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM)); configMap.put(AggregatorsConstants.CONFIG_URL_PARAM, System .getProperty(AggregatorsConstants.CONFIG_URL_PARAM)); PrintWriter w = response.getWriter(); w.println(mapper.writeValueAsString(configMap)); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/cache/AggregateCache.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.cache; import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.regions.Region; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.EnvironmentType; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; import com.amazonaws.services.kinesis.aggregators.summary.SummaryElement; /** * AggregateCache encapsulates the in-flight copy of aggregated data, which is * flushed to Dynamo DB when the aggregator checkpoints. */ /* * Flush and Update methods are not thread safe so are marked as synchronised. * Intended utilisation in an inherently multi-threaded environment will be with * multiple instances of an Aggregator, which will by definition generate * multiple instances of the cache, so this synchronisation should not be an * issue in practice */ public class AggregateCache { private String shardId; private String environment; private String streamName, tableName, labelName, dateName; private AWSCredentialsProvider credentials; private AggregatorType aggregatorType = AggregatorType.COUNT; private Map pendingUpdates; private long reportUpdatesPendingCount = -1; private long warnUpdatesPendingCount = -1; private long forceCheckpointOnPendingUpdateCount = -1; private final int updateForceCheckpointFrequency = 3; private int forcedCount = 0; private final Log LOG = LogFactory.getLog(AggregateCache.class); private boolean online = false; private IMetricsEmitter metricsEmitter; private IDataStore dataStore = null; private Region region; public AggregateCache(String shardId) { this.shardId = shardId; } private void logInfo(String message) { LOG.info("[" + this.shardId + "] " + message); } private void logWarn(String message) { LOG.warn("[" + this.shardId + "] " + message); } /** * Configure the Aggregate Cache with its underlying data store. * * @throws Exception */ public void initialise() throws Exception { if (pendingUpdates == null) { pendingUpdates = new HashMap<>(); } // configure the default dynamo data store if (this.dataStore == null) { this.dataStore = new DynamoDataStore(this.credentials, this.aggregatorType, this.streamName, this.tableName, this.labelName, dateName).withStorageCapacity( DynamoDataStore.DEFAULT_READ_CAPACITY, DynamoDataStore.DEFAULT_WRITE_CAPACITY); this.dataStore.setRegion(region); } this.dataStore.initialise(); // set the checkpointing thresholds based on the current io throughputs setCheckpointForcingThresholds(); LOG.info("Aggregator Cache Online\nIDataStore: " + this.getDataStore().getClass().getName() + "\n" + "IMetricsEmitter: " + (this.metricsEmitter == null ? "Null" : this.metricsEmitter .getClass().getName())); this.online = true; } protected long getReportUpdatesPendingCount() { return reportUpdatesPendingCount; } protected long getWarnUpdatesPendingCount() { return warnUpdatesPendingCount; } protected long getForceCheckpointOnPendingUpdateCount() { return forceCheckpointOnPendingUpdateCount; } /* builder methods */ public AggregateCache withEnvironment(EnvironmentType environment) { this.environment = environment.name(); return this; } public AggregateCache withEnvironment(String environment) { this.environment = environment; return this; } public AggregateCache withTableName(String tableName) { this.tableName = tableName; return this; } public AggregateCache withStreamName(String streamName) { this.streamName = streamName; return this; } public AggregateCache withRegion(Region region) { this.region = region; return this; } public AggregateCache withLabelColumn(String labelColumn) { this.labelName = labelColumn; return this; } public AggregateCache withDateColumn(String dateColumn) { this.dateName = dateColumn; return this; } public AggregateCache withCredentials(AWSCredentialsProvider credentials) { this.credentials = credentials; return this; } public AggregateCache withAggregateType(AggregatorType type) { this.aggregatorType = type; return this; } public AggregateCache withMetricsEmitter(IMetricsEmitter metricsEmitter) { this.metricsEmitter = metricsEmitter; return this; } public AggregateCache withDataStore(IDataStore dataStore) { this.dataStore = dataStore; return this; } protected void setCheckpointForcingThresholds() throws Exception { // set the force checkpoint level @ 4 minutes of write capacity, warning // at half that, and info an half the warning threshold if (this.dataStore.refreshForceCheckpointThresholds() > 0) { this.forceCheckpointOnPendingUpdateCount = this.dataStore .refreshForceCheckpointThresholds(); this.warnUpdatesPendingCount = (long) Math .ceil(this.forceCheckpointOnPendingUpdateCount / 2); this.reportUpdatesPendingCount = (long) Math .ceil(this.warnUpdatesPendingCount / 2); } } /** * Mechanism to update the pending update set with new summary values, based * upon new events being consumed and calculated with the indicated * calculation. * * @param aggregatorType * The type of Aggregator that the cache is being used with * @param fieldLabel * The label value on which data will be aggregated * @param dateValue * The date value on which data will be aggregated * @param seq * The sequence number of the underlying Kinesis record which * generated the update * @param countIncrement * The increment of count for the item * @param summedIncrements * The set of summary values to be added to the aggregate * @param calculationConfig * The configuration of what types of summaries should be applied * to the summed fields * @throws Exception */ /* * This method is synchronised to prevent any issues where the consumer has * not implemented the aggregator=>worker mapping in a threadsafe manner. * Using the internal IRecordProcessor and IRecordProcessorFactory, we * generate new instances of the aggregator per shard worker thread. * However, a customer may allocate a single aggregator to multiple workers, * and while this will be slower, at least the data in the backing store * will be correct */ public synchronized void update(final AggregatorType aggregatorType, final LabelSet fieldLabel, final String dateValue, final TimeHorizon timeHorizon, final String seq, final Integer countIncrement, final Map summedIncrements, SummaryConfiguration calculationConfig) throws Exception { // lazy validate the configuration if (!online) initialise(); // get the payload for the current label value to be updated UpdateKey key = new UpdateKey(fieldLabel, this.dateName, dateValue, timeHorizon); UpdateValue payload = pendingUpdates.get(key); if (payload == null) { payload = new UpdateValue(); } // always update the count payload.incrementCount(countIncrement); // process summary updates based on the summary configuration if (aggregatorType.equals(AggregatorType.SUM)) { // process all the requested calculations for (String s : calculationConfig.getItemSet()) { for (SummaryElement e : calculationConfig .getRequestedCalculations(s)) { // be tolerant that not every summary item may be present on // every extracted item if (summedIncrements.containsKey(s)) { payload.updateSummary(e.getAttributeAlias(), summedIncrements.get(e.getStreamDataElement()), e); } else { logWarn(String .format("Summary Item '%s' not found in Extracted Data - Ignoring", s)); } } } } // update the last write sequence and time payload.lastWrite(seq, System.currentTimeMillis()); // write the updates back pendingUpdates.put(key, payload); // put some nags into the log to remind an implementer to checkpoint // periodically if (reportUpdatesPendingCount > 0) { if (pendingUpdates.size() % reportUpdatesPendingCount == 0) { logInfo(String.format("%s Pending Aggregates to be flushed", pendingUpdates.size())); } } if (warnUpdatesPendingCount > 0) { if (pendingUpdates.size() > warnUpdatesPendingCount) { logWarn(String.format( "Warning - %s Pending Aggregates - Checkpoint NOW", pendingUpdates.size())); } } // checkpoint manually at the force threshold to prevent the aggregator // falling over if (forceCheckpointOnPendingUpdateCount > 0) { if (pendingUpdates.size() > forceCheckpointOnPendingUpdateCount) { logWarn(String .format("Forcing checkpoint at %s Aggregates to avoid KCL Worker Disconnect - please ensure you have checkpointed the enclosing IRecordProcessor", pendingUpdates.size())); flush(); forcedCount++; if (forcedCount % updateForceCheckpointFrequency == 0) { // allow the system to refresh the force checkpoint // thresholds // periodically setCheckpointForcingThresholds(); } } } } public UpdateValue get(UpdateKey key) { return pendingUpdates.get(key); } protected IDataStore getDataStore() { return this.dataStore; } /** * Flush the state of all pending in memory updates to Dynamo DB. * * @throws Exception */ /* * See comments on aggregate() as to why this method is synchronised */ public synchronized void flush() throws Exception { long startTime = System.currentTimeMillis(); Map> dataModifications = this.dataStore .write(pendingUpdates); logInfo(String.format("Cache Flushed %s modifications in %sms", this.pendingUpdates.size(), (System.currentTimeMillis() - startTime))); // publish the cloudwatch metrics if (this.metricsEmitter != null) try { startTime = System.currentTimeMillis(); this.metricsEmitter.emit(dataModifications); logInfo(String .format("Instrumentation Dispatched to Metrics Service in %sms", (System.currentTimeMillis() - startTime))); } catch (Exception e) { // log the error but do not fail LOG.error("Metrics Emitter Exception - Aggregate Cache will NOT terminate"); LOG.error(e); } pendingUpdates = new HashMap<>(); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/cache/UpdateKey.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.cache; import java.text.ParseException; import java.util.Calendar; import java.util.Date; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; /** * Class which is used by the object Aggregator as the key to the in-memory * version of the AggregateTable. */ public class UpdateKey { private LabelSet labelValues; private String dateAttribute; private String dateValue; private TimeHorizon timeHorizon; private Calendar cal = Calendar.getInstance(); public UpdateKey(LabelSet labelValues, String dateAttribute, String dateValue, TimeHorizon timeHorizon) { this.labelValues = labelValues; this.dateAttribute = dateAttribute; this.dateValue = dateValue; this.timeHorizon = timeHorizon; } public String getAggregateColumnName() { return this.labelValues.getName(); } public String getDateValueColumnName() { return this.dateAttribute; } public String getAggregatedValue() { return this.labelValues.valuesAsString(); } public String getDateValue() { return this.dateValue; } public Date getDateValueAsDate() throws ParseException { // instrument the FOREVER metric at current time if (this.getTimeHorizon().equals(TimeHorizon.FOREVER)) { cal.setTimeInMillis(System.currentTimeMillis()); return cal.getTime(); } else { return StreamAggregator.dateFormatter.parse(StreamAggregatorUtils .extractDateFromMultivalue(this.getTimeHorizon(), this.getDateValue())); } } public TimeHorizon getTimeHorizon() { return this.timeHorizon; } @Override public boolean equals(Object o) { if (o == null) return false; if (!(o instanceof UpdateKey)) return false; UpdateKey other = (UpdateKey) o; if (this.labelValues.equals(other.labelValues) && this.dateValue.equals(other.dateValue)) { return true; } else { return false; } } @Override public int hashCode() { int res = 17; res = 31 * res + (this.labelValues == null ? 0 : this.labelValues.hashCode()); res = 31 * res + (this.dateValue == null ? 0 : this.dateValue.hashCode()); return res; } @Override public String toString() { return String .format("Update Key - Date Value: %s, Date Column: %s, Label Values: %s", this.dateValue, this.dateAttribute, this.labelValues); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/cache/UpdateValue.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.cache; import java.util.HashMap; import java.util.Map; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; import com.amazonaws.services.kinesis.aggregators.summary.SummaryElement; /** * Class which is used as the payload container for data which is cached in the * Aggregator prior to checkpointing. */ public class UpdateValue { private double aggregateCount; /* * The pending update summaries are comprised of the value to be applied to * the attribute, the calculation that was applied to get that value, and * the original value from the stream used to extract the data */ private Map summaryValues; private String lastWriteSeq; private long lastWriteTime; public UpdateValue() { this.aggregateCount = 0; this.summaryValues = new HashMap<>(); } public void incrementCount(int count) { this.aggregateCount += count; } public void updateSummary(String label, double withValue, SummaryElement element) { // apply the calculation to the old and new values in the update // payload AggregateAttributeModification current = this.summaryValues.get(element.getAttributeAlias()); Double currentValue = current == null ? null : current.getFinalValue(); // apply the calculation using the apply method Double newValue = element.getCalculation().apply(currentValue, withValue); // build the summary value to be tracked in memory AggregateAttributeModification update = new AggregateAttributeModification( element.getAttributeAlias(), label, currentValue, withValue, newValue, element.getCalculation(), 0); // update the in memory version of the update payload for the label this.summaryValues.put(element.getAttributeAlias(), update); } public void lastWrite(String lastSeq, long lastTime) { this.lastWriteSeq = lastSeq; this.lastWriteTime = lastTime; } public double getAggregateCount() { return aggregateCount; } public double getSummaryValue(String label) { return getSummary(label).getFinalValue(); } public AggregateAttributeModification getSummary(String label) { return this.summaryValues.get(label); } public AggregateAttributeModification getValueByOriginal(String attributeName, SummaryCalculation calculation) { return this.summaryValues.get(SummaryElement.makeStoreAttributeName(attributeName, calculation)); } public Map getSummaryValues() { return this.summaryValues; } public String getLastWriteSeq() { return lastWriteSeq; } public long getLastWriteTime() { return lastWriteTime; } @Override public String toString() { String summary = ""; if (this.summaryValues != null && this.summaryValues.size() > 0) { summary = ","; for (String s : this.summaryValues.keySet()) { summary = summary + summaryValues.get(s).toString() + ","; } summary = summary.substring(0, summary.length() - 1); } return String.format( "Update Value - Aggregate Count: %s, Last Write Seq: %s, Last Write Time: %s%s", this.aggregateCount, this.lastWriteSeq, StreamAggregator.dateFormatter.format(this.lastWriteTime), summary); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/cli/AggregatorsCli.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.cli; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils; public class AggregatorsCli { public static final String DELETE_TO_HWM = "delete-to-hwm"; public static final String GET_REDSHIFT_COPY = "get-redshift-copy-command"; public static final String GET_HIVE_WRAPPER = "get-hive-wrapper-statement"; public static final String GET_TABLE_STRUCTURE = "get-dynamo-table-structure"; private static void validateAction(String actionRequested) throws Exception { if (!actionRequested.equals(DELETE_TO_HWM) && !actionRequested.equals(GET_REDSHIFT_COPY) && !actionRequested.equals(GET_HIVE_WRAPPER) && !actionRequested.equals(GET_TABLE_STRUCTURE)) throw new Exception(String.format("Invalid Action %s", actionRequested)); } public static void main(String[] args) throws Exception { String applicationName = System.getProperty(AggregatorsConstants.APP_NAME_PARAM); String namespace = System.getProperty(AggregatorsConstants.NAMESPACE_PARAM); String action = System.getProperty("action"); String regionName = System.getProperty(AggregatorsConstants.REGION_PARAM); Region region = null; if (regionName != null && !regionName.equals("")) { region = Region.getRegion(Regions.fromName(regionName)); } validateAction(action); final AWSCredentialsProvider credentialsProvider; final String accessKey = System.getenv("AWS_ACCESS_KEY_ID"); if (accessKey == null) { credentialsProvider = new ClasspathPropertiesFileCredentialsProvider(); } else { credentialsProvider = new EnvironmentVariableCredentialsProvider(); } String aggregatorTableName; final AmazonDynamoDB dynamoClient = new AmazonDynamoDBClient(credentialsProvider); if (region != null) dynamoClient.setRegion(region); switch (action) { case DELETE_TO_HWM: String hwm = System.getProperty("last-sequence-number"); aggregatorTableName = System.getProperty("from-aggregator-table"); DynamoUtils.cleanupAggTable(credentialsProvider, region, aggregatorTableName, hwm); break; case GET_REDSHIFT_COPY: // get the redshift target table name String redshiftTableName = System.getProperty("to-redshift-table"); aggregatorTableName = System.getProperty("from-aggregator-table"); System.out.println(StreamAggregatorUtils.getRedshiftCopyCommand(dynamoClient, redshiftTableName, aggregatorTableName)); break; case GET_HIVE_WRAPPER: AggregatorType aggType = AggregatorType.valueOf(System.getProperty("aggregator-type")); String hiveTableName = System.getProperty("hive-table-name"); aggregatorTableName = System.getProperty("from-aggregator-table"); System.out.println(StreamAggregatorUtils.getDynamoHiveWrapper(dynamoClient, hiveTableName, aggregatorTableName)); break; case GET_TABLE_STRUCTURE: aggregatorTableName = System.getProperty("from-aggregator-table"); System.out.println(DynamoUtils.getDynamoTableStructure(dynamoClient, aggregatorTableName)); break; default: break; } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/ConfigFileUtils.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.configuration; import java.net.URL; import java.util.Date; import com.amazonaws.HttpMethod; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest; public class ConfigFileUtils { public static final String makeConfigFileURL(String configUrl) throws Exception { String url = null; if (configUrl.startsWith("http")) { url = configUrl; } else if (configUrl.startsWith("s3")) { AmazonS3 s3Client = new AmazonS3Client(); String bucket = configUrl.split("/")[2]; String prefix = configUrl.substring(configUrl.indexOf(bucket) + bucket.length() + 1); // generate a presigned url for X hours Date expiration = new Date(); long msec = expiration.getTime(); msec += 1000 * 60 * 60; // 1 hour. expiration.setTime(msec); GeneratePresignedUrlRequest generatePresignedUrlRequest = new GeneratePresignedUrlRequest( bucket, prefix); generatePresignedUrlRequest.setMethod(HttpMethod.GET); generatePresignedUrlRequest.setExpiration(expiration); URL s3url = s3Client.generatePresignedUrl(generatePresignedUrlRequest); url = s3url.toString(); } else { url = new URL(String.format("file://%s", configUrl)).toString(); } return url; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/DataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.configuration; public enum DataExtractor { JSON("com.amazonaws.services.kinesis.io.JsonDataExtractor"), CSV( "com.amazonaws.services.kinesis.io.CsvDataExtractor"), OBJECT( "com.amazonaws.services.kinesis.io.ObjectExtractor"), REGEX( "com.amazonaws.services.kinesis.io.RegexDataExtractor"); private DataExtractor(String linkedClass) { this.linkedClass = linkedClass; } private String linkedClass; public String getLinkedClass() { return this.linkedClass; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/ExternalConfigurationModel.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.configuration; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.exception.ClassNotAnnotatedException; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.metrics.IMetricsEmitter; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.transfer.Download; import com.amazonaws.services.s3.transfer.TransferManager; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; public class ExternalConfigurationModel { private static final Log LOG = LogFactory.getLog(ExternalConfigurationModel.class); private String namespace; private List timeHorizons; private AggregatorType aggregatorType; private DataExtractor dataExtractor; private List labelItems = new ArrayList<>(); private String labelAttributeAlias; private String dateItem, dateFormat, dateAttributeAlias; private List summaryItems; private String delimiter; private String itemTerminator; private String filterRegex; private String regularExpression; private boolean isAnnotatedClass; private Class clazz; private static ObjectMapper mapper = new ObjectMapper(); private String tableName; private Long readIOPs; private Long writeIOPs; private boolean failOnDataExtraction; private boolean emitMetrics; private Class dataStore; private Class metricsEmitter; private static void configureCsv(JsonNode document, ExternalConfigurationModel model) { model.setDelimiter(StreamAggregatorUtils.readValueAsString(document, "delimiter")); } private static void configureStringCommon(JsonNode document, ExternalConfigurationModel model) { model.setItemTerminator(StreamAggregatorUtils.readValueAsString(document, "lineTerminator")); model.setFilterRegex(StreamAggregatorUtils.readValueAsString(document, "filterRegex")); } private static void configureRegex(JsonNode document, ExternalConfigurationModel model) throws InvalidConfigurationException { String regex = StreamAggregatorUtils.readValueAsString(document, "regularExpression"); if (regex == null || regex.equals("")) throw new InvalidConfigurationException( "Cannot configure a Regular Expression Aggregator without a Regular Expression (configuration 'regularExpression'"); model.setRegularExpression(regex); } private static void configureObject(JsonNode document, ExternalConfigurationModel model) throws InvalidConfigurationException { String classname = StreamAggregatorUtils.readValueAsString(document, "class"); if (classname == null || classname.equals("")) throw new InvalidConfigurationException( "Cannot configure an Aggregator which uses Object based data extraction without a 'class' configuration item"); try { model.setClazz(Class.forName(classname)); } catch (ClassNotFoundException e) { throw new InvalidConfigurationException(String.format( "ClassNotFoundException: %s not found on Classpath", classname)); } // try to load the class using its annotations try { AnnotationProcessor p = new AnnotationProcessor(model.getClazz()); model.setAnnotatedClass(true); } catch (ClassNotAnnotatedException e) { // no problem } catch (Exception e) { throw new InvalidConfigurationException(e); } } private static void addTimeHorizons(JsonNode document, ExternalConfigurationModel model) throws Exception { JsonNode node = StreamAggregatorUtils.readJsonValue(document, "timeHorizons"); if (node != null) { Iterator timeHorizonValues = node.elements(); while (timeHorizonValues.hasNext()) { String t = timeHorizonValues.next().asText(); String timeHorizonName = null; int granularity = -1; // process parameterised time horizons if (t.contains("MINUTES_GROUPED")) { String[] items = t.split("\\("); timeHorizonName = items[0]; granularity = Integer.parseInt(items[1].replaceAll("\\)", "")); } else { timeHorizonName = t; } try { TimeHorizon th = TimeHorizon.valueOf(timeHorizonName); if (th.equals(TimeHorizon.MINUTES_GROUPED) && granularity == -1) { throw new InvalidConfigurationException( "Unable to create Grouped Minutes Time Horizon without configuration of Granularity using notation MINUTES_GROUPED()"); } else { if (th.equals(TimeHorizon.MINUTES_GROUPED)) { th.setGranularity(granularity); } } model.addTimeHorizon(th); } catch (Exception e) { throw new Exception(String.format("Unable to configure Time Horizon %s", t), e); } } } } private static void setAggregatorType(JsonNode document, ExternalConfigurationModel model) throws Exception { String aggType = StreamAggregatorUtils.readValueAsString(document, "type"); if (aggType == null || aggType.equals("")) { model.setAggregatorType(AggregatorType.COUNT); } else { try { model.setAggregatorType(AggregatorType.valueOf(aggType)); } catch (Exception e) { throw new Exception(String.format("Unable to configure AggregatorType %s", aggType)); } } } public static List buildFromConfig(String configFilePath) throws Exception { List response = new ArrayList<>(); // reference the config file as a full path File configFile = new File(configFilePath); if (!configFile.exists()) { // try to load the file from the classpath InputStream classpathConfig = ExternalConfigurationModel.class.getClassLoader().getResourceAsStream( configFilePath); if (classpathConfig != null && classpathConfig.available() > 0) { configFile = new File(ExternalConfigurationModel.class.getResource( (configFilePath.startsWith("/") ? "" : "/") + configFilePath).toURI()); LOG.info(String.format("Loaded Configuration %s from Classpath", configFilePath)); } else { if (configFilePath.startsWith("s3://")) { AmazonS3 s3Client = new AmazonS3Client(new DefaultAWSCredentialsProviderChain()); TransferManager tm = new TransferManager(s3Client); // parse the config path to get the bucket name and prefix final String s3ProtoRegex = "s3:\\/\\/"; String bucket = configFilePath.replaceAll(s3ProtoRegex, "").split("/")[0]; String prefix = configFilePath.replaceAll( String.format("%s%s\\/", s3ProtoRegex, bucket), ""); // download the file using TransferManager configFile = File.createTempFile(configFilePath, null); Download download = tm.download(bucket, prefix, configFile); download.waitForCompletion(); // shut down the transfer manager tm.shutdownNow(); LOG.info(String.format("Loaded Configuration from Amazon S3 %s/%s to %s", bucket, prefix, configFile.getAbsolutePath())); } else { // load the file from external URL try { configFile = File.createTempFile(configFilePath, null); FileUtils.copyURLToFile(new URL(configFilePath), configFile, 1000, 1000); LOG.info(String.format("Loaded Configuration from %s to %s", configFilePath, configFile.getAbsolutePath())); } catch (IOException e) { // handle the timeouts and so on with a generalised // config // file not found handler later } } } } else { LOG.info(String.format("Loaded Configuration from Filesystem %s", configFilePath)); } // if we haven't been able to load a config file, then bail if (configFile == null || !configFile.exists()) { throw new InvalidConfigurationException(String.format( "Unable to Load Config File from %s", configFilePath)); } JsonNode document = StreamAggregatorUtils.asJsonNode(configFile); ExternalConfigurationModel config = null; Iterator i = document.elements(); while (i.hasNext()) { config = new ExternalConfigurationModel(); JsonNode section = i.next(); // set generic properties config.setNamespace(StreamAggregatorUtils.readValueAsString(section, "namespace")); config.setDateFormat(StreamAggregatorUtils.readValueAsString(section, "dateFormat")); addTimeHorizons(section, config); setAggregatorType(section, config); // set the label items JsonNode labelItems = StreamAggregatorUtils.readJsonValue(section, "labelItems"); if (labelItems != null && labelItems.size() > 0) { Iterator iterator = labelItems.elements(); while (iterator.hasNext()) { JsonNode n = iterator.next(); config.addLabelItems(n.asText()); } } config.setLabelAttributeAlias(StreamAggregatorUtils.readValueAsString(section, "labelAttributeAlias")); config.setDateItem(StreamAggregatorUtils.readValueAsString(section, "dateItem")); config.setDateAttributeAlias(StreamAggregatorUtils.readValueAsString(section, "dateAttributeAlias")); JsonNode summaryItems = StreamAggregatorUtils.readJsonValue(section, "summaryItems"); if (summaryItems != null && summaryItems.size() > 0) { Iterator iterator = summaryItems.elements(); while (iterator.hasNext()) { JsonNode n = iterator.next(); config.addSummaryItem(n.asText()); } } config.setTableName(StreamAggregatorUtils.readValueAsString(section, "tableName")); String readIO = StreamAggregatorUtils.readValueAsString(section, "readIOPS"); if (readIO != null) config.setReadIOPs(Long.parseLong(readIO)); String writeIO = StreamAggregatorUtils.readValueAsString(section, "writeIOPS"); if (writeIO != null) config.setWriteIOPs(Long.parseLong(writeIO)); // configure tolerance of data extraction problems String failOnDataExtraction = StreamAggregatorUtils.readValueAsString(section, "failOnDataExtraction"); if (failOnDataExtraction != null) config.setFailOnDataExtraction(Boolean.parseBoolean(failOnDataExtraction)); // configure whether metrics should be emitted String emitMetrics = StreamAggregatorUtils.readValueAsString(section, "emitMetrics"); String metricsEmitterClassname = StreamAggregatorUtils.readValueAsString(section, "metricsEmitterClass"); if (emitMetrics != null || metricsEmitterClassname != null) { if (metricsEmitterClassname != null) { config.setMetricsEmitter((Class) ClassLoader.getSystemClassLoader().loadClass( metricsEmitterClassname)); } else { config.setEmitMetrics(Boolean.parseBoolean(emitMetrics)); } } // configure the data store class String dataStoreClass = StreamAggregatorUtils.readValueAsString(section, "IDataStore"); if (dataStoreClass != null) { Class dataStore = (Class) ClassLoader.getSystemClassLoader().loadClass( dataStoreClass); config.setDataStore(dataStore); } // get the data extractor configuration, so we know what other json // elements to retrieve from the configuration document String useExtractor = null; try { useExtractor = StreamAggregatorUtils.readValueAsString(section, "dataExtractor"); config.setDataExtractor(DataExtractor.valueOf(useExtractor)); } catch (Exception e) { throw new Exception(String.format( "Unable to configure aggregator with Data Extractor %s", useExtractor)); } switch (config.getDataExtractor()) { case CSV: configureStringCommon(section, config); configureCsv(section, config); break; case JSON: configureStringCommon(section, config); break; case OBJECT: configureObject(section, config); break; case REGEX: configureRegex(section, config); } response.add(config); } return response; } public String getNamespace() { return this.namespace; } public List getTimeHorizons() { return this.timeHorizons; } public String getFilterRegex() { return this.filterRegex; } public String getRegularExpression() { return this.regularExpression; } public String getTableName() { return this.tableName; } public Long getReadIOPs() { return this.readIOPs; } public Long getWriteIOPs() { return this.writeIOPs; } public String getLabelAttributeAlias() { return this.labelAttributeAlias; } public String getDateAttributeAlias() { return this.dateAttributeAlias; } public boolean isAnnotatedClass() { return this.isAnnotatedClass; } public void addTimeHorizon(TimeHorizon timeHorizon) { if (this.timeHorizons == null) this.timeHorizons = new ArrayList<>(); this.timeHorizons.add(timeHorizon); } public AggregatorType getAggregatorType() { return this.aggregatorType; } public List getLabelItems() { return this.labelItems; } public String getDateItem() { return this.dateItem; } public String getDateAlias() { return this.dateAttributeAlias; } public String getDateFormat() { return this.dateFormat; } public List getSummaryItems() { return this.summaryItems; } public String getDelimiter() { return this.delimiter; } public String getItemTerminator() { return this.itemTerminator; } public void addSummaryItem(String summaryItem) { if (this.summaryItems == null) this.summaryItems = new ArrayList<>(); this.summaryItems.add(summaryItem); } public Class getClazz() { return this.clazz; } public DataExtractor getDataExtractor() { return this.dataExtractor; } public boolean shouldFailOnDataExtraction() { return this.failOnDataExtraction; } public boolean shouldEmitMetrics() { return this.emitMetrics; } public Class getMetricsEmitter() { return this.metricsEmitter; } public Class getDataStore() { return this.dataStore; } private void setNamespace(String namespace) { this.namespace = namespace; } private void setAggregatorType(AggregatorType aggregatorType) { this.aggregatorType = aggregatorType; } private void addLabelItems(String labelItem) { this.labelItems.add(labelItem); } private void setLabelItems(List labelItems) { this.labelItems = labelItems; } private void setDateItem(String dateItem) { this.dateItem = dateItem; } private void setDateFormat(String dateFormat) { this.dateFormat = dateFormat; } private void setDelimiter(String delimiter) { if (delimiter != null && !delimiter.equals("")) this.delimiter = delimiter; } private void setItemTerminator(String itemTerminator) { if (itemTerminator != null && !itemTerminator.equals("")) this.itemTerminator = itemTerminator; } private void setFilterRegex(String filterRegex) { this.filterRegex = filterRegex; } private void setRegularExpression(String regularExpression) { this.regularExpression = regularExpression; } private void setClazz(Class clazz) { this.clazz = clazz; } private void setDataExtractor(DataExtractor dataExtractor) { this.dataExtractor = dataExtractor; } private void setAnnotatedClass(boolean isAnnotatedClass) { this.isAnnotatedClass = isAnnotatedClass; } private void setTableName(String tableName) { if (tableName != null && !tableName.equals("")) this.tableName = tableName; } private void setReadIOPs(Long readIOPs) { this.readIOPs = readIOPs; } private void setWriteIOPs(Long writeIOPs) { this.writeIOPs = writeIOPs; } private void setFailOnDataExtraction(boolean failOnDataExtraction) { this.failOnDataExtraction = failOnDataExtraction; } private void setEmitMetrics(boolean emitMetrics) { this.emitMetrics = emitMetrics; } private void setMetricsEmitter(Class metricsEmitter) { this.metricsEmitter = metricsEmitter; } private void setDataStore(Class dataStore) { this.dataStore = dataStore; } private void setLabelAttributeAlias(String labelAttributeAlias) { this.labelAttributeAlias = labelAttributeAlias; } private void setDateAttributeAlias(String dateAttributeAlias) { this.dateAttributeAlias = dateAttributeAlias; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/configuration/json.schema ================================================ { "title": "Amazon Kinesis Aggregators Configuration Schema", "type": "object", "properties": { "namespace": { "type": "string", "description": "The namespace for data stored by the Aggregator" }, "timeHorizons": { "type": "array", "description": "The list of all time horizons to be used as the granularity of the Aggregators", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "type": { "description": "The Type of the Aggregator (COUNT or SUM)", "type": "integer" }, "dataExtractor": { "description": "Mechanism for extracting data in form for aggregation from the Kinesis Stream. Must be one of CSV, JSON or OBJECT", "type": "string" }, "delimiter": { "description": "Delimiter to be used in CSV Data Extractors", "type": "string" }, "labelItem": { "description": "Index, Attribute or Method for the value in the stream which should be used as the top level aggregate", "type": "integer", "minValue" : 0 }, "dateItem": { "description": "Index, Attribute or Method for the value in the stream which should be used as the event date", "type": "integer", "minValue" : 0 }, "dateFormat": { "description": "(Optional) If the event date is stored on the stream as a String, then supply the date format which can be used to convert it into a date", "type": "string" }, "summaryItems": { "type": "array", "description": "(Optional) The list of expressions of indicies, attributes or methods to be used as aggregated values in addition to event count", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "filterRegex": { "description": "(Optional) Regular Expression used to filter String type stream data prior to data extraction", "type": "string" }, "tableName": { "description": "(Optional) The name of the table to be used for storing Aggregated data in Dynamo DB", "type": "string" }, "readIOPS": { "description": "(Optional) The number of provisioned Read IOPS for the Dynamo DB Table", "type": "long" }, "writeIOPS": { "description": "(Optional) The number of provisioned Write IOPS for the Dynamo DB Table", "type": "long" }, "class": { "description": "(Optional) For Object based Data Extractors, the class to use for serialising data to and from the Kinesis Stream. This may also be an Annotated Class, which will be used over all other configuration", "type": "string" }, "failOnDataExtraction": { "description": "(Optional) Controls whether the Aggregator process should stop when data extraction from the stream fails. The default is 'true', but setting this value to 'false' will ensure that an Aggregator continues to process data from a Shard even if it contains bad data", "type": "boolean" } }, "required": ["namespace", "timeHorizons", "type", "dataExtractor"] } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/consumer/AggregatorConsumer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.consumer; import java.net.NetworkInterface; import java.util.UUID; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.AggregatorsConstants; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.factory.ExternallyConfiguredAggregatorFactory; import com.amazonaws.services.kinesis.aggregators.processor.AggregatorProcessorFactory; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker; public final class AggregatorConsumer { private static final Log LOG = LogFactory.getLog(AggregatorConsumer.class); private String streamName, appName, regionName, environmentName, configFilePath, positionInStream, kinesisEndpoint; private AWSCredentialsProvider credentialsProvider; private boolean emitMetrics = false; private InitialPositionInStream streamPosition; private int failuresToTolerate = -1; private int maxRecords = -1; private KinesisClientLibConfiguration config; private AggregatorGroup aggGroup; private boolean isConfigured = false; private Worker worker; public AggregatorConsumer(String streamName, String appName, String configFilePath) { this.streamName = streamName; this.appName = appName; this.configFilePath = configFilePath; } private AggregatorGroup buildAggregatorsFromConfig() throws Exception { return ExternallyConfiguredAggregatorFactory.buildFromConfig( this.streamName, this.appName, this.config, configFilePath); } public void shutdown() throws Exception { this.aggGroup.shutdown(true); worker.shutdown(); } public int run() throws Exception { configure(); System.out.println(String.format("Starting %s", appName)); LOG.info(String.format("Running %s to process stream %s", appName, streamName)); IRecordProcessorFactory recordProcessorFactory = new AggregatorProcessorFactory( aggGroup); worker = new Worker(recordProcessorFactory, this.config); int exitCode = 0; int failures = 0; // run the worker, tolerating as many failures as is configured while (failures < failuresToTolerate || failuresToTolerate == -1) { try { worker.run(); } catch (Throwable t) { LOG.error("Caught throwable while processing data.", t); failures++; if (failures < failuresToTolerate) { LOG.error("Restarting..."); } else { shutdown(); } exitCode = 1; } } return exitCode; } private void assertThat(boolean condition, String message) throws Exception { if (!condition) { throw new InvalidConfigurationException(message); } } private void validateConfig() throws InvalidConfigurationException { try { assertThat(this.streamName != null, "Must Specify a Stream Name"); assertThat(this.appName != null, "Must Specify an Application Name"); } catch (Exception e) { throw new InvalidConfigurationException(e.getMessage()); } } public void configure() throws Exception { if (!isConfigured) { validateConfig(); if (this.positionInStream != null) { streamPosition = InitialPositionInStream .valueOf(this.positionInStream); } else { streamPosition = InitialPositionInStream.LATEST; } // append the environment name to the application name if (environmentName != null) { appName = String.format("%s-%s", appName, environmentName); } // ensure the JVM will refresh the cached IP values of AWS resources // (e.g. service endpoints). java.security.Security .setProperty("networkaddress.cache.ttl", "60"); String workerId = NetworkInterface.getNetworkInterfaces() + ":" + UUID.randomUUID(); LOG.info("Using Worker ID: " + workerId); // obtain credentials using the default provider chain or the // credentials provider supplied AWSCredentialsProvider credentialsProvider = this.credentialsProvider == null ? new DefaultAWSCredentialsProviderChain() : this.credentialsProvider; LOG.info("Using credentials with Access Key ID: " + credentialsProvider.getCredentials().getAWSAccessKeyId()); config = new KinesisClientLibConfiguration(appName, streamName, credentialsProvider, workerId).withInitialPositionInStream( streamPosition).withKinesisEndpoint(kinesisEndpoint); config.getKinesisClientConfiguration().setUserAgent( StreamAggregator.AWSApplication); if (regionName != null) { Region region = Region.getRegion(Regions.fromName(regionName)); config.withRegionName(region.getName()); } if (maxRecords != -1) config.withMaxRecords(maxRecords); // initialise the Aggregators aggGroup = buildAggregatorsFromConfig(); LOG.info(String .format("Amazon Kinesis Aggregators Managed Client prepared for %s on %s in %s (%s) using %s Max Records", config.getApplicationName(), config.getStreamName(), config.getRegionName(), config.getWorkerIdentifier(), config.getMaxRecords())); isConfigured = true; } } public AggregatorConsumer withKinesisEndpoint(String kinesisEndpoint) { this.kinesisEndpoint = kinesisEndpoint; return this; } public AggregatorConsumer withToleratedWorkerFailures(int failuresToTolerate) { this.failuresToTolerate = failuresToTolerate; return this; } public AggregatorConsumer withMaxRecords(int maxRecords) { this.maxRecords = maxRecords; return this; } public AggregatorConsumer withRegionName(String regionName) { this.regionName = regionName; return this; } public AggregatorConsumer withEnvironment(String environmentName) { this.environmentName = environmentName; return this; } public AggregatorConsumer withCredentialsProvider( AWSCredentialsProvider credentialsProvider) { this.credentialsProvider = credentialsProvider; return this; } public AggregatorConsumer withInitialPositionInStream( String positionInStream) { this.positionInStream = positionInStream; return this; } public AggregatorConsumer withMetricsEmitter() { this.emitMetrics = true; return this; } public AggregatorGroup getAggregators() { return this.aggGroup; } public static void main(String[] args) throws Exception { String streamName = System .getProperty(AggregatorsConstants.STREAM_NAME_PARAM); String appName = System .getProperty(AggregatorsConstants.APP_NAME_PARAM); String configFilePath = System .getProperty(AggregatorsConstants.CONFIG_PATH_PARAM); String regionName = System .getProperty(AggregatorsConstants.REGION_PARAM); String failuresToTolerate = System .getProperty(AggregatorsConstants.FAILURES_TOLERATED_PARAM); String maxRecords = System .getProperty(AggregatorsConstants.MAX_RECORDS_PARAM); String environmentName = System .getProperty(AggregatorsConstants.ENVIRONMENT_PARAM); String positionInStream = System .getProperty(AggregatorsConstants.STREAM_POSITION_PARAM); AggregatorConsumer consumer = new AggregatorConsumer(streamName, appName, configFilePath); // add optional configuration items if (regionName != null && regionName != "") { consumer.withRegionName(regionName); } if (failuresToTolerate != null && failuresToTolerate != "") { consumer.withToleratedWorkerFailures(Integer .parseInt(failuresToTolerate)); } if (maxRecords != null && maxRecords != "") { consumer.withMaxRecords(Integer.parseInt(maxRecords)); } if (environmentName != null && environmentName != "") { consumer.withEnvironment(environmentName); } if (positionInStream != null && positionInStream != "") { consumer.withInitialPositionInStream(positionInStream); } System.exit(consumer.run()); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/AggregateAttributeModification.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; public class AggregateAttributeModification { private String attributeName, originatingValueName; private Double oldValue, newValue, finalValue; private SummaryCalculation calculationApplied; private int writesSoFar; private AggregateAttributeModification() { } public AggregateAttributeModification(String attributeName, String originatingValueName, Double finalValue, SummaryCalculation calculationApplied) { this(attributeName, originatingValueName, null, null, finalValue, calculationApplied, 0); } public AggregateAttributeModification(String attributeName, String originatingValueName, Double finalValue, SummaryCalculation calculationApplied, int writesSoFar) { this(attributeName, originatingValueName, null, null, finalValue, calculationApplied, writesSoFar); } public AggregateAttributeModification(String attributeName, String originatingValueName, Double oldValue, Double newValue, Double finalValue, SummaryCalculation calculationApplied, int writesSoFar) { this.attributeName = attributeName; this.originatingValueName = originatingValueName; this.oldValue = oldValue; this.newValue = newValue; this.finalValue = finalValue; this.calculationApplied = calculationApplied; this.writesSoFar = writesSoFar; } public String getAttributeName() { return attributeName; } public String getOriginatingValueName() { return originatingValueName; } public Double getOldValue() { return oldValue; } public Double getNewValue() { return newValue; } public Double getFinalValue() { return finalValue; } public SummaryCalculation getCalculationApplied() { return calculationApplied; } public int getWritesSoFar() { return writesSoFar; } @Override public String toString() { return String.format( "Aggregate Attribute Modification - Originating Value Name: %s, Attribute Name: %s, Calculation Applied: %s, Old Value: %s, New Value: %s, Final Value: %s", this.originatingValueName, this.attributeName, this.calculationApplied.name(), this.oldValue, this.newValue, this.finalValue); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DevNullDataStore.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import java.util.HashMap; import java.util.Map; import com.amazonaws.regions.Region; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.cache.UpdateValue; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; public class DevNullDataStore implements IDataStore { @Override public Map> write( Map data) throws Exception { /* * Simply return a remapped set of what the caller sent us - their * values are the final values for this data store */ Map> output = new HashMap<>(); for (UpdateKey key : data.keySet()) { Map updates = new HashMap<>(); updates.put(StreamAggregator.EVENT_COUNT, new AggregateAttributeModification( StreamAggregator.EVENT_COUNT, StreamAggregator.EVENT_COUNT, data.get(key).getAggregateCount(), SummaryCalculation.SUM)); for (String value : data.get(key).getSummaryValues().keySet()) { updates.put(value, data.get(key).getSummary(value)); } output.put(key, updates); } return output; } @Override public void initialise() throws Exception { } @Override public long refreshForceCheckpointThresholds() throws Exception { return 0; } @Override public void setRegion(Region region) { } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoDataStore.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient; import com.amazonaws.services.dynamodbv2.model.AttributeAction; import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.AttributeValueUpdate; import com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException; import com.amazonaws.services.dynamodbv2.model.ExpectedAttributeValue; import com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndex; import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; import com.amazonaws.services.dynamodbv2.model.KeyType; import com.amazonaws.services.dynamodbv2.model.Projection; import com.amazonaws.services.dynamodbv2.model.ProjectionType; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput; import com.amazonaws.services.dynamodbv2.model.ReturnValue; import com.amazonaws.services.dynamodbv2.model.ScanRequest; import com.amazonaws.services.dynamodbv2.model.ScanResult; import com.amazonaws.services.dynamodbv2.model.Select; import com.amazonaws.services.dynamodbv2.model.UpdateItemRequest; import com.amazonaws.services.dynamodbv2.model.UpdateItemResult; import com.amazonaws.services.kinesis.AmazonKinesisClient; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.cache.UpdateValue; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; import com.amazonaws.services.kinesis.model.ResourceNotFoundException; public class DynamoDataStore implements IDataStore { public enum DynamoSummaryUpdateMethod { PUT(AttributeAction.PUT), ADD(AttributeAction.ADD), CONDITIONAL(null); private AttributeAction action; private DynamoSummaryUpdateMethod(AttributeAction a) { this.action = a; } public AttributeAction getAction() { return this.action; } } /** * The default amount of read IOPS to be provisioned, if the aggregator does * not override. */ public static final long DEFAULT_READ_CAPACITY = 10L; /** * The default amount of write IOPS to be provisioned, if the aggregator * does not override. */ public static final long DEFAULT_WRITE_CAPACITY = 10L; private final Log LOG = LogFactory.getLog(DynamoDataStore.class); private String environment, tableName, streamName; private AggregatorType aggregatorType; private boolean reportedStructure = false; private AmazonDynamoDB dynamoClient; private AmazonKinesisClient kinesisClient; private long readCapacity = DEFAULT_READ_CAPACITY; private long writeCapacity = DEFAULT_WRITE_CAPACITY; private String labelAttribute, dateAttribute; private boolean online = false; private Region region = Region.getRegion(Regions.US_EAST_1); public static final String SCATTER_PREFIX_ATTRIBUTE = "scatterPrefix"; public static final int SCATTER_WIDTH = 99; private final Random r = new Random(); private DynamoQueryEngine queryEngine; public DynamoDataStore(AmazonDynamoDB dynamoClient, AmazonKinesisClient kinesisClient, AggregatorType aggregatorType, String streamName, String tableName, String labelAttribute, String dateAttribute) { this.dynamoClient = dynamoClient; this.kinesisClient = kinesisClient; this.aggregatorType = aggregatorType; this.streamName = streamName; this.tableName = tableName; this.labelAttribute = labelAttribute; this.dateAttribute = dateAttribute; } public DynamoDataStore(AWSCredentialsProvider credentials, AggregatorType aggregatorType, String streamName, String tableName, String labelAttribute, String dateAttribute) { this(new AmazonDynamoDBAsyncClient(credentials), new AmazonKinesisClient(credentials), aggregatorType, streamName, tableName, labelAttribute, dateAttribute); } @Override public void initialise() throws Exception { if (!this.online) { if (this.region != null) { this.dynamoClient.setRegion(this.region); if (this.streamName != null) { this.kinesisClient.setRegion(this.region); } } initAggTable(this.labelAttribute, this.dateAttribute, this.readCapacity, this.writeCapacity); this.queryEngine = new DynamoQueryEngine(this.dynamoClient, this.tableName, this.labelAttribute, this.dateAttribute); this.online = true; } } @Override public Map> write( Map data) throws Exception { UpdateItemRequest req = null; UpdateItemResult result; Map updatedValues; Map> updatedData = new HashMap<>(); int conditionals = 0; if (data != null && data.keySet().size() > 0) { LOG.debug(String.format("Flushing %s Cache Updates", data.size())); // go through all pending updates and write down increments to event // counts and SUM operations first, then do other types of // calculations which need conditional updates after for (final UpdateKey key1 : data.keySet()) { // initialise the map of all updates made for final value // processing if (!updatedData.containsKey(key1)) { updatedValues = new HashMap<>(); } else { updatedValues = updatedData.get(key1); } Map updates = new HashMap<>(); updates.put( SCATTER_PREFIX_ATTRIBUTE, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withN("" + r.nextInt(SCATTER_WIDTH)))); // add the event count update to the list of updates to be made updates.put( StreamAggregator.EVENT_COUNT, new AttributeValueUpdate().withAction(AttributeAction.ADD).withValue( new AttributeValue().withN("" + data.get(key1).getAggregateCount()))); // add the time horizon type to the item updates.put( StreamAggregator.TIME_HORIZON_ATTR, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(key1.getTimeHorizon().getAbbrev()))); // add last update time and sequence updates.put( StreamAggregator.LAST_WRITE_SEQ, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(data.get(key1).getLastWriteSeq()))); updates.put( StreamAggregator.LAST_WRITE_TIME, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withS(StreamAggregator.dateFormatter.format(new Date( data.get(key1).getLastWriteTime()))))); if (this.aggregatorType.equals(AggregatorType.SUM)) { for (final String attribute : data.get(key1).getSummaryValues().keySet()) { final AggregateAttributeModification update = data.get(key1).getSummaryValues().get( attribute); if (!update.getCalculationApplied().getSummaryUpdateMethod().equals( DynamoSummaryUpdateMethod.CONDITIONAL)) { String setAttributeName = StreamAggregatorUtils.methodToColumn(attribute); updates.put( setAttributeName, new AttributeValueUpdate().withAction( update.getCalculationApplied().getSummaryUpdateMethod().getAction()).withValue( new AttributeValue().withN("" + update.getFinalValue()))); // add a stub entry so that we can extract the // updated value from the resultset updatedValues.put(setAttributeName, new AggregateAttributeModification( update.getAttributeName(), update.getOriginatingValueName(), null, update.getCalculationApplied())); } } } // do the update to all sum and count attributes as well // as the last write sequence and time - this gives us a key to // write other calculations onto req = new UpdateItemRequest().withTableName(tableName).withKey( StreamAggregatorUtils.getTableKey(key1)).withAttributeUpdates(updates).withReturnValues( ReturnValue.UPDATED_NEW); result = DynamoUtils.updateWithRetries(dynamoClient, req); // add the event count to the modifications made updatedValues.put( StreamAggregator.EVENT_COUNT, new AggregateAttributeModification(StreamAggregator.EVENT_COUNT, StreamAggregator.EVENT_COUNT, Double.parseDouble(result.getAttributes().get( StreamAggregator.EVENT_COUNT).getN()), SummaryCalculation.SUM)); // extract all updated values processed by the previous update for (String attribute : updatedValues.keySet()) { updatedValues.put( attribute, new AggregateAttributeModification( updatedValues.get(attribute).getAttributeName(), updatedValues.get(attribute).getOriginatingValueName(), Double.parseDouble(result.getAttributes().get(attribute).getN()), updatedValues.get(attribute).getCalculationApplied(), updatedValues.get(attribute).getWritesSoFar() + 1)); } // add all the updates for this key updatedData.put(key1, updatedValues); // log the structure of the table once, so the customer can // retrieve it directly if (!reportedStructure) { LOG.info(getTableStructure()); reportedStructure = true; } } // now process all non summing calculations which are conditional // and // require that the table keys already exist if (this.aggregatorType.equals(AggregatorType.SUM)) { for (final UpdateKey key2 : data.keySet()) { updatedValues = updatedData.get(key2); // we perform a single update for all SUM operations and the // count, last write sequence and time, and a // separate conditional update for every instance of MIN or // MAX // calculations as these must be conditionally applied to be // correct for (final String attribute : data.get(key2).getSummaryValues().keySet()) { final AggregateAttributeModification update = data.get(key2).getSummaryValues().get( attribute); if (update.getCalculationApplied().getSummaryUpdateMethod().equals( DynamoSummaryUpdateMethod.CONDITIONAL)) { conditionals++; result = updateConditionalValue(dynamoClient, tableName, key2, attribute, update); // if the update was made by this conditional // update, then add its items to the update set Double finalValue = null; int increment = update.getWritesSoFar(); if (result != null && result.getAttributes() != null) { finalValue = Double.parseDouble(result.getAttributes().get( attribute).getN()); increment++; } updatedValues.put( attribute, new AggregateAttributeModification(update.getAttributeName(), update.getOriginatingValueName(), finalValue, update.getCalculationApplied(), increment)); } } // add the conditional update items into the overall update // set updatedData.put(key2, updatedValues); } LOG.debug(String.format("Processed %s Conditional Updates", conditionals)); } } return updatedData; } public UpdateItemResult updateConditionalValue(final AmazonDynamoDB dynamoClient, final String tableName, final UpdateKey key, final String attribute, final AggregateAttributeModification update) throws Exception { Map updateKey = StreamAggregatorUtils.getTableKey(key); UpdateItemResult result; final ReturnValue returnValue = ReturnValue.UPDATED_NEW; final String setAttribute = StreamAggregatorUtils.methodToColumn(attribute); // create the update that we want to write final Map thisCalcUpdate = new HashMap() { { put(setAttribute, new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue( new AttributeValue().withN("" + update.getFinalValue()))); } }; // create the request UpdateItemRequest req = new UpdateItemRequest().withTableName(tableName).withKey(updateKey).withReturnValues( returnValue).withAttributeUpdates(thisCalcUpdate); Map expected = new HashMap<>(); final SummaryCalculation calc = update.getCalculationApplied(); // try an update to PUT the value if NOT EXISTS, to establish if we // are the first writer for this key expected = new HashMap() { { put(setAttribute, new ExpectedAttributeValue().withExists(false)); } }; req.setExpected(expected); try { result = DynamoUtils.updateWithRetries(dynamoClient, req); // yay - we were the first writer, so our value was written return result; } catch (ConditionalCheckFailedException e1) { // set the expected to the comparison contained in the update // calculation expected.clear(); expected.put( setAttribute, new ExpectedAttributeValue().withComparisonOperator( calc.getDynamoComparisonOperator()).withValue( new AttributeValue().withN("" + update.getFinalValue()))); req.setExpected(expected); // do the conditional update on the summary // calculation. this may result in no update being // applied because the new value is greater than the // current minimum for MIN, or less than the current // maximum for MAX. try { result = DynamoUtils.updateWithRetries(dynamoClient, req); return result; } catch (ConditionalCheckFailedException e2) { // no worries - we just weren't the min or max! return null; } } } /** * Method which examines an table which backs an Aggregator, and returns a * string value which represents the list of attributes in the table. This * method assumes that all elements in an aggregate table are the same. * * @param dynamoClient Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable The Table to get the structure of. * @return A String representation of the attribute names in the table. * @throws Exception */ public String getTableStructure() throws Exception { List columns = getDictionaryEntry(); StringBuffer sb = new StringBuffer(); for (String s : columns) { sb.append(String.format("%s,", s)); } return String.format("Dynamo Table %s (%s)", sb.toString().substring(0, sb.length() - 1), this.tableName); } /** * Generate a list of attribute names found in the Aggregator's dynamo * table. Assumes that all Items in the Aggregator table are of the same * structure. * * @param dynamoClient Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable The Dynamo Table for the Aggregator * @return A list of attribute names from the Dynamo table * @throws Exception */ protected List getDictionaryEntry() throws Exception { // get a list of all columns in the table, with keys first List columns = new ArrayList<>(); List keys = dynamoClient.describeTable(this.tableName).getTable().getKeySchema(); for (KeySchemaElement key : keys) { columns.add(key.getAttributeName()); } ScanResult scan = dynamoClient.scan(new ScanRequest().withTableName(this.tableName).withSelect( Select.ALL_ATTRIBUTES).withLimit(1)); List> scannedItems = scan.getItems(); for (Map map : scannedItems) { for (String s : map.keySet()) { if (!columns.contains(s)) columns.add(s); } } return columns; } /* * Configure the aggregate table with the indicated capacity, including * global secondary index on last_write_seq for facilitating aggregate * cleanup */ public void initAggTable(final String keyColumn, final String dateColumnName, final long readCapacity, final long writeCapacity) throws Exception { final String setDateColumn = dateColumnName == null ? StreamAggregator.DEFAULT_DATE_VALUE : dateColumnName; long setReadCapacity = readCapacity == -1 ? DEFAULT_READ_CAPACITY : readCapacity; long setWriteCapacity = writeCapacity == -1 ? DEFAULT_WRITE_CAPACITY : writeCapacity; // we have to add this attribute list so that we can project the key // into the GSI List attributes = new ArrayList() { { add(new AttributeDefinition().withAttributeName(keyColumn).withAttributeType("S")); add(new AttributeDefinition().withAttributeName(setDateColumn).withAttributeType( "S")); } }; Collection gsi = new ArrayList<>(); // Global Secondary Index for accessing the table by date item gsi.add(new GlobalSecondaryIndex().withIndexName( StreamAggregatorUtils.getDateDimensionIndexName(tableName, setDateColumn)).withKeySchema( new KeySchemaElement().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withKeyType( KeyType.HASH), new KeySchemaElement().withAttributeName(setDateColumn).withKeyType(KeyType.RANGE)).withProjection( new Projection().withProjectionType(ProjectionType.KEYS_ONLY)).withProvisionedThroughput( new ProvisionedThroughput().withReadCapacityUnits(setReadCapacity).withWriteCapacityUnits( setWriteCapacity))); attributes.add(new AttributeDefinition().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withAttributeType( "N")); // table is hash/range on value and date List key = new ArrayList() { { add(new KeySchemaElement().withAttributeName(keyColumn).withKeyType(KeyType.HASH)); add(new KeySchemaElement().withAttributeName(setDateColumn).withKeyType( KeyType.RANGE)); } }; // initialise the table DynamoUtils.initTable(this.dynamoClient, this.tableName, setReadCapacity, setWriteCapacity, attributes, key, gsi); } public long refreshForceCheckpointThresholds() { LOG.info("Refreshing Provisioned Throughput settings"); // get the current provisioned capacity this.writeCapacity = getProvisionedWrites(); // get the current number of provisioned kinesis shards for the stream, // if we know what stream we are working against int currentShardCount = 1; if (this.streamName != null) { try { currentShardCount = StreamAggregatorUtils.getShardCount(this.kinesisClient, this.streamName); return (4 * (60 * this.writeCapacity)) / currentShardCount; } catch (Exception e) { LOG.warn(String.format( "Unable to get Shard Count for Stream %s. Using Overly Optimistic Throughput Settings", this.streamName)); } } return (4 * (60 * this.writeCapacity)); } private long getProvisionedWrites() { return dynamoClient.describeTable(this.tableName).getTable().getProvisionedThroughput().getWriteCapacityUnits(); } public DynamoQueryEngine queryEngine() { return this.queryEngine; } public Region getRegion() { return this.region; } @Override public void setRegion(Region region) { this.region = region; } public DynamoDataStore withStorageCapacity(long readCapacity, long writeCapacity) { if (readCapacity > 0l) this.readCapacity = readCapacity; if (writeCapacity > 0l) this.writeCapacity = writeCapacity; return this; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoQueryEngine.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.BatchGetItemRequest; import com.amazonaws.services.dynamodbv2.model.BatchGetItemResult; import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; import com.amazonaws.services.dynamodbv2.model.Condition; import com.amazonaws.services.dynamodbv2.model.GetItemRequest; import com.amazonaws.services.dynamodbv2.model.KeysAndAttributes; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException; import com.amazonaws.services.dynamodbv2.model.QueryRequest; import com.amazonaws.services.dynamodbv2.model.QueryResult; import com.amazonaws.services.dynamodbv2.model.ScanRequest; import com.amazonaws.services.dynamodbv2.model.ScanResult; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.TableKeyStructure; public class DynamoQueryEngine { private final Log LOG = LogFactory.getLog(DynamoQueryEngine.class); private AmazonDynamoDB dynamoClient; private String tableName, labelAttribute, dateAttribute; protected final int BACKOFF_MILLIS = 10; public DynamoQueryEngine(AmazonDynamoDB dynamoClient, String tableName, String labelAttribute, String dateAttribute) { this.dynamoClient = dynamoClient; this.tableName = tableName; this.labelAttribute = labelAttribute; this.dateAttribute = dateAttribute; } public enum QueryKeyScope { HashKey, HashAndRangeKey; } public List parallelQueryKeys(QueryKeyScope scope, int threads) throws Exception { List workers = new ArrayList<>(); Collection> workerStatus = new ArrayList<>(); List output = new ArrayList<>(); int totalResultsProcessed = 0; // set up the executor thread pool ExecutorService executor = Executors.newFixedThreadPool(threads); // create workers for each segment that we need to do queries against for (int i = 0; i < threads; i++) { ParallelKeyScanWorker worker = new ParallelKeyScanWorker( this.tableName, i, threads, scope, this.labelAttribute, this.dateAttribute); workers.add(worker); workerStatus.add(executor.submit(worker)); } for (Future f : workerStatus) { f.get(); } executor.shutdown(); for (ParallelKeyScanWorker w : workers) { // throw any exceptions the worker incurred w.throwExceptions(); if (w.getResultCount() > 0) { output.addAll(w.getOutput()); } totalResultsProcessed += w.getResultsProcessed(); } LOG.info(String.format( "Key Extraction Complete - Processed %s Key Items", totalResultsProcessed)); return output; } public List> queryByKey(String label, Date dateValue, ComparisonOperator operator) throws Exception { if (dateValue != null && !operator.equals(ComparisonOperator.EQ)) { String dateAsString = StreamAggregator.dateFormatter .format(dateValue); LOG.info(String.format("Issuing Hash/Range Query for %s - %s", label, dateAsString)); // range query Map keyConditions = new HashMap<>(); // hash key Condition c = new Condition().withAttributeValueList( new AttributeValue().withS(label)).withComparisonOperator( ComparisonOperator.EQ); keyConditions.put(this.labelAttribute, c); // range key c = new Condition().withAttributeValueList( new AttributeValue().withS(dateAsString)) .withComparisonOperator(operator); keyConditions.put(this.dateAttribute, c); QueryRequest req = new QueryRequest().withTableName(this.tableName) .withKeyConditions(keyConditions); return DynamoUtils .queryUntilDone(dynamoClient, req, BACKOFF_MILLIS); } else { if (dateValue == null) { LOG.info(String.format("Issuing Hash Key Only Query for %s", label)); // hash key only query Map keyConditions = new HashMap<>(); Condition c = new Condition().withAttributeValueList( new AttributeValue().withS(label)) .withComparisonOperator(ComparisonOperator.EQ); keyConditions.put(this.labelAttribute, c); QueryRequest req = new QueryRequest().withTableName( this.tableName).withKeyConditions(keyConditions); return DynamoUtils.queryUntilDone(dynamoClient, req, BACKOFF_MILLIS); } else { String dateAsString = StreamAggregator.dateFormatter .format(dateValue); LOG.info(String.format( "Performing exact Hash/Range Lookup for %s - %s", label, dateAsString)); // exact key lookup List> output = new ArrayList<>(); Map keyMap = new HashMap<>(); keyMap.put(this.labelAttribute, new AttributeValue().withS(label)); keyMap.put(this.dateAttribute, new AttributeValue().withS(dateAsString)); GetItemRequest req = new GetItemRequest().withTableName( this.tableName).withKey(keyMap); output.add(this.dynamoClient.getItem(req).getItem()); return output; } } } private class ParallelKeyScanWorker implements Runnable { List output = new ArrayList<>(); private String tableName, hashKey, rangeKey; private QueryKeyScope scope; private int workerInstance, threads; private int resultsProcessed = 0; private Exception exception; public ParallelKeyScanWorker(String tableName, int workerInstance, int threads, QueryKeyScope scope, String hashKey, String rangeKey) { this.tableName = tableName; this.workerInstance = workerInstance; this.hashKey = hashKey; this.rangeKey = rangeKey; this.threads = threads; this.scope = scope; } public int getResultCount() { if (this.output == null) { return 0; } else { return this.output.size(); } } public int getResultsProcessed() { return this.resultsProcessed; } public void throwExceptions() throws Exception { if (this.exception != null) { throw this.exception; } } @Override public void run() { ScanRequest scanRequest = new ScanRequest() .withTableName(this.tableName) .withAttributesToGet(this.hashKey) .withSegment(this.workerInstance) .withTotalSegments(threads); Map> deduplicated = new HashMap<>(); Set rangeValues = null; Map lastKeyEvaluated = null; int scanAttempts = 0; int limit = -1; boolean returnedResults = false; String lastLabel = null; int uniqueLabels = 0; do { ScanResult result = null; // set query limits, to optimise for skip scan or for hash/range // query with no limit if (this.scope.equals(QueryKeyScope.HashKey)) { if (uniqueLabels > 0 && uniqueLabels == resultsProcessed) { // remove the query limit if every row being returned is // unique limit = -1; } else { // set a limit of twice the number of uniques, so we can // get a larger result set as we go if (uniqueLabels == 0) { limit = 100; } else { limit = uniqueLabels * 2; } // reset the unique labels so it doesn't grow without // limit uniqueLabels = 0; } } else { scanRequest.withAttributesToGet(this.rangeKey); } do { try { // set the limit if we have one if (limit != -1) { scanRequest.withLimit(limit); } result = dynamoClient.scan(scanRequest .withExclusiveStartKey(lastKeyEvaluated)); if (result.getItems().size() > 0) { returnedResults = true; } else { returnedResults = false; } } catch (ProvisionedThroughputExceededException e) { LOG.warn(String .format("Provisioned Throughput Exceeded - Retry Attempt %s", scanAttempts)); // back off try { Thread.sleep(2 ^ scanAttempts * BACKOFF_MILLIS); } catch (InterruptedException interrupted) { this.exception = interrupted; return; } scanAttempts++; } } while (scanAttempts < 10 && result == null); if (result == null) { this.exception = new Exception(String.format( "Unable to execute Scan after %s attempts", scanAttempts)); return; } // process the results, creating a deduplicated map/set of // hash/range keys String labelValue = null; if (returnedResults) { for (Map map : result.getItems()) { resultsProcessed++; labelValue = map.get(this.hashKey).getS(); // only enter the label value into the hash once if (scope.equals(QueryKeyScope.HashKey)) { if (!labelValue.equals(lastLabel) || lastLabel == null) { deduplicated.put(labelValue, null); lastLabel = labelValue; uniqueLabels++; } } else { if (deduplicated.containsKey(labelValue)) { rangeValues = deduplicated.get(labelValue); } else { rangeValues = new HashSet(); } rangeValues.add(map.get(this.rangeKey).getS()); deduplicated.put(labelValue, rangeValues); } } // set the last evaluated key. if we have processed a bunch // of data and are not at the end of the result set, then // we'll force a skip forward on date, to eliminate // continued processing of high cardinality hash values if (this.scope.equals(QueryKeyScope.HashKey) && result.getLastEvaluatedKey() != null) { // skip scan lastKeyEvaluated = new HashMap<>(); lastKeyEvaluated.put(this.hashKey, new AttributeValue().withS(labelValue)); lastKeyEvaluated.put(this.rangeKey, new AttributeValue() .withS("4000-01-01 00:00:00")); } else { lastKeyEvaluated = result.getLastEvaluatedKey(); } } else { lastKeyEvaluated = null; } } while (lastKeyEvaluated != null); if (this.scope.equals(QueryKeyScope.HashKey)) { LOG.debug(String.format("Worker %s extracted %s results", this.workerInstance, deduplicated.size())); } else { LOG.debug(String .format("Worker %s deduplicated %s results, creating distinct set of %s keys", this.workerInstance, resultsProcessed, deduplicated.size())); } this.output = new ArrayList<>(); if (deduplicated.size() > 0) { for (String s : deduplicated.keySet()) { TableKeyStructure t = new TableKeyStructure(this.hashKey, s, this.rangeKey); if (scope.equals(QueryKeyScope.HashAndRangeKey)) { for (String rangeValue : deduplicated.get(s)) { t.withDateValue(rangeValue); } } output.add(t); } } } public List getOutput() { return this.output; } } private class ParallelDateQueryWorker implements Runnable { private int start, range; private String tableName, indexName, labelAttribute, dateAttribute; private Map conditions; private Exception exception; private Map> resultKeys = new HashMap<>(); public void throwException() throws Exception { if (this.exception != null) throw this.exception; } public ParallelDateQueryWorker(String tableName, String indexName, int start, int range, Map conditions, String labelAttribute, String dateAttribute) { this.tableName = tableName; this.indexName = indexName; this.start = start; this.range = range; this.conditions = conditions; this.labelAttribute = labelAttribute; this.dateAttribute = dateAttribute; } @Override public void run() { List> results = new ArrayList<>(); for (int i = this.start; i < this.start + this.range; i++) { Condition c = new Condition().withComparisonOperator( ComparisonOperator.EQ).withAttributeValueList( new AttributeValue().withN("" + i)); this.conditions .put(DynamoDataStore.SCATTER_PREFIX_ATTRIBUTE, c); QueryRequest req = new QueryRequest() .withIndexName(this.indexName) .withTableName(this.tableName) .withKeyConditions(this.conditions); Map lastKeyEvaluated = null; do { int queryAttempts = 0; QueryResult result = null; do { try { result = dynamoClient.query(req) .withLastEvaluatedKey(lastKeyEvaluated); results.addAll(result.getItems()); } catch (ProvisionedThroughputExceededException e) { LOG.warn(String .format("Provisioned Throughput Exceeded - Retry Attempt %s", queryAttempts)); try { Thread.sleep(2 ^ queryAttempts * BACKOFF_MILLIS); } catch (InterruptedException interrupted) { this.exception = interrupted; return; } queryAttempts++; } } while (queryAttempts < 10 && result == null); if (result == null) { this.exception = new Exception(String.format( "Unable to execute Query after %s attempts", queryAttempts)); return; } lastKeyEvaluated = result.getLastEvaluatedKey(); } while (lastKeyEvaluated != null); // pivot the results into a list of label values and set of date // values String labelValue = null; String dateValue = null; Set values; for (Map map : results) { // process each attribute for (String s : map.keySet()) { // grab the label and date values if (s.equals(this.labelAttribute)) { labelValue = map.get(s).getS(); } else if (s.equals(this.dateAttribute)) { dateValue = map.get(s).getS(); } } if (labelValue != null && dateValue != null) { // get the current set of date values for the label, or // create a new one if (!resultKeys.containsKey(labelValue)) { values = new HashSet<>(); } else { values = resultKeys.get(labelValue); } // add the current date value to the set of all date // values // fore label values.add(dateValue); // write back the map of label to date values resultKeys.put(labelValue, values); } } } } public Map> getResultKeys() { return this.resultKeys; } } private KeysAndAttributes convertResultKeys( Map> resultKeys) { KeysAndAttributes keys = new KeysAndAttributes(); for (final String s : resultKeys.keySet()) { for (final String value : resultKeys.get(s)) { keys.withKeys(new HashMap() { { put(labelAttribute, new AttributeValue().withS(s)); put(dateAttribute, new AttributeValue().withS(value)); } }); } } return keys; } private List> batchGetDataByKeys( final String tableName, final KeysAndAttributes keys) { Map requestMap = new HashMap<>(); keys.setConsistentRead(true); requestMap.put(tableName, keys); BatchGetItemResult result = null; try { result = dynamoClient.batchGetItem(new BatchGetItemRequest( requestMap)); } catch (AmazonServiceException e) { LOG.error(e); throw e; } return result.getResponses().get(this.tableName); } @SuppressWarnings("unchecked") public List> parallelQueryDate( String onAttribute, Map conditions, int threads) throws Exception { // figure out the range of scatter prefix values we are going to assign // to each thread int range = (DynamoDataStore.SCATTER_WIDTH / threads) + 1; List workers = new ArrayList<>(); Collection> workerStatus = new ArrayList<>(); List> output = new ArrayList<>(); // set up the executor thread pool ExecutorService executor = Executors.newFixedThreadPool(threads); // determine which index we should work with String indexName; if (onAttribute.equals(StreamAggregator.LAST_WRITE_SEQ)) { indexName = StreamAggregatorUtils .getLastWriteSeqIndexName(this.tableName); } else { indexName = StreamAggregatorUtils.getDateDimensionIndexName( this.tableName, onAttribute); } StringBuilder conditionString = new StringBuilder(); for (String s : conditions.keySet()) { conditionString.append(String.format("%s %s %s,", s, conditions .get(s).getComparisonOperator(), conditions.get(s) .getAttributeValueList().get(0))); } LOG.info(String.format( "Querying %s with %s Threads on %s (Conditions: %s)", indexName, threads, onAttribute, conditionString.length() > 0 ? conditionString.substring(0, conditionString.length() - 1).toString() : "None")); // create workers for each segment that we need to do queries against for (int i = 0; i < DynamoDataStore.SCATTER_WIDTH; i++) { if (i == 0 || i % range == 0) { ParallelDateQueryWorker worker = new ParallelDateQueryWorker( this.tableName, indexName, i, range, conditions, this.labelAttribute, this.dateAttribute); workers.add(worker); workerStatus.add(executor.submit(worker)); } } for (Future f : workerStatus) { f.get(); } executor.shutdown(); // collect the results from the workers int outputCounter = 0; for (ParallelDateQueryWorker w : workers) { // throw any exceptions that the worker handled w.throwException(); // generate a set of KeysAndAttributes from the deduplicated output // map of table keys Map> workerKeys = w.getResultKeys(); KeysAndAttributes k = convertResultKeys(workerKeys); // break the KeysAndAttributes up into batches of 25 and // query for them KeysAndAttributes queryKeys = new KeysAndAttributes(); if (k != null && k.getKeys() != null) { for (Map key : k.getKeys()) { queryKeys.withKeys(key); outputCounter++; if (outputCounter % 25 == 0) { output.addAll(batchGetDataByKeys(this.tableName, queryKeys)); queryKeys = new KeysAndAttributes(); } } // one final query for anything < mod(25)=0 if (queryKeys.getKeys() != null && queryKeys.getKeys().size() > 0) { output.addAll(batchGetDataByKeys(this.tableName, queryKeys)); } } } return output; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/DynamoUtils.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.regions.Region; import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient; import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; import com.amazonaws.services.dynamodbv2.model.AttributeValue; import com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException; import com.amazonaws.services.dynamodbv2.model.CreateTableRequest; import com.amazonaws.services.dynamodbv2.model.CreateTableResult; import com.amazonaws.services.dynamodbv2.model.DescribeTableResult; import com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndex; import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; import com.amazonaws.services.dynamodbv2.model.KeyType; import com.amazonaws.services.dynamodbv2.model.LimitExceededException; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputExceededException; import com.amazonaws.services.dynamodbv2.model.QueryRequest; import com.amazonaws.services.dynamodbv2.model.QueryResult; import com.amazonaws.services.dynamodbv2.model.ResourceInUseException; import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; import com.amazonaws.services.dynamodbv2.model.ScanRequest; import com.amazonaws.services.dynamodbv2.model.ScanResult; import com.amazonaws.services.dynamodbv2.model.Select; import com.amazonaws.services.dynamodbv2.model.TableStatus; import com.amazonaws.services.dynamodbv2.model.UpdateItemRequest; import com.amazonaws.services.dynamodbv2.model.UpdateItemResult; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; public class DynamoUtils { private static final Log LOG = LogFactory.getLog(DynamoUtils.class); private DynamoUtils() { } /** * Private interface for creating tables which handles any instances of * Throttling of the API * * @param dynamoClient * @param dynamoTable * @return * @throws Exception */ public static CreateTableResult safeCreateTable( final AmazonDynamoDB dynamoClient, final CreateTableRequest createTableRequest) throws Exception { CreateTableResult res = null; final int tryMax = 10; int tries = 0; while (true) { try { res = dynamoClient.createTable(createTableRequest); return res; } catch (LimitExceededException le) { if (tries < tryMax) { // back off for 1 second Thread.sleep(1000); tries++; } else { throw le; } } catch (ResourceInUseException rie) { // someone else is trying to create the table while we are, so // return ok return null; } } } /** * Creates a table in Dynamo DB with the requested read and write capacity, * attributes, key schema and GSI's. This method will block until the table * is Active in Dynamo DB. * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable * The table name to create in Dynamo DB. * @param readCapacity * The requested amount of read IOPS to be provisioned. * @param writeCapacity * The requested amount of write IOPS to be provisioned. * @param attributes * Attribute Names which must be indicated to create the key * schema and/or GSI's. * @param keySchema * The keys used for the primary key of the table. * @param gsi * List of Global Secondary Indexes to be created on the table * @throws Exception */ public static void initTable(final AmazonDynamoDB dynamoClient, final String dynamoTable, final long readCapacity, final long writeCapacity, List attributes, List keySchema, final Collection gsi) throws Exception { try { DescribeTableResult res = safeDescribeTable(dynamoClient, dynamoTable); if (!res.getTable().getTableStatus().equals("ACTIVE")) { waitForTableActive(dynamoClient, dynamoTable); } } catch (ResourceInUseException r) { waitForTableActive(dynamoClient, dynamoTable); } catch (ResourceNotFoundException e) { LOG.info(String .format("Table %s Not Found - Creating with %s Reads/sec & %s Writes/sec", dynamoTable, readCapacity, writeCapacity)); CreateTableRequest createTableRequest = new CreateTableRequest() .withTableName(dynamoTable) .withProvisionedThroughput( new ProvisionedThroughput().withReadCapacityUnits( readCapacity).withWriteCapacityUnits( writeCapacity)).withKeySchema(keySchema) .withAttributeDefinitions(attributes); if (gsi != null) createTableRequest.withGlobalSecondaryIndexes(gsi); // create the table try { safeCreateTable(dynamoClient, createTableRequest); } catch (Exception ex) { LOG.error(ex); throw e; } // wait for it to go to active state waitForTableActive(dynamoClient, dynamoTable); } } /** * Private interface for describing tables which handles any instances of * Throttling of the API * * @param dynamoClient * @param dynamoTable * @return * @throws Exception */ public static DescribeTableResult safeDescribeTable( final AmazonDynamoDB dynamoClient, final String dynamoTable) throws Exception { DescribeTableResult res = null; final int tryMax = 10; int tries = 0; while (true) { try { res = dynamoClient.describeTable(dynamoTable); return res; } catch (ResourceNotFoundException e) { if (tries < tryMax) { // sleep for a short time as this is potentially an eventual // consistency issue with the table having been created ms // ago Thread.sleep(10); tries++; } else { throw e; } } } } /** * Method which waits for a Dynamo table to enter status 'Active'. * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable * The Table in Dynamo. * @throws Exception */ public static void waitForTableActive(final AmazonDynamoDB dynamoClient, final String dynamoTable) throws Exception { waitForTableState(dynamoClient, dynamoTable, TableStatus.ACTIVE); } /** * Interface which will block until a dynamo table reaches a specified * state. Also returns immediately if the object doesn't exist * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable * The table name to check. * @param status * The status to wait for * @throws Exception */ private static void waitForTableState(final AmazonDynamoDB dynamoClient, final String dynamoTable, TableStatus status) throws Exception { DescribeTableResult tableRequest = null; while (true) { try { tableRequest = dynamoClient.describeTable(dynamoTable); if (tableRequest.getTable().getTableStatus() .equals(status.name())) break; Thread.sleep(1000); } catch (InterruptedException e) { return; } } } public static void dropTable(final AmazonDynamoDB dynamoClient, final String dynamoTable) throws Exception { if (dynamoTable != null) { LOG.info(String.format("Dropping Dynamo Table %s", dynamoTable)); try { dynamoClient.deleteTable(dynamoTable); waitForTableState(dynamoClient, dynamoTable, TableStatus.DELETING); } catch (ResourceNotFoundException e) { LOG.info("OK - Table Not Found"); } } } public static void cleanupAggTable(AWSCredentialsProvider credentials, Region region, final String dynamoTable, final String toSeq) throws Exception { final Double deleteBelow = Double.parseDouble(toSeq); // create two clients - one synchronous for the read of all candidate // values, and another for the delete operations final AmazonDynamoDB dynamoClient = new AmazonDynamoDBClient( credentials); if (region != null) dynamoClient.setRegion(region); final AmazonDynamoDBAsyncClient deleteCli = new AmazonDynamoDBAsyncClient( credentials); deleteCli.setRegion(region); Map lastKey = null; Map deleteKey = null; // work out what the key and date column name is String keyColumn = null; String dateColumn = null; List keySchema = dynamoClient .describeTable(dynamoTable).getTable().getKeySchema(); for (KeySchemaElement element : keySchema) { if (element.getKeyType().equals(KeyType.HASH.name())) keyColumn = element.getAttributeName(); if (element.getKeyType().equals(KeyType.RANGE.name())) dateColumn = element.getAttributeName(); } LOG.info(String.format( "Deleting data from %s where %s values are below %s", dynamoTable, StreamAggregator.LAST_WRITE_SEQ, deleteBelow)); int deleteCount = 0; do { // read data from the table ScanRequest scan = new ScanRequest() .withTableName(dynamoTable) .withAttributesToGet(keyColumn, dateColumn, StreamAggregator.LAST_WRITE_SEQ) .withExclusiveStartKey(lastKey); ScanResult results = dynamoClient.scan(scan); // delete everything up to the system provided change number for (Map map : results.getItems()) { deleteKey = new HashMap<>(); deleteKey.put(keyColumn, map.get(keyColumn)); deleteKey.put(dateColumn, map.get(dateColumn)); if (Double.parseDouble(map.get(StreamAggregator.LAST_WRITE_SEQ) .getS()) < deleteBelow) { deleteCli.deleteItem(dynamoTable, deleteKey); deleteCount++; } } lastKey = results.getLastEvaluatedKey(); } while (lastKey != null); LOG.info(String.format( "Operation Complete - %s Records removed from Aggregate Store", deleteCount)); } public static UpdateItemResult updateWithRetries( AmazonDynamoDB dynamoClient, UpdateItemRequest req) throws Exception { final double initialBackoff = 2D; final int updateRetries = 10; final double backoffRatio = 1.2; double backoff = initialBackoff; UpdateItemResult res = null; for (int i = 0; i < updateRetries; i++) { try { res = dynamoClient.updateItem(req); break; } catch (ProvisionedThroughputExceededException ptee) { LOG.warn(String.format( "Exceeded Provisioned Througput - Backing off for %s", backoff)); try { Thread.sleep(new Double(backoff).longValue()); } catch (InterruptedException e) { e.printStackTrace(); } // simple linear backoff backoff = backoff * backoffRatio; } catch (ConditionalCheckFailedException ccfe) { // silently rethrow these exceptions as they are part of the // conditional update logic for MIN/MAX calculations throw ccfe; } catch (Exception e) { LOG.warn(e); throw e; } } if (res == null) { throw new Exception(String.format( "Unable to write after %s retries", updateRetries)); } else { return res; } } /** * Method which examines an table which backs an Aggregator, and returns a * string value which represents the list of attributes in the table. This * method assumes that all elements in an aggregate table are the same. * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable * The Table to get the structure of. * @return A String representation of the attribute names in the table. * @throws Exception */ public static String getDynamoTableStructure(AmazonDynamoDB dynamoClient, String dynamoTable) throws Exception { List columns = getDictionaryEntry(dynamoClient, dynamoTable); StringBuffer sb = new StringBuffer(); for (String s : columns) { sb.append(String.format("%s,", s)); } return String.format("Dynamo Table %s (%s)", sb.toString().substring(0, sb.length() - 1), dynamoTable); } /** * Generate a list of attribute names found in the Aggregator's dynamo * table. Assumes that all Items in the Aggregator table are of the same * structure. * * @param dynamoClient * Dynamo DB Client to use for connection to Dynamo DB. * @param dynamoTable * The Dynamo Table for the Aggregator * @return A list of attribute names from the Dynamo table * @throws Exception */ public static List getDictionaryEntry( final AmazonDynamoDB dynamoClient, final String dynamoTable) throws Exception { // get a list of all columns in the table, with keys first List columns = new ArrayList<>(); List keys = dynamoClient.describeTable(dynamoTable) .getTable().getKeySchema(); for (KeySchemaElement key : keys) { columns.add(key.getAttributeName()); } ScanResult scan = dynamoClient.scan(new ScanRequest() .withTableName(dynamoTable).withSelect(Select.ALL_ATTRIBUTES) .withLimit(1)); List> scannedItems = scan.getItems(); for (Map map : scannedItems) { for (String s : map.keySet()) { if (!columns.contains(s)) columns.add(s); } } return columns; } public static List> queryUntilDone( AmazonDynamoDB dynamoClient, QueryRequest qr, int backoffMillis) throws Exception { List> output = new ArrayList<>(); Map lastKeyEvaluated = null; do { int queryAttempts = 0; QueryResult result = null; do { try { result = dynamoClient.query(qr).withLastEvaluatedKey( lastKeyEvaluated); output.addAll(result.getItems()); } catch (ProvisionedThroughputExceededException e) { LOG.warn(String .format("Provisioned Throughput Exceeded - Retry Attempt %s", queryAttempts)); Thread.sleep(2 ^ queryAttempts * backoffMillis); queryAttempts++; } } while (queryAttempts < 10 && result == null); if (result == null) { throw new Exception(String.format( "Unable to execute Query after %s attempts", queryAttempts)); } lastKeyEvaluated = result.getLastEvaluatedKey(); } while (lastKeyEvaluated != null); return output; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/datastore/IDataStore.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.datastore; import java.util.Map; import com.amazonaws.regions.Region; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.cache.UpdateValue; /** * Interface which is used to allow the in memory cached aggregates to be saved * to a persistent store */ public interface IDataStore { /** * Write a set of Update Key/Value pairs back to the backing store * * @param data The Input Dataset to be updated * @return A data structure which maps a set of * AggregateAttributeModifications back to the values that were * affected on the underlying datastore, by UpdateKey * @throws Exception */ public Map> write( Map data) throws Exception; /** * Method called on creation of the IDataStore * * @throws Exception */ public void initialise() throws Exception; /** * Method which will be periodically invoked to allow the IDataStore to * refresh tolerated limits for how often write() should be called * * @return * @throws Exception */ public long refreshForceCheckpointThresholds() throws Exception; /** * Method called to set the region for the IDataStore * * @param region */ public void setRegion(Region region); } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/exception/ClassNotAnnotatedException.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.exception; @SuppressWarnings("serial") public class ClassNotAnnotatedException extends Exception { private String message; public ClassNotAnnotatedException(Exception e) { super(e); } public ClassNotAnnotatedException(String message, Exception e) { super(message, e); } public ClassNotAnnotatedException(String message) { super(message); } @Override public String getMessage() { return this.message; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/exception/InvalidConfigurationException.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.exception; public class InvalidConfigurationException extends Exception { public InvalidConfigurationException() { } public InvalidConfigurationException(String message) { super(message); // TODO Auto-generated constructor stub } public InvalidConfigurationException(Throwable cause) { super(cause); } public InvalidConfigurationException(String message, Throwable cause) { super(message, cause); } public InvalidConfigurationException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { super(message, cause, enableSuppression, writableStackTrace); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/exception/SerializationException.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.exception; @SuppressWarnings("serial") public class SerializationException extends Exception { private String message; public SerializationException(String message) { super(message); } public SerializationException(Exception e) { super(e); } public SerializationException(String message, Exception e) { super(message, e); } @Override public String getMessage() { return this.message; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/exception/UnsupportedCalculationException.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.exception; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; /** * Exception thrown when a summary value is indicated that is not one of * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation} */ public class UnsupportedCalculationException extends Exception { private String message; public UnsupportedCalculationException(String message) { super(); this.message = message; } @Override public String getMessage() { return this.message; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/factory/CSVAggregatorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.factory; import java.util.Arrays; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.CsvDataExtractor; import com.amazonaws.services.kinesis.io.StringDataExtractor; /** * Factory Class used for generating Aggregators which support CSV data on the * Kinesis Stream. */ public class CSVAggregatorFactory { private CSVAggregatorFactory() { } /** * Factory Method to generate a new Aggregator for CSV Data. * * @param streamName The name of the Stream to aggregate against. * @param appName The application name to associate with the aggregator. * @param config The Kinesis Configuration used for the containing worker. * @param namespace The namespace to associate with the aggregated data. * @param timeHorizon The time horizons on which to aggregate data. * @param aggregatorType The type of aggregator to create. * @param delimiter The character delimiter for data on the stream. * @param labelIndicies The position of the field in the stream data which * should be used to aggregate data. * @param dateIndex The position of the field which includes a date item * used to aggregate data by the timeHorizon. Values can be in String * format if dateFormat is supplied, or in epoch seconds. * @param dateFormat The format of the date item, if provided as a String * @param summaryIndicies The list of field positions, or expressions using * a {@link SummaryCalculation} against the field positions. For * example, simple summaries might have a list of '0,1,2' or when * expressions are used, a list of 'min(0),sum(1),max(2)'. * @return Returns a new CSV Aggregator. * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon, AggregatorType aggregatorType, String delimiter, List labelIndicies, String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias, List summaryIndicies) throws Exception { return newInstance(streamName, appName, config, namespace, Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, delimiter, labelIndicies, labelAttributeAlias, dateIndex, dateFormat, dateAlias, summaryIndicies); } public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, List timeHorizons, AggregatorType aggregatorType, String delimiter, List labelIndicies, String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias, List summaryIndicies) throws Exception { StringDataExtractor dataExtractor = new CsvDataExtractor(labelIndicies).withDelimiter( delimiter).withDateValueIndex(dateIndex).withDateFormat(dateFormat).withSummaryIndicies( summaryIndicies); dataExtractor.setAggregatorType(aggregatorType); if (labelAttributeAlias != null && !labelAttributeAlias.equals("")) { dataExtractor.withLabelAttributeAlias(labelAttributeAlias); } if (dateAlias != null && !dateAlias.equals("")) { dataExtractor.withDateAttributeAlias(dateAlias); } return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon( timeHorizons).withAggregatorType(aggregatorType); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/factory/ExternallyConfiguredAggregatorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.factory; import java.util.ArrayList; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.configuration.ExternalConfigurationModel; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.CsvDataExtractor; import com.amazonaws.services.kinesis.io.IDataExtractor; import com.amazonaws.services.kinesis.io.JsonDataExtractor; import com.amazonaws.services.kinesis.io.ObjectExtractor; import com.amazonaws.services.kinesis.io.RegexDataExtractor; public class ExternallyConfiguredAggregatorFactory { private ExternallyConfiguredAggregatorFactory() { } private static List intList(List stringList) { List list = new ArrayList<>(); for (String s : stringList) { list.add(Integer.parseInt(s)); } return list; } public static AggregatorGroup buildFromConfig(String streamName, String applicationName, KinesisClientLibConfiguration config, String configFile) throws Exception { List models = ExternalConfigurationModel.buildFromConfig(configFile); if (models.size() == 0) { throw new InvalidConfigurationException(String.format( "Unable to build any Aggregators from External Configuration %s", configFile)); } AggregatorGroup aggregators = new AggregatorGroup(); StreamAggregator agg = null; IDataExtractor dataExtractor = null; // the configuration may have included many configuration models for (ExternalConfigurationModel model : models) { switch (model.getDataExtractor()) { case CSV: CsvDataExtractor d = new CsvDataExtractor(intList(model.getLabelItems())).withDateValueIndex( Integer.parseInt(model.getDateItem())).withDelimiter( model.getDelimiter()).withItemTerminator(model.getItemTerminator()).withRegexFilter( model.getFilterRegex()).withDateFormat(model.getDateFormat()).withStringSummaryIndicies( model.getSummaryItems()); if (model.getLabelAttributeAlias() != null) { d.withLabelAttributeAlias(model.getLabelAttributeAlias()); } if (model.getDateAttributeAlias() != null) { d.withDateAttributeAlias(model.getDateAttributeAlias()); } dataExtractor = d; break; case REGEX: RegexDataExtractor e = new RegexDataExtractor(model.getRegularExpression(), intList(model.getLabelItems())).withItemTerminator( model.getItemTerminator()).withDateValueIndex( Integer.parseInt(model.getDateItem())).withDateFormat( model.getDateFormat()).withStringSummaryIndicies( model.getSummaryItems()); if (model.getLabelAttributeAlias() != null) { e.withLabelAttributeAlias(model.getLabelAttributeAlias()); } if (model.getDateAttributeAlias() != null) { e.withDateAttributeAlias(model.getDateAttributeAlias()); } dataExtractor = e; break; case JSON: dataExtractor = new JsonDataExtractor(model.getLabelItems()) .withDateFormat(model.getDateFormat()) .withDateValueAttribute(model.getDateItem()) .withSummaryAttributes(model.getSummaryItems()) .withItemTerminator(model.getItemTerminator()) .withRegexFilter(model.getFilterRegex()) ; break; case OBJECT: ObjectExtractor extractor = null; if (model.isAnnotatedClass()) { extractor = new ObjectExtractor(model.getClazz()); } else { extractor = new ObjectExtractor(model.getLabelItems(), model.getClazz()); } extractor.withDateMethod(model.getDateItem()).withSummaryMethods( model.getSummaryItems()); dataExtractor = extractor; break; } dataExtractor.setAggregatorType(model.getAggregatorType()); agg = new StreamAggregator(streamName, applicationName, model.getNamespace(), config, dataExtractor).withAggregatorType(model.getAggregatorType()).withStorageCapacity( model.getReadIOPs(), model.getWriteIOPs()).withTableName(model.getTableName()).withTimeHorizon( model.getTimeHorizons()).withRaiseExceptionOnDataExtractionErrors( model.shouldFailOnDataExtraction()); // configure metrics service on the aggregator if it's been // configured if (model.shouldEmitMetrics() || model.getMetricsEmitter() != null) { if (model.getMetricsEmitter() != null) { agg.withMetricsEmitter(model.getMetricsEmitter().newInstance()); } else { agg.withCloudWatchMetrics(); } } // create a new instance of the Data Store if one has been // configured. Currently we only support pluggable data stores that // are configured via their environment or have self defined // configuration models: only no args public constructors can be // called if (model.getDataStore() != null) { agg.withDataStore((IDataStore) model.getDataStore().newInstance()); } aggregators.registerAggregator(agg); } return aggregators; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/factory/JsonAggregatorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.factory; import java.util.Arrays; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.IDataExtractor; import com.amazonaws.services.kinesis.io.JsonDataExtractor; public class JsonAggregatorFactory { private JsonAggregatorFactory() { } /** * Creates an Aggregator for data that is formatted as JSON Strings on the * Kinesis Stream. * * @param streamName The Stream Name that the Aggregator is receiving data * from. * @param appName The Application Name that an Aggregator is part of. * @param config The Kinesis Client Library Configuration to inherit * credentials and connectivity to the database from. * @param namespace The namespace used to separate this Aggregator's output * data from other Aggregated data * @param timeHorizon The Time Horizon value to use for the granularity of * the Aggregated data * @param aggregatorType The type of Aggregator to create. Default is COUNT. * @param labelAttributes The attribute name in the JSON document which * should be used as the label value for Aggregation * @param dateAttribute The attribute name in the JSON document which should * be used for the time element of the Aggregation. If NULL then the * client receive time will be used. * @param dateFormat The format of the dateAttribute, if String based dates * are used. This should follow {@link java.text.SimpleDateFormat} * convention. * @param summaryAttributes List of attributes or expressions on attributes * which should be used for summary aggregation. * @return A Stream Aggregator which can process JSON data containing the * indicated attributes. * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon, AggregatorType aggregatorType, List labelAttributes, String dateAttribute, String dateFormat, List summaryAttributes) throws Exception { return newInstance(streamName, appName, config, namespace, Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, labelAttributes, dateAttribute, dateFormat, summaryAttributes); } /** * Creates an Aggregator for data that is formatted as JSON Strings on the * Kinesis Stream. * * @param streamName The Stream Name that the Aggregator is receiving data * from. * @param appName The Application Name that an Aggregator is part of. * @param workerId The worker ID hosting the Aggregator. * @param config The Kinesis Client Library Configuration to inherit * credentials and connectivity to the database from. * @param namespace The namespace used to separate this Aggregator's output * data from other Aggregated data. * @param timeHorizons The list of Time Horizon values to use the * aggregator. Data will be automatically managed at ALL of the * requested granularities using a prefixed namespace on dates. * @param aggregatorType The type of Aggregator to create. Default is COUNT. * @param labelAttributes The attribute name in the JSON document which * should be used as the label value for Aggregation. * @param dateAttribute The attribute name in the JSON document which should * be used for the time element of the Aggregation. If NULL then the * client receive time will be used. * @param dateFormat The format of the dateAttribute, if String based dates * are used. This should follow {@link java.text.SimpleDateFormat} * convention. * @param summaryAttributes List of attributes or expressions on attributes * which should be used for summary aggregation. * @return A Stream Aggregator which can process JSON data containing the * indicated attributes. * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, List timeHorizons, AggregatorType aggregatorType, List labelAttributes, String dateAttribute, String dateFormat, List summaryAttributes) throws Exception { IDataExtractor dataExtractor = new JsonDataExtractor(labelAttributes).withDateValueAttribute( dateAttribute).withSummaryAttributes(summaryAttributes).withDateFormat(dateFormat); dataExtractor.setAggregatorType(aggregatorType); return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon( timeHorizons).withAggregatorType(aggregatorType); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/factory/ObjectAggregatorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.factory; import java.util.Arrays; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore; import com.amazonaws.services.kinesis.aggregators.datastore.IDataStore; import com.amazonaws.services.kinesis.aggregators.metrics.CloudWatchMetricsEmitter; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.IDataExtractor; import com.amazonaws.services.kinesis.io.ObjectExtractor; public class ObjectAggregatorFactory { private ObjectAggregatorFactory() { } /** * Create a new Aggregator for Object Serialised Data based upon a Class * which is configured using Annotations from the base class. * * @param streamName The Stream Name that the Aggregator is receiving data * from. * @param appName The Application Name that an Aggregator is part of. * @param config The Kinesis Client Library Configuration to inherit * credentials and connectivity to the database from. * @param clazz The annotated class to use for configuration of the * aggregator * @return A Stream Aggregator which can process object serialised data * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, Class clazz) throws Exception { AnnotationProcessor p = new AnnotationProcessor(clazz); ObjectExtractor dataExtractor = new ObjectExtractor(p.getLabelMethodNames(), clazz).withDateMethod(p.getDateMethodName()); dataExtractor.withSummaryConfig(p.getSummaryConfig()); //dataExtractor.withSummaryMethods(new ArrayList<>(p.getSummaryMethods().keySet())); StreamAggregator agg = new StreamAggregator(streamName, appName, p.getNamespace(), config, dataExtractor).withTimeHorizon(p.getTimeHorizon()).withAggregatorType(p.getType()).withRaiseExceptionOnDataExtractionErrors( p.shouldFailOnDataExtractionErrors()); // configure metrics service on the aggregator if it's been // configured if (p.shouldEmitMetrics() || (p.getMetricsEmitter() != null && !p.getMetricsEmitter().equals( CloudWatchMetricsEmitter.class))) { if (p.getMetricsEmitter() != null) { agg.withMetricsEmitter(p.getMetricsEmitter().newInstance()); } else { agg.withCloudWatchMetrics(); } } // create a new instance of the Data Store if one has been // configured. Currently we only support pluggable data stores that // are configured via their environment or have self defined // configuration models: only no args public constructors can be // called if (p.getDataStore() != null && !p.getDataStore().equals(DynamoDataStore.class)) { agg.withDataStore((IDataStore) p.getDataStore().newInstance()); } return agg; } /** * Create a new Aggregator for data which is object serialised on the stream * using Jackson JSON Serialisation. * * @param streamName The Stream Name that the Aggregator is receiving data * from. * @param appName The Application Name that an Aggregator is part of. * @param config The Kinesis Client Library Configuration to inherit * credentials and connectivity to the database from. * @param namespace The namespace used to separate this Aggregator's output * data from other Aggregated data * @param timeHorizon The Time Horizon value to use for the granularity of * the Aggregated data * @param aggregatorType The type of Aggregator to create. Default is COUNT. * @param clazz The base class to use as a Transfer Object for the data * stream. * @param labelMethods The method on the base class to use to obtain the * label for aggregation. * @param dateMethod The method on the object which should be used to * establish the time. If NULL then the client receive time will be * used. * @param summaryMethods List of summary method names or expressions to be * used when the AggregatorType is SUM, as secondary aggregated data * points * @return A Stream Aggregator which can process object serialised data * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon, AggregatorType aggregatorType, Class clazz, List labelMethods, String dateMethod, List summaryMethods) throws Exception { return newInstance(streamName, appName, config, namespace, Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, clazz, labelMethods, dateMethod, summaryMethods); } /** * Create a new Aggregator for data which is object serialised on the stream * using Jackson JSON Serialisation. * * @param streamName The Stream Name that the Aggregator is receiving data * from. * @param appName The Application Name that an Aggregator is part of. * @param config The Kinesis Client Library Configuration to inherit * credentials and connectivity to the database from. * @param namespace The namespace used to separate this Aggregator's output * data from other Aggregated data. * @param timeHorizons The list of Time Horizon values to use the * aggregator. Data will be automatically managed at ALL of the * requested granularities using a prefixed namespace on dates. * @param aggregatorType The type of Aggregator to create. Default is COUNT. * @param clazz The base class to use as a Transfer Object for the data * stream. * @param labelMethods The methods on the base class to use to obtain the * label for aggregation. * @param dateMethod The method on the object which should be used to * establish the time. If NULL then the client receive time will be * used. * @param summaryMethods List of summary method names or expressions to be * used when the AggregatorType is SUM, as secondary aggregated data * points. * @return A Stream Aggregator which can process object serialised data. * @return * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, List timeHorizons, AggregatorType aggregatorType, Class clazz, List labelMethods, String dateMethod, List summaryMethods) throws Exception { IDataExtractor dataExtractor = new ObjectExtractor(labelMethods, clazz).withDateMethod( dateMethod).withSummaryMethods(summaryMethods); dataExtractor.setAggregatorType(aggregatorType); return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon( timeHorizons).withAggregatorType(aggregatorType); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/factory/RegexAggregatorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.factory; import java.util.Arrays; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.TimeHorizon; import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; import com.amazonaws.services.kinesis.io.RegexDataExtractor; import com.amazonaws.services.kinesis.io.StringDataExtractor; /** * Factory Class used for generating Aggregators which use Regular Expressions * to extract data from the Kinesis Stream. */ public class RegexAggregatorFactory { private RegexAggregatorFactory() { } /** * Factory Method which generates a Regular Expression based Aggregator for * a number of Time Horizons * * @param streamName The name of the Stream to aggregate against. * @param appName The application name to associate with the aggregator. * @param config The Kinesis Configuration used for the containing worker. * @param namespace The namespace to associate with the aggregated data. * @param timeHorizon The time horizons on which to aggregate data. * @param aggregatorType The type of aggregator to create. * @param regularExpression The regular expression used to extract data from * the Kinesis Stream via Character Classes * @param labelIndicies The index of the extracted data to be used as the * aggregation label * @param dateIndex The index of the extracted data to be used as the time * value * @param dateFormat The format of the data which represents the event time * when shipped as a String * @param summaryIndicies The indicies or Summary Expressions on indicies * which contain summary values to be aggregated * @return * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, List timeHorizons, AggregatorType aggregatorType, String regularExpression, List labelIndicies, String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias, List summaryIndicies) throws Exception { StringDataExtractor dataExtractor = new RegexDataExtractor(regularExpression, labelIndicies).withDateValueIndex( dateIndex).withDateFormat(dateFormat).withSummaryIndicies(summaryIndicies); dataExtractor.setAggregatorType(aggregatorType); if (labelAttributeAlias != null && !labelAttributeAlias.equals("")) { dataExtractor.withLabelAttributeAlias(labelAttributeAlias); } if (dateAlias != null && !dateAlias.equals("")) { dataExtractor.withDateAttributeAlias(dateAlias); } return new StreamAggregator(streamName, appName, namespace, config, dataExtractor).withTimeHorizon( timeHorizons).withAggregatorType(aggregatorType); } /** * Factory Method which generates a Regular Expression based Aggregator for * a single Time Horizon * * @param streamName * @param appName * @param config * @param namespace * @param timeHorizon * @param aggregatorType * @param regularExpression * @param labelIndicies * @param dateIndex * @param dateFormat * @param summaryIndicies * @return * @throws Exception */ public static final StreamAggregator newInstance(String streamName, String appName, KinesisClientLibConfiguration config, String namespace, TimeHorizon timeHorizon, AggregatorType aggregatorType, String regularExpression, List labelIndicies, String labelAttributeAlias, int dateIndex, String dateFormat, String dateAlias, List summaryIndicies) throws Exception { return newInstance(streamName, appName, config, namespace, Arrays.asList(new TimeHorizon[] { timeHorizon }), aggregatorType, regularExpression, labelIndicies, labelAttributeAlias, dateIndex, dateFormat, dateAlias, summaryIndicies); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/idempotency/DefaultIdempotencyCheck.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.idempotency; import com.amazonaws.services.kinesis.aggregators.AggregateData; /** * Default implementation of an Idempotency Check. Always returns True - that an * input element should be processed */ public class DefaultIdempotencyCheck implements IIdempotencyCheck { public DefaultIdempotencyCheck() { } public boolean doProcess(String partitionKey, String sequenceNumber, AggregateData dataElement, byte[] originalData) { return true; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/idempotency/IIdempotencyCheck.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.idempotency; import com.amazonaws.services.kinesis.aggregators.AggregateData; /** * Interface which allows for the configuration of an Idempotency Check, which * will conditionally select whether a record should be processed */ public interface IIdempotencyCheck { /** * Should the input event be processed by the configured Aggregators? * * @param event The Deserialised and resolved data element * @return True for process, False for don't */ public boolean doProcess(String partitionKey, String sequenceNumber, AggregateData dataElement, byte[] originalData); } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/CloudWatchMetricsEmitter.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.metrics; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.AmazonServiceException; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.regions.Region; import com.amazonaws.services.cloudwatch.AmazonCloudWatchAsyncClient; import com.amazonaws.services.cloudwatch.AmazonCloudWatchClient; import com.amazonaws.services.cloudwatch.model.Dimension; import com.amazonaws.services.cloudwatch.model.LimitExceededException; import com.amazonaws.services.cloudwatch.model.MetricDatum; import com.amazonaws.services.cloudwatch.model.PutMetricDataRequest; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification; public class CloudWatchMetricsEmitter implements IMetricsEmitter { private final Log LOG = LogFactory.getLog(CloudWatchMetricsEmitter.class); private String metricsNamespace; private AmazonCloudWatchClient cloudWatchClient; private Region region; private static final int THROTTLING_RETRIES = 10; private static final int BACKOFF_MILLIS = 10; private static final int MAX_WRITE_ATTEMPTS = 10; public CloudWatchMetricsEmitter() { } public CloudWatchMetricsEmitter(String metricsNamespace, AWSCredentialsProvider credentials) { this.metricsNamespace = metricsNamespace; this.cloudWatchClient = new AmazonCloudWatchAsyncClient(credentials); } @Override public void emit( Map> metricData) throws Exception { if (metricData != null) { Date metricDate = null; for (UpdateKey key : metricData.keySet()) { PutMetricDataRequest req = new PutMetricDataRequest() .withNamespace(this.metricsNamespace); Collection data = new ArrayList<>(); if (key.getDateValue().equals("*")) { LOG.debug("Not Emitting Cloudwatch Metrics for Time Horizon FOREVER"); return; } else { try { metricDate = key.getDateValueAsDate(); } catch (ParseException pe) { LOG.error(String.format( "Unable to Parse Date Value %s", key.getDateValue())); return; } } // send in every update as a datum for (String summary : metricData.get(key).keySet()) { final AggregateAttributeModification mod = metricData.get( key).get(summary); // TODO Handle that we've been sent an update for which a // new final value which might not have been set. This // means, for example, that on an hourly aggregate of FIRST, // we'd get a single modification at the beginning of the // hour, and then not again after if (mod.getFinalValue() != null) { data.add(new MetricDatum() .withMetricName(mod.getOriginatingValueName()) .withTimestamp(metricDate) .withDimensions( new Dimension() .withName("Calculation") .withValue( mod.getCalculationApplied() .name()), new Dimension() .withName( key.getAggregateColumnName()) .withValue( key.getAggregatedValue())) .withValue(mod.getFinalValue())); } } boolean success = false; int iterations = 0; int backoffMillis = BACKOFF_MILLIS; while (!success && iterations < MAX_WRITE_ATTEMPTS) { iterations++; boolean backoff = false; try { cloudWatchClient .putMetricData(req.withMetricData(data)); success = true; } catch (LimitExceededException e) { backoff = true; } catch (AmazonServiceException ase) { if (ase.getErrorCode().startsWith("Throttling")) { backoff = true; } } if (backoff) { LOG.warn("CloudWatch Limit Exceeded - backing off"); Thread.sleep(2 ^ iterations * BACKOFF_MILLIS); } } if (!success) { throw new MetricsEmitterThrottledException( String.format( "CloudWatch Metrics Emitter failed to write metrics after %s attempts", MAX_WRITE_ATTEMPTS)); } } } } @Override public void setRegion(Region region) { this.region = region; this.cloudWatchClient.setRegion(region); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/IMetricsEmitter.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.metrics; import java.util.Map; import com.amazonaws.regions.Region; import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey; import com.amazonaws.services.kinesis.aggregators.datastore.AggregateAttributeModification; /** * Interface for providing classes which can write to metrics services. It * receives the output of the IDataStore modifications, and applies the data to * the metrics service */ public interface IMetricsEmitter { /** * Emit a new set of metrics to the metrics service * * @param metricData Input Data to be intrumented * @throws Exception */ public void emit(Map> metricData) throws Exception; /** * Method called to indicate the Region of the metrics service * * @param region */ public void setRegion(Region region); } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/metrics/MetricsEmitterThrottledException.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.metrics; public class MetricsEmitterThrottledException extends Exception { public MetricsEmitterThrottledException() { super(); } public MetricsEmitterThrottledException(String message) { super(message); } public MetricsEmitterThrottledException(Exception e) { super(e); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/processor/AggregatorProcessor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.processor; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.IStreamAggregator; import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException; import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException; import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer; import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason; import com.amazonaws.services.kinesis.model.Record; /** * Container IRecordProcessor Application which can be used as a standalone KCL * application. Simply build an Aggregator using a Factory method or direct * configuration, and then create the IRecordProcessor within your KCL * application. */ public class AggregatorProcessor implements IRecordProcessor { private static final Log LOG = LogFactory.getLog(AggregatorProcessor.class); private final int NUM_RETRIES = 10; private final long BACKOFF_TIME_IN_MILLIS = 100L; private String kinesisShardId; private IStreamAggregator agg; public AggregatorProcessor(IStreamAggregator agg) { super(); this.agg = agg; } /** * {@inheritDoc} */ @Override public void initialize(String shardId) { LOG.info("Initializing AggregatorProcessor for Shard: " + shardId); this.kinesisShardId = shardId; try { this.agg.initialize(shardId); } catch (Exception e) { e.printStackTrace(); } } /** * {@inheritDoc} */ @Override public void processRecords(List records, IRecordProcessorCheckpointer checkpointer) { LOG.info("Aggregating " + records.size() + " records for Kinesis Shard " + kinesisShardId); try { // run data into the aggregator agg.aggregate(records); // checkpoint the aggregator and kcl agg.checkpoint(); checkpointer.checkpoint(records.get(records.size() - 1)); LOG.debug("Kinesis Checkpoint for Shard " + kinesisShardId + " Complete"); } catch (Exception e) { e.printStackTrace(); LOG.error(e); shutdown(checkpointer, ShutdownReason.ZOMBIE); } } /** * {@inheritDoc} */ @Override public void shutdown(IRecordProcessorCheckpointer checkpointer, ShutdownReason reason) { LOG.info("Shutting down record processor for shard: " + kinesisShardId); // Important to checkpoint after reaching end of shard, so we can start // processing data from child shards. if (reason == ShutdownReason.TERMINATE) { try { agg.shutdown(true); checkpoint(checkpointer); } catch (Exception e) { e.printStackTrace(); } } else { // shutdown the aggregator without flushing state try { agg.shutdown(false); } catch (Exception e) { e.printStackTrace(); } } } /** * Checkpoint with retries. * * @param checkpointer */ private void checkpoint(IRecordProcessorCheckpointer checkpointer) { LOG.info("Checkpointing shard " + kinesisShardId); for (int i = 0; i < NUM_RETRIES; i++) { try { checkpointer.checkpoint(); break; } catch (ShutdownException se) { // Ignore checkpoint if the processor instance has been shutdown // (fail over). LOG.info("Caught shutdown exception, skipping checkpoint.", se); break; } catch (ThrottlingException e) { // Backoff and re-attempt checkpoint upon transient failures if (i >= (NUM_RETRIES - 1)) { LOG.error("Checkpoint failed after " + (i + 1) + "attempts.", e); break; } else { LOG.info("Transient issue when checkpointing - attempt " + (i + 1) + " of " + NUM_RETRIES, e); } } catch (InvalidStateException e) { // This indicates an issue with the DynamoDB table (check for // table, provisioned IOPS). LOG.error( "Cannot save checkpoint to the DynamoDB table used by the KinesisClientLibrary.", e); break; } try { Thread.sleep(BACKOFF_TIME_IN_MILLIS); } catch (InterruptedException e) { LOG.debug("Interrupted sleep", e); } } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/processor/AggregatorProcessorFactory.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.processor; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.AggregatorGroup; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor; import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory; /** * Simple factory class to generate a standalone Kinesis Aggregator * IRecordProcessor for the application */ public class AggregatorProcessorFactory implements IRecordProcessorFactory { private AggregatorGroup aggregators; private final Log LOG = LogFactory.getLog(AggregatorProcessorFactory.class); private AggregatorProcessorFactory() { } /** * Create a Processor Factory that will create an Aggregator Processor which * wraps the indicated Aggregator * * @param agg */ public AggregatorProcessorFactory(StreamAggregator agg) { this.aggregators = new AggregatorGroup(); this.aggregators.registerAggregator(agg); } public AggregatorProcessorFactory(AggregatorGroup group) { this.aggregators = group; } /** * {@inheritDoc} */ public IRecordProcessor createProcessor() { try { // every time we create a new processor instance, we have to embed a // new instance of the AggregatorGroup, to eliminate any thread // contention return new AggregatorProcessor(new AggregatorGroup(this.aggregators)); } catch (Exception e) { LOG.error(e); return null; } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryCalculation.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.summary; import com.amazonaws.services.dynamodbv2.model.ComparisonOperator; import com.amazonaws.services.kinesis.aggregators.datastore.DynamoDataStore.DynamoSummaryUpdateMethod; public enum SummaryCalculation { /** * SUM Calculations simply always increase the aggregate value based upon * the value observed on the stream */ SUM(null, DynamoSummaryUpdateMethod.ADD) { @Override public Double apply(Double currentValue, Double newValue) { // add the values including dealing with nulls return nvl(currentValue) + nvl(newValue); } }, /** * FIRST Calculations always return the first value observed, without * considering the newest */ FIRST(null, DynamoSummaryUpdateMethod.CONDITIONAL) { @Override public Double apply(Double currentValue, Double newValue) { // always return the current value unless its null, then overwrite // with the first value return currentValue == null ? newValue : currentValue; } }, /** * LAST Calculations always return the latest value observed, without * considering the previous */ LAST(null, DynamoSummaryUpdateMethod.PUT) { @Override public Double apply(Double currentValue, Double newValue) { // always return the latest value return nvl(newValue); } }, /** * The Min calculation seeks to always record only the lowest value ever * observed for a data value in the specified time horizon */ MIN(ComparisonOperator.GT, DynamoSummaryUpdateMethod.CONDITIONAL) { // The comparison operator is compared to the existing values. So to // apply a // minimum value, the existing value should be greater than the new // value @Override public Double apply(Double currentValue, Double newValue) { // the lower value wins, or 0 if values have not yet been // initialised if (currentValue == null) return nvl(newValue); if (newValue == null) return nvl(currentValue); Double output = nvl(newValue) < currentValue ? nvl(newValue) : currentValue; return output; } }, /** * The Max calculation will store only the maximum value observed on the * stream for the time period */ MAX(ComparisonOperator.LT, DynamoSummaryUpdateMethod.CONDITIONAL) { // apply a new value only if the existing value is less than the new // value @Override public Double apply(Double currentValue, Double newValue) { // the greater value wins, or 0 if values have not yet been // initialised if (currentValue == null) return nvl(newValue); if (newValue == null) return nvl(currentValue); return nvl(newValue) > currentValue ? nvl(newValue) : currentValue; } }; private ComparisonOperator comparisonOperator; private DynamoSummaryUpdateMethod updateMethod; private SummaryCalculation(ComparisonOperator c, DynamoSummaryUpdateMethod updateMethod) { this.comparisonOperator = c; this.updateMethod = updateMethod; } private SummaryCalculation() { } /** * Apply the calculation to the values provided to the interface * * @param currentValue The current aggregate value being managed by the * {@link com.amazonaws.services.kinesis.aggregators.cache.AggregateCache} * @param newValue The new value from the stream to be applied to the * calculation * @return */ public abstract Double apply(Double currentValue, Double newValue); /** * Return the * {@link com.amazonaws.services.dynamodbv2.model.ComparisonOperator} which * will be applied when this calculation is written to the database * * @return */ public ComparisonOperator getDynamoComparisonOperator() { return this.comparisonOperator; } public DynamoSummaryUpdateMethod getSummaryUpdateMethod() { return this.updateMethod; } private static double nvl(Double val) { return val == null ? 0D : val; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryConfiguration.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.summary; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException; /** * The Summary Configuration object contains the required calculations to be * performed against summary items extracted from a Kinesis Data Stream. For * each item listed as summary value to be extracted from the stream, the * Summary configuration will store a list of the calculations against the base * item being calculated. For example, if the summary expression was: * sum(value_a), max(value_a), min(value_b) then the SummaryConfiguration would * be: "key" :[list of * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}] * --------- ------------ "value_a":[sum,max] "value_b":[min] */ public class SummaryConfiguration { private Map> config = new HashMap<>(); /* closure over the map which contains the items to list */ final class ConfigWriter { public void write(String s, SummaryElement e) { List calculations = config.get(s); // setup the list if (calculations == null) { calculations = new ArrayList<>(); } calculations.add(e); config.put(s, calculations); } } private ConfigWriter writer = new ConfigWriter(); public SummaryConfiguration() { } public SummaryConfiguration(List summaries) throws UnsupportedCalculationException { for (String s : summaries) { addConfig(s); } } /** * Add a calculation for a base attribute into the list of all calculations * to be done * * @param value * @param calc */ public void add(String value, SummaryElement e) { writer.write(value, e); } private void addConfig(String summary) throws UnsupportedCalculationException { SummaryElement e = new SummaryElement(summary); add(e.getStreamDataElement(), e); } /** * Add a fully formed expression to the list of all calculations. This uses * the parseSummary method to parse the expression into its component parts. * * @param summary The expression to add * @throws UnsupportedCalculationException */ public SummaryConfiguration withConfigItem(String summary) throws UnsupportedCalculationException { addConfig(summary); return this; } /** * Get all * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation}s * for an attribute item * * @param s The attribute of the stream to get the list of calculations for * @return */ public List getRequestedCalculations(String s) { return this.config.get(s); } /** * Get all attributes which this summary configuration is stored against * * @return */ public Set getItemSet() { return this.config.keySet(); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/aggregators/summary/SummaryElement.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.aggregators.summary; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException; public class SummaryElement { private String streamDataElement, attributeAlias; private SummaryCalculation calculation; public SummaryElement(String streamDataElement, SummaryCalculation calculation) { this(streamDataElement, calculation, makeStoreAttributeName(streamDataElement, calculation)); } public SummaryElement(String streamDataElement, SummaryCalculation calculation, String attributeAlias) { this.streamDataElement = streamDataElement; this.calculation = calculation; if (attributeAlias != null) { this.attributeAlias = attributeAlias; } else { this.attributeAlias = makeStoreAttributeName(streamDataElement, calculation); } } /** * Parse a summary calculation expression to a Pair of the base item name, * and the SummaryCalculation to be applied to that base item. The * expression must take the form of: * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation} * (attribute of the data stream) * * @param s * @return * @throws UnsupportedCalculationException */ public SummaryElement(String s) throws UnsupportedCalculationException { if (!s.contains("(")) { this.streamDataElement = s; this.calculation = SummaryCalculation.SUM; this.attributeAlias = makeStoreAttributeName(s, this.calculation); } else { if (!s.contains(")")) throw new UnsupportedCalculationException(String.format( "\"%s\" is not a valid summary calculation", s)); String[] tokens = s.split("\\("); String requested = tokens[0].replaceAll(" ", "").toUpperCase(); try { SummaryCalculation c = SummaryCalculation.valueOf(requested); String[] onItems = tokens[1].split("\\)"); this.streamDataElement = onItems[0].replaceAll(" ", ""); this.calculation = c; if (onItems.length > 1 && onItems[1] != null) { this.attributeAlias = onItems[1].replaceAll(" ", ""); } else { this.attributeAlias = makeStoreAttributeName(this.streamDataElement, this.calculation); } } catch (Exception e) { throw new UnsupportedCalculationException(String.format( "Unsupported Calculation %s", requested)); } } } public static String makeStoreAttributeName(String attribute, SummaryCalculation calculation) { return String.format("%s-%s", StreamAggregatorUtils.methodToColumn(attribute), calculation.name()); } public String getStreamDataElement() { return streamDataElement; } public String getAttributeAlias() { return attributeAlias; } public SummaryCalculation getCalculation() { return calculation; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/AbstractDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregateData; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; /** * Abstract class which provides some helper methods for creating IDataExtractor * classes. */ public abstract class AbstractDataExtractor implements IDataExtractor { protected AggregatorType aggregatorType = AggregatorType.COUNT; protected SummaryConfiguration summaryConfig = new SummaryConfiguration(); public abstract String getAggregateLabelName(); public abstract String getDateValueName(); public abstract List getData(InputEvent event) throws SerializationException; public abstract void validate() throws Exception; public AggregatorType getAggregatorType() { return this.aggregatorType; } public void setAggregatorType(AggregatorType type) { this.aggregatorType = type; } public SummaryConfiguration getSummaryConfig() { return this.summaryConfig; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/CsvDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators Copyright 2014, Amazon.com, Inc. or its * affiliates. All Rights Reserved. Licensed under the Amazon Software License * (the "License"). You may not use this file except in compliance with the * License. A copy of the License is located at http://aws.amazon.com/asl/ or in * the "license" file accompanying this file. This file is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.util.List; import com.amazonaws.services.kinesis.io.serializer.CsvSerializer; /** * IDataExtractor implementation which allows for extraction of data from * Streams formatted as Character Separated Values. Also optionally allows for * regular expression based filtering of the stream prior to aggregation. */ public class CsvDataExtractor extends StringDataExtractor implements IDataExtractor { private static String delimiter = ","; private static String itemTerminator = "\n"; private CsvSerializer serialiser; /** * Create a new data extractor using the indicated index for the label value * to be aggregated on, and the delimiter for tokenising the data value. * * @param labelIndex Index (base 0) of where in the CSV stream the label * value occurs * @param delimiter The character delimiter separating items in the stream * data. */ public CsvDataExtractor(List labelIndicies) { super.labelIndicies = labelIndicies; this.serialiser = new CsvSerializer().withFieldDelimiter(delimiter).withItemTerminator( itemTerminator); super.serialiser = serialiser; } public CsvDataExtractor(List labelIndicies, String labelAttributeAlias, int dateValueIndex, String dateAttributeAlias, String fieldDelimiter, CsvSerializer serialiser) { super.labelIndicies = labelIndicies; super.labelAttributeAlias = labelAttributeAlias; super.dateValueIndex = dateValueIndex; super.dateAttributeAlias = dateAttributeAlias; this.serialiser = serialiser; super.serialiser = serialiser; } /** * Add a regular expression filter to this data extractor. When configured, * only string values which match the regular expression will be * deserialised and have data extracted from it. * * @param filterRegex Regular expression which must match in order for data * to be subject to data extraction. * @return */ public CsvDataExtractor withRegexFilter(String filterRegex) { if (filterRegex != null) { this.serialiser.withFilterRegex(filterRegex); super.serialiser = this.serialiser; } return this; } /** * Add a non default field delimiter. The default is "," * * @param delimiter The characters used for delimiting items within a line * @return */ public CsvDataExtractor withDelimiter(String delimiter) { if (delimiter != null) { this.serialiser.withFieldDelimiter(delimiter); super.serialiser = this.serialiser; } return this; } /** * Add a non default item terminator. The default is "\n" * * @param lineTerminator The characters used for delimiting lines of text * @return */ public CsvDataExtractor withItemTerminator(String lineTerminator) { if (lineTerminator != null) { this.serialiser.withItemTerminator(lineTerminator); super.serialiser = this.serialiser; } return this; } /** * Add a custom configured serialiser * * @param serialiser * @return */ public CsvDataExtractor withSerialiser(CsvSerializer serialiser) { this.serialiser = serialiser; super.serialiser = serialiser; return this; } /** * Builder method for adding a index to the extraction configuration which * indicates where the date item to be used for aggregation can be found. * * @param dateValueIndex The index value (base 0) in the CSV stream which * contains the date value. * @return */ public CsvDataExtractor withDateValueIndex(Integer dateValueIndex) { if (dateValueIndex != null) { this.dateValueIndex = dateValueIndex; } return this; } @Override public IDataExtractor copy() throws Exception { return new CsvDataExtractor(this.labelIndicies, super.labelAttributeAlias, this.dateValueIndex, super.dateAttributeAlias, this.delimiter, this.serialiser).withSummaryIndicies(this.getOriginalSummaryExpressions()); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/IDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.util.List; import com.amazonaws.services.kinesis.aggregators.AggregateData; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; /** * Interface which allows for pluggable data extractors for different types of * stream data. Aggregators use IDataExtractor to interoperate between the * stream data format and the internal format required for Aggregation. * IDataExtractors likely use IKinesisSerialisers to read and write to and from * the stream */ public interface IDataExtractor { /** * Get the name of the element which represents the unique ID for the event, * if there is one */ public String getUniqueIdName(); /** * Get the name of the label value to be extracted. * * @return */ public String getAggregateLabelName(); /** * Get the name of the date value to be extracted. * * @return */ public String getDateValueName(); /** * Extract one or more aggregatable items from a Kinesis Record. * * @param event The Kinesis Record data from which we want to extract data. * @return A list of ExtractedData elements which have been resolved from * the input data. * @throws SerializationException */ public List getData(InputEvent event) throws SerializationException; /** * Set the type of aggregator which contains this IDataExtractor. Used to * boost efficiency in that the Extractor will not extract summary items for * COUNT based Aggregator integration. * * @param type */ public void setAggregatorType(AggregatorType type); /** * Validate that the extractor is well formed. * * @throws Exception */ public void validate() throws Exception; /** * Get the summary configuration that is driving data extraction against the * data stream. * * @return */ public SummaryConfiguration getSummaryConfig(); public IDataExtractor copy() throws Exception; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/JsonDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.AggregateData; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; import com.amazonaws.services.kinesis.io.serializer.JsonSerializer; import com.fasterxml.jackson.databind.JsonNode; public class JsonDataExtractor extends AbstractDataExtractor implements IDataExtractor { private List labelAttributes; private String labelName, dateFormat, uniqueIdAttribute, dateValueAttribute; private SimpleDateFormat dateFormatter; private List summaryAttributes; private final Log LOG = LogFactory.getLog(JsonDataExtractor.class); private Map sumUpdates = new HashMap<>(); private JsonSerializer serialiser = new JsonSerializer(); private JsonDataExtractor() { } public JsonDataExtractor(List labelAttributes) { this.labelAttributes = labelAttributes; this.labelName = LabelSet.fromStringKeys(labelAttributes).getName(); } public JsonDataExtractor(List labelAttributes, JsonSerializer serialiser) { this(labelAttributes); this.serialiser = serialiser; } /** * {@inheritDoc} */ @Override public List getData(InputEvent event) throws SerializationException { try { List aggregateData = new ArrayList<>(); Date dateValue = null; JsonNode jsonContent = null; String dateString, summary = null; sumUpdates = new HashMap<>(); List items = (List) serialiser.toClass(event); // log a warning if we didn't get anything back from the serialiser // - this could be OK, but probably isn't if (items == null || items.size() == 0) LOG.warn(String .format("Failed to deserialise any content for Record (Partition Key %s, Sequence %s", event.getPartitionKey(), event.getSequenceNumber())); // process all the items returned by the serialiser for (String item : items) { // Convert the string to a Jackson JsonNode for navigation jsonContent = StreamAggregatorUtils.asJsonNode(item); LabelSet labels = new LabelSet(); for (String key : this.labelAttributes) { labels.put(key, StreamAggregatorUtils.readValueAsString( jsonContent, key)); } // get the unique ID for the event String uniqueId = null; if (this.uniqueIdAttribute != null) { switch (this.uniqueIdAttribute) { case StreamAggregator.REF_PARTITION_KEY: uniqueId = event.getPartitionKey(); break; case StreamAggregator.REF_SEQUENCE: uniqueId = event.getSequenceNumber(); break; default: uniqueId = StreamAggregatorUtils.readValueAsString( jsonContent, uniqueIdAttribute); break; } } // get the date value from the line if (dateValueAttribute != null) { dateString = StreamAggregatorUtils.readValueAsString( jsonContent, dateValueAttribute); // bail on no date returned if (dateString == null || dateString.equals("")) throw new SerializationException( String.format( "Unable to read date value attribute %s from JSON Content %s", dateValueAttribute, item)); // turn date as long or string into Date if (this.dateFormat != null) { dateValue = dateFormatter.parse(dateString); } else { // no formatter, so treat as epoch seconds try { dateValue = new Date(Long.parseLong(dateString)); } catch (Exception e) { LOG.error(String .format("Unable to create Date Value element from item '%s' due to invalid format as Epoch Seconds", dateValueAttribute)); throw new SerializationException(e); } } } else { // no date value attribute configured, so use now dateValue = new Date(System.currentTimeMillis()); } // get the summed values if (this.aggregatorType.equals(AggregatorType.SUM)) { // get the positional sum items for (String s : summaryConfig.getItemSet()) { try { summary = StreamAggregatorUtils.readValueAsString( jsonContent, s); // if a summary is not found in the data element, // then we simply continue without it // StreamAggregatorUtils.readValueAsString returns // "" if // attribute is not found. if (summary != null && !summary.equals("")) { sumUpdates.put(s, Double.parseDouble(summary)); } } catch (NumberFormatException nfe) { LOG.error(String .format("Unable to deserialise Summary '%s' due to NumberFormatException", s)); throw new SerializationException(nfe); } } } aggregateData.add(new AggregateData(uniqueId, labels, dateValue, sumUpdates)); } return aggregateData; } catch (Exception e) { throw new SerializationException(e); } } /** Builder method to add the attribute which is the event unique id */ public JsonDataExtractor withUniqueIdAttribute(String uniqueIdAttribute) { this.uniqueIdAttribute = uniqueIdAttribute; return this; } /** * Builder Method to add the attribute name which contains the date value * for the stream item. * * @param dateValueAttribute * The attribute name which contains the date item. * @return */ public JsonDataExtractor withDateValueAttribute(String dateValueAttribute) { if (dateValueAttribute != null) this.dateValueAttribute = dateValueAttribute; return this; } /** * Builder method which allows adding a date format string which can be used * to convert the data value in the dateValueAttribute, if the value is a * string. * * @param dateFormat * Date Format in {@link java.text.SimpleDateFormat} form. * @return */ public JsonDataExtractor withDateFormat(String dateFormat) { if (dateFormat != null && !dateFormat.equals("")) { this.dateFormat = dateFormat; this.dateFormatter = new SimpleDateFormat(dateFormat); } return this; } public JsonDataExtractor withSerialiser(JsonSerializer serialiser) { this.serialiser = serialiser; return this; } /** * Builder method which allows for setting a list of summary attribute names * or expressions on the data extractor. * * @param summaryAttributes * List of summary attribute names or expressions which should be * extracted from the data * @return * @throws UnsupportedCalculationException */ public JsonDataExtractor withSummaryAttributes( List summaryAttributes) throws UnsupportedCalculationException { if (summaryAttributes != null) { this.aggregatorType = AggregatorType.SUM; this.summaryAttributes = summaryAttributes; this.summaryConfig = new SummaryConfiguration(summaryAttributes); } return this; } /** * Add a regular expression filter to this data extractor. When configured, * only string values which match the regular expression will be * deserialised and have data extracted from it. * * @param filterRegex * Regular expression which must match in order for data to be * subject to data extraction. * @return */ public JsonDataExtractor withRegexFilter(String filterRegex) { if (filterRegex != null) { this.serialiser.withFilterRegex(filterRegex); } return this; } /** * Add a non default item terminator. The default is "\n" * * @param lineTerminator * The characters used for delimiting lines of text * @return */ public JsonDataExtractor withItemTerminator(String lineTerminator) { if (lineTerminator != null) { this.serialiser.withItemTerminator(lineTerminator); } return this; } /** * {@inheritDoc} */ @Override public String getAggregateLabelName() { return this.labelName; } /** * {@inheritDoc} */ @Override public String getDateValueName() { return this.dateValueAttribute == null ? StreamAggregator.DEFAULT_DATE_VALUE : this.dateValueAttribute; } /** * {@inheritDoc} */ @Override public String getUniqueIdName() { return this.uniqueIdAttribute; } /** * {@inheritDoc} */ @Override public void validate() throws Exception { if (this.serialiser == null) { throw new InvalidConfigurationException( "Cannot create instance of JsonDataExtractor without an IKinesisSerialiser"); } if (this.aggregatorType.equals(AggregatorType.SUM) && this.summaryAttributes == null) { throw new InvalidConfigurationException( "Summary aggregators require configuration of Summary Attributes"); } } public IDataExtractor copy() throws Exception { return new JsonDataExtractor(this.labelAttributes, this.serialiser) .withDateFormat(this.dateFormat) .withDateValueAttribute(this.dateValueAttribute) .withSummaryAttributes(this.summaryAttributes); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/ObjectExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.AggregateData; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.annotations.AnnotationProcessor; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException; import com.amazonaws.services.kinesis.aggregators.summary.SummaryConfiguration; import com.amazonaws.services.kinesis.aggregators.summary.SummaryElement; import com.amazonaws.services.kinesis.io.serializer.IKinesisSerializer; import com.amazonaws.services.kinesis.io.serializer.JsonSerializer; /** * IDataExtractor which supports extracting data from Objects via reflected * method signatures. */ public class ObjectExtractor extends AbstractDataExtractor implements IDataExtractor { @SuppressWarnings("rawtypes") private Class clazz; private String uniqueIdMethodName; private List aggregateLabelMethods; private Map aggregateLabelMethodMap = new LinkedHashMap<>(); private String aggregateLabelColumn, dateValueColumn, dateMethodName; private Method dateMethod, uniqueIdMethod; private Object eventDate; private Map sumValueMap; private Object summaryValue; private final Log LOG = LogFactory.getLog(ObjectExtractor.class); private Date dateValue; private Map sumUpdates = new HashMap<>(); private IKinesisSerializer serialiser; private List data; private boolean validated = false; private ObjectExtractor() { } public ObjectExtractor(Class clazz) throws Exception { AnnotationProcessor p = new AnnotationProcessor(clazz); this.aggregateLabelMethods = p.getLabelMethodNames(); for (String s : p.getLabelMethodNames()) { this.aggregateLabelMethodMap.put(s, p.getLabelMethods().get(s)); } LabelSet labels = LabelSet.fromStringKeys(this.aggregateLabelMethods); this.aggregateLabelColumn = StreamAggregatorUtils.methodToColumn(labels.getName()); this.dateMethodName = p.getDateMethodName(); this.dateValueColumn = StreamAggregatorUtils.methodToColumn(p.getDateMethodName()); this.dateMethod = p.getDateMethod(); this.sumValueMap = p.getSummaryMethods(); this.summaryConfig = p.getSummaryConfig(); this.clazz = clazz; this.serialiser = new JsonSerializer(clazz); } /** * Create an Object Extractor using Default serialisation for the class. * * @param aggregateLabelMethod The method to be used as the label for * aggregation. * @param clazz The base class used for deserialisation and accessed using * configured accessor methods. */ public ObjectExtractor(List aggregateLabelMethods, Class clazz) throws Exception { this(aggregateLabelMethods, clazz, null); } /** * Create an Object Extractor using indicated serialisation for the class. * * @param aggregateLabelMethod The method to be used as the label for * aggregation. * @param clazz The base class used for deserialisation and accessed using * configured accessor methods. * @param serialiser Instance of an ITransformer which converts between the * binary Kinesis format and the required Object format indicated by * the base class. */ public ObjectExtractor(List aggregateLabelMethodNames, Class clazz, IKinesisSerializer serialiser) throws Exception { this.clazz = clazz; if (serialiser == null) { this.serialiser = new JsonSerializer(clazz); } else { this.serialiser = serialiser; } if (aggregateLabelMethodNames == null || aggregateLabelMethodNames.size() == 0) { throw new InvalidConfigurationException( "Cannot Aggregate an Object without a Label Method"); } else { this.aggregateLabelMethods = aggregateLabelMethodNames; for (String s : aggregateLabelMethodNames) { Method m = clazz.getDeclaredMethod(s); m.setAccessible(true); this.aggregateLabelMethodMap.put(s, m); } } LabelSet labels = LabelSet.fromStringKeys(this.aggregateLabelMethods); this.aggregateLabelColumn = labels.getName(); } /** * {@inheritDoc} */ @Override public void validate() throws Exception { if (!validated) { // validate sum config if ((this.aggregatorType.equals(AggregatorType.SUM)) && this.sumValueMap == null) { throw new Exception( "Summary Aggregators require both a Label Field and a Value Field Set"); } if (this.aggregatorType.equals(AggregatorType.SUM)) { for (String s : this.sumValueMap.keySet()) { try { Method m = clazz.getDeclaredMethod(s); m.setAccessible(true); this.sumValueMap.put(s, m); } catch (NoSuchMethodException e) { LOG.error(e); throw e; } } } LOG.info(String.format("Object Extractor Configuration\n" + "Class: %s\n" + "Date Method: %s\n", this.clazz.getSimpleName(), this.dateMethodName)); validated = true; } } /** * {@inheritDoc} */ @Override public List getData(InputEvent event) throws SerializationException { if (!validated) { try { validate(); } catch (Exception e) { throw new SerializationException(e); } } try { List data = new ArrayList<>(); Object o = serialiser.toClass(event); // get the value of the reflected labels LabelSet labels = new LabelSet(); for (String key : this.aggregateLabelMethods) { labels.put(key, aggregateLabelMethodMap.get(key).invoke(o).toString()); } // get the unique ID value from the object String uniqueId = null; if (this.uniqueIdMethodName != null) { switch (this.uniqueIdMethodName) { case StreamAggregator.REF_PARTITION_KEY: uniqueId = event.getPartitionKey(); break; case StreamAggregator.REF_SEQUENCE: uniqueId = event.getSequenceNumber(); break; default: Object id = uniqueIdMethod.invoke(o); if (id != null) { uniqueId = id.toString(); } break; } } // get the date value from the object if (this.dateMethod != null) { eventDate = dateMethod.invoke(o); if (eventDate == null) { dateValue = new Date(System.currentTimeMillis()); } else { if (eventDate instanceof Date) { dateValue = (Date) eventDate; } else if (eventDate instanceof Long) { dateValue = new Date((Long) eventDate); } else { throw new Exception(String.format( "Cannot use data type %s for date value on event", eventDate.getClass().getSimpleName())); } } } // extract all summed values from the serialised object if (this.aggregatorType.equals(AggregatorType.SUM)) { // lift out the aggregated method value for (String s : this.sumValueMap.keySet()) { summaryValue = this.sumValueMap.get(s).invoke(o); if (summaryValue != null) { if (summaryValue instanceof Double) { sumUpdates.put(s, (Double) summaryValue); } else if (summaryValue instanceof Long) { sumUpdates.put(s, ((Long) summaryValue).doubleValue()); } else if (summaryValue instanceof Integer) { sumUpdates.put(s, ((Integer) summaryValue).doubleValue()); } else { String msg = String.format( "Unable to access Summary %s due to NumberFormatException", s); LOG.error(msg); throw new SerializationException(msg); } } } } data.add(new AggregateData(uniqueId, labels, dateValue, sumUpdates)); return data; } catch (Exception e) { throw new SerializationException(e); } } /** * Builder which allows for configuration a date method to be used as the * date item for aggregation. * * @param dateMethodName The name of the method which returns the date for * the event. * @return */ public ObjectExtractor withDateMethod(String dateMethodName) throws NoSuchMethodException { this.dateMethodName = dateMethodName; this.dateValueColumn = StreamAggregatorUtils.methodToColumn(dateMethodName); this.dateMethod = this.clazz.getDeclaredMethod(dateMethodName); this.dateMethod.setAccessible(true); return this; } public ObjectExtractor withUniqueIdMethod(String uniqueIdMethodName) throws NoSuchMethodException { this.uniqueIdMethodName = uniqueIdMethodName; switch (this.uniqueIdMethodName) { case StreamAggregator.REF_PARTITION_KEY: break; case StreamAggregator.REF_SEQUENCE: break; default: this.uniqueIdMethod = this.clazz.getDeclaredMethod(this.uniqueIdMethodName); break; } return this; } /** * Builder which allows associating a set of method names or expressions * against methods for use as summary aggregate values. * * @param summaryMethodName The method name or an expression against the * method name which will be used as summary aggregate values. For * instance, when an expression is used against a method, the format * is SummaryCalculation(methodName), for example: * sum(getObjectValue) * @return * @throws UnsupportedCalculationException */ public ObjectExtractor withSummaryMethods(List summaryMethodName) throws UnsupportedCalculationException { if (summaryMethodName != null) { this.aggregatorType = AggregatorType.SUM; if (this.sumValueMap == null) this.sumValueMap = new HashMap<>(); for (String s : summaryMethodName) { this.summaryConfig.withConfigItem(s); // parse the requested summary method name into a calculation // and name, as we require the method name directly SummaryElement e = new SummaryElement(s); this.sumValueMap.put(e.getStreamDataElement(), null); } } return this; } public ObjectExtractor withSummaryConfig(SummaryConfiguration config) { this.summaryConfig = config; if (this.sumValueMap == null) this.sumValueMap = new HashMap<>(); for (String s : this.summaryConfig.getItemSet()) { this.sumValueMap.put(s, null); } return this; } /** * Get the class which this object can extract data from * * @return */ @SuppressWarnings("rawtypes") public Class getClazz() { return this.clazz; } /** * {@inheritDoc} */ @Override public String getAggregateLabelName() { return this.aggregateLabelColumn; } /** * {@inheritDoc} */ @Override public String getDateValueName() { return this.dateValueColumn == null ? StreamAggregator.DEFAULT_DATE_VALUE : this.dateValueColumn; } public IDataExtractor copy() throws Exception { throw new UnsupportedOperationException(); } /** * {@inheritDoc} */ @Override public String getUniqueIdName() { if (this.uniqueIdMethod != null) { return StreamAggregatorUtils.methodToColumn(this.uniqueIdMethodName); } else { return null; } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/RegexDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.util.List; import com.amazonaws.services.kinesis.io.serializer.CsvSerializer; import com.amazonaws.services.kinesis.io.serializer.RegexSerializer; /** * IDataExtractor implementation which allows for extraction of data from * Streams formatted as Character Separated Values. Also optionally allows for * regular expression based filtering of the stream prior to aggregation. */ public class RegexDataExtractor extends StringDataExtractor implements IDataExtractor { private String regex; private RegexSerializer serialiser; /** * Create a new data extractor using the indicated index for the label value * to be aggregated on, and the regular expression used for data extraction * * @param labelIndex Index (base 0) of where in the CSV stream the label * value occurs * @param delimiter The character delimiter separating items in the stream * data. */ public RegexDataExtractor(String regex, List labelIndicies) { this(regex, labelIndicies, null, -1, null, null); } public RegexDataExtractor(String regex, List labelIndicies, int dateValueIndex) { this(regex, labelIndicies, null, dateValueIndex, null, null); } public RegexDataExtractor(String regex, List labelIndicies, String labelAttributeAlias, int dateValueIndex, String dateAttributeAlias, RegexSerializer serialiser) { this.regex = regex; super.labelIndicies = labelIndicies; super.labelAttributeAlias = labelAttributeAlias; super.dateAttributeAlias = dateAttributeAlias; if (dateValueIndex != -1) super.dateValueIndex = dateValueIndex; if (serialiser != null) { super.serialiser = serialiser; } else { super.serialiser = new RegexSerializer(regex); } } /** * Add a non default item terminator. The default is "\n" * * @param lineTerminator The characters used for delimiting lines of text * @return */ public RegexDataExtractor withItemTerminator(String lineTerminator) { if (lineTerminator != null) { this.serialiser.withItemTerminator(lineTerminator); super.serialiser = this.serialiser; } return this; } /** * Add a custom configured serialiser * * @param serialiser * @return */ public RegexDataExtractor withSerialiser(CsvSerializer serialiser) { super.serialiser = serialiser; return this; } /** * Builder method for adding a index to the extraction configuration which * indicates where the date item to be used for aggregation can be found. * * @param dateValueIndex The index value (base 0) in the CSV stream which * contains the date value. * @return */ public RegexDataExtractor withDateValueIndex(Integer dateValueIndex) { if (dateValueIndex != null) { this.dateValueIndex = dateValueIndex; } return this; } @Override public IDataExtractor copy() throws Exception { RegexDataExtractor dataExtractor = new RegexDataExtractor(this.regex, this.labelIndicies, super.labelAttributeAlias, this.dateValueIndex, super.dateAttributeAlias, this.serialiser).withSummaryIndicies(this.getOriginalSummaryExpressions()); return dataExtractor; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/StringDataExtractor.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.services.kinesis.aggregators.AggregateData; import com.amazonaws.services.kinesis.aggregators.AggregatorType; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.amazonaws.services.kinesis.aggregators.LabelSet; import com.amazonaws.services.kinesis.aggregators.StreamAggregator; import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils; import com.amazonaws.services.kinesis.aggregators.exception.InvalidConfigurationException; import com.amazonaws.services.kinesis.aggregators.exception.SerializationException; import com.amazonaws.services.kinesis.aggregators.exception.UnsupportedCalculationException; import com.amazonaws.services.kinesis.aggregators.summary.SummaryElement; import com.amazonaws.services.kinesis.io.serializer.IKinesisSerializer; /** * IDataExtractor implementation which allows for extraction of data from * Streams formatted as Character Separated Values. Also optionally allows for * regular expression based filtering of the stream prior to aggregation. */ public class StringDataExtractor> extends AbstractDataExtractor implements IDataExtractor { protected List labelIndicies = new ArrayList<>(); private LabelSet labelSet; protected String labelAttributeAlias, dateAttributeAlias; private boolean usePartitionKeyForUnique = false; private boolean useSequenceForUnique = false; private int uniqueIdIndex = -1; protected int dateValueIndex = -1; private String dateFormat; private SimpleDateFormat dateFormatter; protected List originalSummaryExpressions = new ArrayList<>(); protected List summaryIndicies = new ArrayList<>(); protected Map sumUpdates; protected IKinesisSerializer>, byte[]> serialiser; private final Log LOG = LogFactory.getLog(StringDataExtractor.class); protected StringDataExtractor() { } /** * Validate that the Data Extractor is correctly configured. */ @Override public void validate() throws Exception { if (this.serialiser == null) { throw new InvalidConfigurationException( "Unable to create instance of StringDataExtractor without an IKinesisSerialiser"); } if (aggregatorType.equals(AggregatorType.SUM) && (this.summaryIndicies == null || this.summaryIndicies.size() == 0)) { throw new InvalidConfigurationException( "Summary type String Aggregators require a list of Summary Indicies"); } this.labelSet = LabelSet.fromIntegerKeys(this.labelIndicies); if (this.labelAttributeAlias != null) { this.labelSet.withAlias(this.labelAttributeAlias); } } /** * {@inheritDoc} */ @Override public List getData(InputEvent event) throws SerializationException { try { int summaryIndex = -1; String dateString; Date dateValue; List data = new ArrayList<>(); List> content = serialiser.toClass(event); for (List line : content) { if (line != null) { LabelSet labels = new LabelSet(); labels.withAlias(this.labelAttributeAlias); for (Integer key : this.labelIndicies) { labels.put("" + key, line.get(key)); } // get the unique index String uniqueId = null; if (this.usePartitionKeyForUnique) { uniqueId = event.getPartitionKey(); } else if (this.useSequenceForUnique) { uniqueId = event.getSequenceNumber(); } else { if (this.uniqueIdIndex != -1) { uniqueId = line.get(this.uniqueIdIndex); } } // get the date value from the line if (this.dateValueIndex != -1) { dateString = line.get(dateValueIndex); if (this.dateFormat != null) { dateValue = dateFormatter.parse(dateString); } else { // no formatter, so treat as epoch seconds try { dateValue = new Date(Long.parseLong(dateString)); } catch (Exception e) { LOG.error(String.format( "Unable to create Date Value element from item '%s' due to invalid format as Epoch Seconds", dateValueIndex)); throw new SerializationException(e); } } } else { dateValue = new Date(System.currentTimeMillis()); } // get the summed values if (this.aggregatorType.equals(AggregatorType.SUM)) { sumUpdates = new HashMap<>(); // get the positional sum items for (int i = 0; i < summaryIndicies.size(); i++) { summaryIndex = summaryIndicies.get(i); try { sumUpdates.put("" + summaryIndex, Double.parseDouble(line.get(summaryIndex))); } catch (NumberFormatException nfe) { LOG.error(String.format( "Unable to deserialise Summary '%s' due to NumberFormatException", i)); throw new SerializationException(nfe); } } } data.add(new AggregateData(uniqueId, labels, dateValue, sumUpdates)); } } return data; } catch (Exception e) { throw new SerializationException(e); } } /** * Builder method to add a date format (based on * {@link java.text.SimpleDateFormat} when the dateValueIndex item is a * string. * * @param dateFormat * @return */ @SuppressWarnings("unchecked") public T withDateFormat(String dateFormat) { if (dateFormat != null && !dateFormat.equals("")) { this.dateFormat = dateFormat; this.dateFormatter = new SimpleDateFormat(dateFormat); } return (T) this; } /** * Builder method to add a set of summary indicies or expressions to the * aggregation configuration. * * @param summaryIndicies List of integer values indicating positions in the * stream for summary values, or a list of strings indicating * expressions around positions which contain summary values to be * aggregated. If expressions using * {@link com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation} * are used, then the format is SummaryCalculation(index), for * example the sum of position 4 would be 'sum(4)' * @return * @throws UnsupportedCalculationException */ @SuppressWarnings("unchecked") public T withSummaryIndicies(List summaryIndicies) throws UnsupportedCalculationException { if (summaryIndicies != null) { for (Object o : summaryIndicies) { if (o instanceof Integer) { Integer i = (Integer) o; withSummaryIndex(i); } else if (o instanceof String) { String s = (String) o; withSummaryIndex(s); } else { throw new UnsupportedCalculationException(String.format( "Unable to generate calculation for %s Datatype", o.getClass().getSimpleName())); } } } return (T) this; } @SuppressWarnings("unchecked") public T withStringSummaryIndicies(List summaryIndicies) throws UnsupportedCalculationException { if (summaryIndicies != null) { for (String s : summaryIndicies) { withSummaryIndex(s); } } return (T) this; } @SuppressWarnings("unchecked") public T withIntegerSummaryIndicies(List summaryIndicies) throws UnsupportedCalculationException { if (summaryIndicies != null) { for (Integer i : summaryIndicies) { withSummaryIndex(i); } } return (T) this; } @SuppressWarnings("unchecked") public T withSummaryIndex(Integer index) { this.aggregatorType = AggregatorType.SUM; this.summaryIndicies.add(index); this.originalSummaryExpressions.add(index); try { this.summaryConfig.withConfigItem(String.format("sum(%s)", index)); } catch (UnsupportedCalculationException e) { } return (T) this; } @SuppressWarnings("unchecked") public T withSummaryIndex(String expression) throws UnsupportedCalculationException { this.aggregatorType = AggregatorType.SUM; if (this.summaryIndicies == null) { this.summaryIndicies = new ArrayList<>(); } SummaryElement e = new SummaryElement(expression); this.originalSummaryExpressions.add(expression); this.summaryIndicies.add(Integer.parseInt(e.getStreamDataElement())); this.summaryConfig.withConfigItem(expression); return (T) this; } public T withLabelAttributeAlias(String alias) { this.labelAttributeAlias = alias; return (T) this; } public T withUniqueIdIndex(String index) { switch (index) { case StreamAggregator.REF_PARTITION_KEY: this.usePartitionKeyForUnique = true; break; case StreamAggregator.REF_SEQUENCE: this.useSequenceForUnique = true; break; default: this.uniqueIdIndex = Integer.parseInt(index); break; } return (T) this; } public T withDateAttributeAlias(String alias) { this.dateAttributeAlias = alias; return (T) this; } /** * {@inheritDoc} */ public String getAggregateLabelName() { return this.labelSet.getName(); } /** * {@inheritDoc} */ @Override public String getUniqueIdName() { return "" + this.uniqueIdIndex; } /** * {@inheritDoc} */ public String getDateValueName() { return this.dateAttributeAlias != null ? this.dateAttributeAlias : "" + this.dateValueIndex; } public List getOriginalSummaryExpressions() { return this.originalSummaryExpressions; } public IDataExtractor copy() throws Exception { throw new UnsupportedOperationException(); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/CsvSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; import com.amazonaws.services.kinesis.aggregators.InputEvent; public class CsvSerializer extends StringSerializer implements IKinesisSerializer>, byte[]> { private String delimiter = ","; private String itemTerminator = "\n"; private String filterRegex; private String charset = "UTF-8"; private Pattern p; /** * Convert a Kinesis record into one or more String lists by tokenising the * parsed item by the delimiter */ public List> toClass(InputEvent event) throws IOException { List> outputData = new ArrayList<>(); List item = new ArrayList<>(); try { String[] lines; lines = super.getItems(event); // apply filters and tokenise by delimiter for (String line : lines) { if ((filterRegex != null && p.matcher(line).matches()) || filterRegex == null) { item = Arrays.asList(line.split(delimiter)); outputData.add(item); } } return outputData; } catch (Exception e) { throw new IOException(e); } } /** * Generate a byte stream in the supplied character set using the String * list of CSV items */ public byte[] fromClass(List> csv) throws IOException { StringBuffer ret = new StringBuffer(); StringBuffer sb = new StringBuffer(); for (List item : csv) { for (String s : item) { sb.append(s + this.delimiter); } ret.append(sb.substring(0, sb.length() - 1) + this.itemTerminator); sb = new StringBuffer(); } return SerializationUtils.safeReturnData(ret.substring(0, ret.length() - 1).getBytes( this.charset)); } /** * Builder method to apply a non-default field delimiter (default ',') * * @param delimiter * @return */ public CsvSerializer withFieldDelimiter(String delimiter) { this.delimiter = delimiter; return this; } /** * Builder method to apply a filtering regular expression to text based * serialisation operations * * @param regex * @return */ public CsvSerializer withFilterRegex(String regex) { this.filterRegex = regex; p = Pattern.compile(this.filterRegex); return this; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/IKinesisSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.IOException; import com.amazonaws.services.kinesis.aggregators.InputEvent; /** * IKinesisSerialiser is used to transform data from a Record (byte array) to the data * model class (T) for processing in the application and from the data model * class to the output type (U) for the emitter. * * @param the data type stored in the record */ public interface IKinesisSerializer { /** * Transform record into an object of its original class. * * @param event raw intput event from the Kinesis stream * @return data as its original class * @throws IOException could not convert the record to a T */ public T toClass(InputEvent event) throws IOException; /** * Transform record from its original class to byte array. * * @param record data as its original class * @return data byte array */ public U fromClass(T record) throws IOException; } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/JavaSerializationSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectInputStream; import java.io.ObjectOutput; import java.io.ObjectOutputStream; import java.io.Serializable; import com.amazonaws.services.kinesis.aggregators.InputEvent; public class JavaSerializationSerializer implements IKinesisSerializer, Serializable { private static final long serialVersionUID = 2837410982374019823L; public Object toClass(InputEvent event) throws IOException { ByteArrayInputStream bis = new ByteArrayInputStream(event.getData()); ObjectInput in = null; try { in = new ObjectInputStream(bis); return in.readObject(); } catch (ClassNotFoundException e) { throw new IOException(e); } finally { try { bis.close(); } catch (IOException ex) { ; } try { if (in != null) { in.close(); } } catch (IOException ex) { } } } public byte[] fromClass(Object o) throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutput out = null; try { out = new ObjectOutputStream(bos); out.writeObject(o); return SerializationUtils.safeReturnData(bos.toByteArray()); } finally { try { if (out != null) { out.close(); } } catch (IOException ex) { // ignore close exception } try { bos.close(); } catch (IOException ex) { // ignore close exception } } } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/JsonSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; import com.amazonaws.services.kinesis.aggregators.InputEvent; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @SuppressWarnings("rawtypes") /** * Class which handles serialising Object payloads using Jackson marshalling, or converts to string format if configured to support text based payloads */ public class JsonSerializer implements IKinesisSerializer { ObjectMapper mapper = new ObjectMapper(); String itemTerminator = null; Class clazz; private String filterRegex; private Pattern p; private String charset = "UTF-8"; /** * Construct a basic json data serialiser */ public JsonSerializer() { } /** * Construct a serialiser that is based on a densely packed recordset list * of items * * @param itemTerminator */ public JsonSerializer(String itemTerminator) { this.itemTerminator = itemTerminator; } /** * Construct a Serialiser which is class based * * @param clazz */ public JsonSerializer(Class clazz) { this.clazz = clazz; mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); } @SuppressWarnings("unchecked") /** * Method to generate either a class instance from a Kinesis Record, or a String which will be converted to JsonMap if we are serialising text based payloads */ public Object toClass(final InputEvent event) throws IOException { // Return a class object from the json, or if we have no class then // return a String list List jsonStringList = new ArrayList<>(); if (this.clazz == null) { if (this.itemTerminator != null) { // break up the json items as separate lines String[] items = new String(event.getData(), this.charset).split(this.itemTerminator); for (String item : items) { if (filterRegex == null || (filterRegex != null && p.matcher(item).matches())) { jsonStringList.add(item); } } return jsonStringList; } else { // single json object per record String item = new String(event.getData(), this.charset); if (filterRegex == null || (filterRegex != null && p.matcher(item).matches())) { jsonStringList.add(item); } return jsonStringList; } } else { // use jackson to serialise a class instance return mapper.readValue(event.getData(), clazz); } } /** * Convert a given object into the required binary representation, based * upon the serialiser config as either an object serialiser or a string * serialiser */ public byte[] fromClass(final Object o) throws IOException { if (this.clazz == null) { return SerializationUtils.safeReturnData(((String) o).getBytes(this.charset)); } else { return SerializationUtils.safeReturnData(mapper.writeValueAsBytes(o)); } } /** * Builder method to apply a filtering regular expression to text based * serialisation operations * * @param regex * @return */ public JsonSerializer withFilterRegex(String regex) { this.filterRegex = regex; p = Pattern.compile(this.filterRegex); return this; } /** * Builder method to apply a non-default character set to text based * serialisation operations (default UTF-8) * * @param charset * @return */ public JsonSerializer withCharset(String charset) { // test that this is a valid character set Charset test = Charset.forName(charset); // use it this.charset = charset; return this; } /** * Build method to apply a non-default item terminator (default \n) * * @param itemTerminator * @return */ public JsonSerializer withItemTerminator(String itemTerminator) { this.itemTerminator = itemTerminator; return this; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/RegexSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.amazonaws.services.kinesis.aggregators.InputEvent; public class RegexSerializer extends StringSerializer implements IKinesisSerializer>, byte[]> { private String regexPattern; private Pattern p; private Matcher m; public RegexSerializer(String regexPattern) { this.regexPattern = regexPattern; p = Pattern.compile(this.regexPattern); } public List> toClass(InputEvent event) throws IOException { List> output = new ArrayList<>(); String[] items; try { items = super.getItems(event); for (String s : items) { List elements = new ArrayList<>(); if (m == null) { m = p.matcher(s); } else { m.reset(s); } if (m.find() && m.groupCount() > 0) { for (int i = 1; i < m.groupCount() + 1; i++) { elements.add(m.group(i)); } output.add(elements); } } return output; } catch (Exception e) { throw new IOException(e); } } public byte[] fromClass(List> content) throws IOException { // Can't reverse engineer the original regex from a string list, so dont // try throw new IOException(new UnsupportedOperationException()); } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/SerializationUtils.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.io.IOException; /** * Helper methods for managing Kinesis Serialisation * * @author meyersi */ public class SerializationUtils { /** * Ensure that the generated binary representation will conform to Kinesis * wire format requirements * * @param check * @return * @throws Exception */ // Kinesis Maximum Byte Length is 50KB public static final int maxObjectSize = 50 * 1024; public static byte[] safeReturnData(byte[] check) throws IOException { if (check.length > maxObjectSize) { throw new IOException(String.format( "Serialised byte length exceeds maximum length of %s", maxObjectSize)); } return check; } } ================================================ FILE: src/main/java/com/amazonaws/services/kinesis/io/serializer/StringSerializer.java ================================================ /** * Amazon Kinesis Aggregators * * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.services.kinesis.io.serializer; import java.nio.charset.Charset; import com.amazonaws.services.kinesis.aggregators.InputEvent; public abstract class StringSerializer> { protected String charset = "UTF-8"; protected String itemTerminator = "\n"; /** * Builder method to apply a non-default character set to text based * serialisation operations (default UTF-8) * * @param charset * @return */ @SuppressWarnings("unchecked") public T withCharset(String charset) { // test that this is a valid character set Charset test = Charset.forName(charset); // use it this.charset = charset; return (T) this; } /** * Build method to apply a non-default item terminator (default \n) * * @param itemTerminator * @return */ @SuppressWarnings("unchecked") public T withItemTerminator(String terminator) { this.itemTerminator = terminator; return (T) this; } protected String[] getItems(InputEvent event) throws Exception { // convert the content to a string in the supplied character set String content = new String(event.getData(), this.charset); // break into items using line terminator return content.split(this.itemTerminator); } }