[
  {
    "path": ".editorconfig",
    "content": "root = true\n[*]\nend_of_line = lf\ninsert_final_newline = true\ncharset = utf-8\nindent_style = space\nindent_size = 4\n"
  },
  {
    "path": ".gitignore",
    "content": "/target/\n/bin/\n*.class\n*.log\n.classpath\n.idea\n.wercker\nproject/target\nproject/project\nlib_managed*/\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# Spark+DynamoDB\nPlug-and-play implementation of an Apache Spark custom data source for AWS DynamoDB.\n\nWe published a small article about the project, check it out here:\nhttps://www.audienceproject.com/blog/tech/sparkdynamodb-using-aws-dynamodb-data-source-apache-spark/\n\n## News\n\n* 2021-01-28: Added option `inferSchema=false` which is useful when writing to a table with many columns\n* 2020-07-23: Releasing version 1.1.0 which supports Spark 3.0.0 and Scala 2.12. Future releases will no longer be compatible with Scala 2.11 and Spark 2.x.x.\n* 2020-04-28: Releasing version 1.0.4. Includes support for assuming AWS roles through custom STS endpoint (credits @jhulten).\n* 2020-04-09: We are releasing version 1.0.3 of the Spark+DynamoDB connector. Added option to `delete` records (thank you @rhelmstetter). Fixes (thank you @juanyunism for #46).\n* 2019-11-25: We are releasing version 1.0.0 of the Spark+DynamoDB connector, which is based on the Spark Data Source V2 API. Out-of-the-box throughput calculations, parallelism and partition planning should now be more reliable. We have also pulled out the external dependency on Guava, which was causing a lot of compatibility issues.\n\n## Features\n\n- Distributed, parallel scan with lazy evaluation\n- Throughput control by rate limiting on target fraction of provisioned table/index capacity\n- Schema discovery to suit your needs\n  - Dynamic inference\n  - Static analysis of case class\n- Column and filter pushdown\n- Global secondary index support\n- Write support\n\n## Getting The Dependency\n\nThe library is available from [Maven Central](https://mvnrepository.com/artifact/com.audienceproject/spark-dynamodb). 
Add the dependency in SBT as ```\"com.audienceproject\" %% \"spark-dynamodb\" % \"latest\"```\n\nSpark is used in the library as a \"provided\" dependency, which means Spark has to be installed separately on the container where the application is running, such as is the case on AWS EMR.\n\n## Quick Start Guide\n\n### Scala\n```scala\nimport com.audienceproject.spark.dynamodb.implicits._\nimport org.apache.spark.sql.SparkSession\n\nval spark = SparkSession.builder().getOrCreate()\n\n// Load a DataFrame from a Dynamo table. Only incurs the cost of a single scan for schema inference.\nval dynamoDf = spark.read.dynamodb(\"SomeTableName\") // <-- DataFrame of Row objects with inferred schema.\n\n// Scan the table for the first 100 items (the order is arbitrary) and print them.\ndynamoDf.show(100)\n\n// write to some other table overwriting existing item with same keys\ndynamoDf.write.dynamodb(\"SomeOtherTable\")\n\n// Case class representing the items in our table.\nimport com.audienceproject.spark.dynamodb.attribute\ncase class Vegetable (name: String, color: String, @attribute(\"weight_kg\") weightKg: Double)\n\n// Load a Dataset[Vegetable]. Notice the @attribute annotation on the case class - we imagine the weight attribute is named with an underscore in DynamoDB.\nimport org.apache.spark.sql.functions._\nimport spark.implicits._\nval vegetableDs = spark.read.dynamodbAs[Vegetable](\"VegeTable\")\nval avgWeightByColor = vegetableDs.agg($\"color\", avg($\"weightKg\")) // The column is called 'weightKg' in the Dataset.\n```\n\n### Python\n```python\n# Load a DataFrame from a Dynamo table. 
Only incurs the cost of a single scan for schema inference.\ndynamoDf = spark.read.option(\"tableName\", \"SomeTableName\") \\\n                     .format(\"dynamodb\") \\\n                     .load() # <-- DataFrame of Row objects with inferred schema.\n\n# Scan the table for the first 100 items (the order is arbitrary) and print them.\ndynamoDf.show(100)\n\n# write to some other table overwriting existing item with same keys\ndynamoDf.write.option(\"tableName\", \"SomeOtherTable\") \\\n              .format(\"dynamodb\") \\\n              .save()\n```\n\n*Note:* When running from `pyspark` shell, you can add the library as:\n```bash\npyspark --packages com.audienceproject:spark-dynamodb_<spark-scala-version>:<version>\n```\n\n## Parameters\nThe following parameters can be set as options on the Spark reader and writer object before loading/saving.\n- `region` sets the region where the dynamodb table resides. Default is environment specific.\n- `roleArn` sets an IAM role to assume. This allows for access to a DynamoDB in a different account than the Spark cluster. Defaults to the standard role configuration.\n\nThe following parameters can be set as options on the Spark reader object before loading.\n\n- `readPartitions` number of partitions to split the initial RDD when loading the data into Spark. Defaults to the size of the DynamoDB table divided into chunks of `maxPartitionBytes`\n- `maxPartitionBytes` the maximum size of a single input partition. Default 128 MB\n- `defaultParallelism` the number of input partitions that can be read from DynamoDB simultaneously. Defaults to `sparkContext.defaultParallelism`\n- `targetCapacity` fraction of provisioned read capacity on the table (or index) to consume for reading. Default 1 (i.e. 100% capacity).\n- `stronglyConsistentReads` whether or not to use strongly consistent reads. Default false.\n- `bytesPerRCU` number of bytes that can be read per second with a single Read Capacity Unit. Default 4000 (4 KB). 
This value is multiplied by two when `stronglyConsistentReads=false`\n- `filterPushdown` whether or not to use filter pushdown to DynamoDB on scan requests. Default true.\n- `throughput` the desired read throughput to use. It overwrites any calculation used by the package. It is intended to be used with tables that are on-demand. Defaults to 100 for on-demand.\n\nThe following parameters can be set as options on the Spark writer object before saving.\n\n- `writeBatchSize` number of items to send per call to DynamoDB BatchWriteItem. Default 25.\n- `targetCapacity` fraction of provisioned write capacity on the table to consume for writing or updating. Default 1 (i.e. 100% capacity).\n- `update` if true items will be written using UpdateItem on keys rather than BatchWriteItem. Default false.\n- `throughput` the desired write throughput to use. It overwrites any calculation used by the package. It is intended to be used with tables that are on-demand. Defaults to 100 for on-demand.\n- `inferSchema` if false will not automatically infer schema - this is useful when writing to a table with many columns\n\n## System Properties\nThe following Java system properties are available for configuration.\n\n- `aws.profile` IAM profile to use for default credentials provider.\n- `aws.dynamodb.region` region in which to access the AWS APIs.\n- `aws.dynamodb.endpoint` endpoint to use for accessing the DynamoDB API.\n- `aws.sts.endpoint` endpoint to use for accessing the STS API when assuming the role indicated by the `roleArn` parameter.\n\n## Acknowledgements\nUsage of parallel scan and rate limiter inspired by work in https://github.com/traviscrawford/spark-dynamodb\n"
  },
  {
    "path": "build.sbt",
    "content": "organization := \"com.audienceproject\"\n\nname := \"spark-dynamodb\"\n\nversion := \"1.1.3\"\n\ndescription := \"Plug-and-play implementation of an Apache Spark custom data source for AWS DynamoDB.\"\n\nscalaVersion := \"2.12.12\"\n\ncompileOrder := CompileOrder.JavaThenScala\n\nresolvers += \"DynamoDBLocal\" at \"https://s3-us-west-2.amazonaws.com/dynamodb-local/release\"\n\nlibraryDependencies += \"com.amazonaws\" % \"aws-java-sdk-sts\" % \"1.11.678\"\nlibraryDependencies += \"com.amazonaws\" % \"aws-java-sdk-dynamodb\" % \"1.11.678\"\nlibraryDependencies += \"com.amazonaws\" % \"DynamoDBLocal\" % \"[1.11,2.0)\" % \"test\" exclude(\"com.google.guava\", \"guava\")\n\nlibraryDependencies += \"org.apache.spark\" %% \"spark-sql\" % \"3.0.0\" % \"provided\"\n\nlibraryDependencies += \"org.scalatest\" %% \"scalatest\" % \"3.0.5\" % \"test\"\n\nlibraryDependencies += \"org.slf4j\" % \"slf4j-api\" % \"1.7.25\"\n\nlibraryDependencies ++= {\n    val log4j2Version = \"2.11.1\"\n    Seq(\n        \"org.apache.logging.log4j\" % \"log4j-api\" % log4j2Version % \"test\",\n        \"org.apache.logging.log4j\" % \"log4j-core\" % log4j2Version % \"test\",\n        \"org.apache.logging.log4j\" % \"log4j-slf4j-impl\" % log4j2Version % \"test\"\n    )\n}\n\nlibraryDependencies += \"com.almworks.sqlite4java\" % \"sqlite4java\" % \"1.0.392\" % \"test\"\n\nretrieveManaged := true\n\nfork in Test := true\n\nval libManaged = \"lib_managed\"\nval libManagedSqlite = s\"${libManaged}_sqlite4java\"\n\njavaOptions in Test ++= Seq(s\"-Djava.library.path=./$libManagedSqlite\", \"-Daws.dynamodb.endpoint=http://localhost:8000\")\n\n/**\n  * Put all sqlite4java dependencies in [[libManagedSqlite]] for easy reference when configuring java.library.path.\n  */\nTest / resourceGenerators += Def.task {\n    import java.nio.file.{Files, Path}\n    import java.util.function.Predicate\n    import java.util.stream.Collectors\n    import scala.collection.JavaConverters._\n\n    def log(msg: 
Any): Unit = println(s\"[℣₳ℒ𐎅] $msg\") //stand out in the crowd\n\n    val theOnesWeLookFor = Set(\n        \"libsqlite4java-linux-amd64-1.0.392.so\",\n        \"libsqlite4java-linux-i386-1.0.392.so \",\n        \"libsqlite4java-osx-1.0.392.dylib     \",\n        \"sqlite4java-1.0.392.jar              \",\n        \"sqlite4java-win32-x64-1.0.392.dll    \",\n        \"sqlite4java-win32-x86-1.0.392.dll    \"\n    ).map(_.trim)\n\n    val isOneOfTheOnes = new Predicate[Path] {\n        override def test(p: Path) = theOnesWeLookFor exists (p endsWith _)\n    }\n\n    val theOnesWeCouldFind: Set[Path] = Files\n        .walk(new File(libManaged).toPath)\n        .filter(isOneOfTheOnes)\n        .collect(Collectors.toSet[Path])\n        .asScala.toSet\n\n    theOnesWeCouldFind foreach { path =>\n        log(s\"found: ${path.toFile.getName}\")\n    }\n\n    assert(theOnesWeCouldFind.size == theOnesWeLookFor.size)\n\n    val libManagedSqliteDir = new File(s\"$libManagedSqlite\")\n    sbt.IO delete libManagedSqliteDir\n    sbt.IO createDirectory libManagedSqliteDir\n    log(libManagedSqliteDir.getAbsolutePath)\n\n    theOnesWeCouldFind\n        .map { path =>\n            val source: File = path.toFile\n            val target: File = libManagedSqliteDir / source.getName\n            log(s\"copying from $source to $target\")\n            sbt.IO.copyFile(source, target)\n            target\n        }\n        .toSeq\n}.taskValue\n\n/**\n  * Maven specific settings for publishing to Maven central.\n  */\npublishMavenStyle := true\npublishArtifact in Test := false\npomIncludeRepository := { _ => false }\npublishTo := {\n    val nexus = \"https://oss.sonatype.org/\"\n    if (isSnapshot.value) Some(\"snapshots\" at nexus + \"content/repositories/snapshots\")\n    else Some(\"releases\" at nexus + \"service/local/staging/deploy/maven2\")\n}\npomExtra := <url>https://github.com/audienceproject/spark-dynamodb</url>\n    <licenses>\n        <license>\n            <name>Apache License, 
Version 2.0</name>\n            <url>https://opensource.org/licenses/apache-2.0</url>\n        </license>\n    </licenses>\n    <scm>\n        <url>git@github.com:audienceproject/spark-dynamodb.git</url>\n        <connection>scm:git:git://github.com/audienceproject/spark-dynamodb.git</connection>\n        <developerConnection>scm:git:ssh://git@github.com/audienceproject/spark-dynamodb.git</developerConnection>\n    </scm>\n    <developers>\n        <developer>\n            <id>jacobfi</id>\n            <name>Jacob Fischer</name>\n            <email>jacob.fischer@audienceproject.com</email>\n            <organization>AudienceProject</organization>\n            <organizationUrl>https://www.audienceproject.com</organizationUrl>\n        </developer>\n        <developer>\n            <id>johsbk</id>\n            <name>Johs Kristoffersen</name>\n            <email>johs.kristoffersen@audienceproject.com</email>\n            <organization>AudienceProject</organization>\n            <organizationUrl>https://www.audienceproject.com</organizationUrl>\n        </developer>\n    </developers>\n"
  },
  {
    "path": "project/build.properties",
    "content": "sbt.version = 1.2.6\n"
  },
  {
    "path": "project/plugins.sbt",
    "content": "logLevel := Level.Warn\n\naddSbtPlugin(\"com.jsuereth\" % \"sbt-pgp\" % \"1.1.0\")\naddSbtPlugin(\"com.typesafe.sbteclipse\" % \"sbteclipse-plugin\" % \"5.2.4\")\naddSbtPlugin(\"net.virtual-void\" % \"sbt-dependency-graph\" % \"0.9.2\")\n"
  },
  {
    "path": "src/main/java/com/audienceproject/shaded/google/common/base/Preconditions.java",
    "content": "package com.audienceproject.shaded.google.common.base;\n\n/*\n * Notice:\n * This file was modified at AudienceProject ApS by Cosmin Catalin Sanda (cosmin@audienceproject.com)\n */\n\n/*\n * Copyright (C) 2007 The Guava Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport java.util.NoSuchElementException;\n\nimport javax.annotation.Nullable;\n\n/**\n * Simple static methods to be called at the start of your own methods to verify\n * correct arguments and state. This allows constructs such as\n * <pre>\n *     if (count <= 0) {\n *       throw new IllegalArgumentException(\"must be positive: \" + count);\n *     }</pre>\n *\n * to be replaced with the more compact\n * <pre>\n *     checkArgument(count > 0, \"must be positive: %s\", count);</pre>\n *\n * Note that the sense of the expression is inverted; with {@code Preconditions}\n * you declare what you expect to be <i>true</i>, just as you do with an\n * <a href=\"http://java.sun.com/j2se/1.5.0/docs/guide/language/assert.html\">\n * {@code assert}</a> or a JUnit {@code assertTrue} call.\n *\n * <p><b>Warning:</b> only the {@code \"%s\"} specifier is recognized as a\n * placeholder in these messages, not the full range of {@link\n * String#format(String, Object[])} specifiers.\n *\n * <p>Take care not to confuse precondition checking with other similar types\n * of checks! 
Precondition exceptions -- including those provided here, but also\n * {@link IndexOutOfBoundsException}, {@link NoSuchElementException}, {@link\n * UnsupportedOperationException} and others -- are used to signal that the\n * <i>calling method</i> has made an error. This tells the caller that it should\n * not have invoked the method when it did, with the arguments it did, or\n * perhaps ever. Postcondition or other invariant failures should not throw\n * these types of exceptions.\n *\n * <p>See the Guava User Guide on <a href=\n * \"http://code.google.com/p/guava-libraries/wiki/PreconditionsExplained\">\n * using {@code Preconditions}</a>.\n *\n * @author Kevin Bourrillion\n * @since 2.0 (imported from Google Collections Library)\n */\npublic final class Preconditions {\n    private Preconditions() {}\n\n    /**\n     * Ensures the truth of an expression involving one or more parameters to the\n     * calling method.\n     *\n     * @param expression a boolean expression\n     * @throws IllegalArgumentException if {@code expression} is false\n     */\n    public static void checkArgument(boolean expression) {\n        if (!expression) {\n            throw new IllegalArgumentException();\n        }\n    }\n\n    /**\n     * Ensures the truth of an expression involving one or more parameters to the\n     * calling method.\n     *\n     * @param expression a boolean expression\n     * @param errorMessage the exception message to use if the check fails; will\n     *     be converted to a string using {@link String#valueOf(Object)}\n     * @throws IllegalArgumentException if {@code expression} is false\n     */\n    public static void checkArgument(\n        boolean expression, @Nullable Object errorMessage) {\n        if (!expression) {\n            throw new IllegalArgumentException(String.valueOf(errorMessage));\n        }\n    }\n\n    /**\n     * Ensures the truth of an expression involving one or more parameters to the\n     * calling method.\n     *\n     * 
@param expression a boolean expression\n     * @param errorMessageTemplate a template for the exception message should the\n     *     check fail. The message is formed by replacing each {@code %s}\n     *     placeholder in the template with an argument. These are matched by\n     *     position - the first {@code %s} gets {@code errorMessageArgs[0]}, etc.\n     *     Unmatched arguments will be appended to the formatted message in square\n     *     braces. Unmatched placeholders will be left as-is.\n     * @param errorMessageArgs the arguments to be substituted into the message\n     *     template. Arguments are converted to strings using\n     *     {@link String#valueOf(Object)}.\n     * @throws IllegalArgumentException if {@code expression} is false\n     * @throws NullPointerException if the check fails and either {@code\n     *     errorMessageTemplate} or {@code errorMessageArgs} is null (don't let\n     *     this happen)\n     */\n    public static void checkArgument(boolean expression,\n                                     @Nullable String errorMessageTemplate,\n                                     @Nullable Object... 
errorMessageArgs) {\n        if (!expression) {\n            throw new IllegalArgumentException(\n                format(errorMessageTemplate, errorMessageArgs));\n        }\n    }\n\n    /**\n     * Ensures the truth of an expression involving the state of the calling\n     * instance, but not involving any parameters to the calling method.\n     *\n     * @param expression a boolean expression\n     * @throws IllegalStateException if {@code expression} is false\n     */\n    public static void checkState(boolean expression) {\n        if (!expression) {\n            throw new IllegalStateException();\n        }\n    }\n\n    /**\n     * Ensures the truth of an expression involving the state of the calling\n     * instance, but not involving any parameters to the calling method.\n     *\n     * @param expression a boolean expression\n     * @param errorMessage the exception message to use if the check fails; will\n     *     be converted to a string using {@link String#valueOf(Object)}\n     * @throws IllegalStateException if {@code expression} is false\n     */\n    public static void checkState(\n        boolean expression, @Nullable Object errorMessage) {\n        if (!expression) {\n            throw new IllegalStateException(String.valueOf(errorMessage));\n        }\n    }\n\n    /**\n     * Ensures the truth of an expression involving the state of the calling\n     * instance, but not involving any parameters to the calling method.\n     *\n     * @param expression a boolean expression\n     * @param errorMessageTemplate a template for the exception message should the\n     *     check fail. The message is formed by replacing each {@code %s}\n     *     placeholder in the template with an argument. These are matched by\n     *     position - the first {@code %s} gets {@code errorMessageArgs[0]}, etc.\n     *     Unmatched arguments will be appended to the formatted message in square\n     *     braces. 
Unmatched placeholders will be left as-is.\n     * @param errorMessageArgs the arguments to be substituted into the message\n     *     template. Arguments are converted to strings using\n     *     {@link String#valueOf(Object)}.\n     * @throws IllegalStateException if {@code expression} is false\n     * @throws NullPointerException if the check fails and either {@code\n     *     errorMessageTemplate} or {@code errorMessageArgs} is null (don't let\n     *     this happen)\n     */\n    public static void checkState(boolean expression,\n                                  @Nullable String errorMessageTemplate,\n                                  @Nullable Object... errorMessageArgs) {\n        if (!expression) {\n            throw new IllegalStateException(\n                format(errorMessageTemplate, errorMessageArgs));\n        }\n    }\n\n    /**\n     * Ensures that an object reference passed as a parameter to the calling\n     * method is not null.\n     *\n     * @param reference an object reference\n     * @return the non-null reference that was validated\n     * @throws NullPointerException if {@code reference} is null\n     */\n    public static <T> T checkNotNull(T reference) {\n        if (reference == null) {\n            throw new NullPointerException();\n        }\n        return reference;\n    }\n\n    /**\n     * Ensures that an object reference passed as a parameter to the calling\n     * method is not null.\n     *\n     * @param reference an object reference\n     * @param errorMessage the exception message to use if the check fails; will\n     *     be converted to a string using {@link String#valueOf(Object)}\n     * @return the non-null reference that was validated\n     * @throws NullPointerException if {@code reference} is null\n     */\n    public static <T> T checkNotNull(T reference, @Nullable Object errorMessage) {\n        if (reference == null) {\n            throw new NullPointerException(String.valueOf(errorMessage));\n        }\n   
     return reference;\n    }\n\n    /**\n     * Ensures that an object reference passed as a parameter to the calling\n     * method is not null.\n     *\n     * @param reference an object reference\n     * @param errorMessageTemplate a template for the exception message should the\n     *     check fail. The message is formed by replacing each {@code %s}\n     *     placeholder in the template with an argument. These are matched by\n     *     position - the first {@code %s} gets {@code errorMessageArgs[0]}, etc.\n     *     Unmatched arguments will be appended to the formatted message in square\n     *     braces. Unmatched placeholders will be left as-is.\n     * @param errorMessageArgs the arguments to be substituted into the message\n     *     template. Arguments are converted to strings using\n     *     {@link String#valueOf(Object)}.\n     * @return the non-null reference that was validated\n     * @throws NullPointerException if {@code reference} is null\n     */\n    public static <T> T checkNotNull(T reference,\n                                     @Nullable String errorMessageTemplate,\n                                     @Nullable Object... errorMessageArgs) {\n        if (reference == null) {\n            // If either of these parameters is null, the right thing happens anyway\n            throw new NullPointerException(\n                format(errorMessageTemplate, errorMessageArgs));\n        }\n        return reference;\n    }\n\n    /*\n     * All recent hotspots (as of 2009) *really* like to have the natural code\n     *\n     * if (guardExpression) {\n     *    throw new BadException(messageExpression);\n     * }\n     *\n     * refactored so that messageExpression is moved to a separate\n     * String-returning method.\n     *\n     * if (guardExpression) {\n     *    throw new BadException(badMsg(...));\n     * }\n     *\n     * The alternative natural refactorings into void or Exception-returning\n     * methods are much slower.  
This is a big deal - we're talking factors of\n     * 2-8 in microbenchmarks, not just 10-20%.  (This is a hotspot optimizer\n     * bug, which should be fixed, but that's a separate, big project).\n     *\n     * The coding pattern above is heavily used in java.util, e.g. in ArrayList.\n     * There is a RangeCheckMicroBenchmark in the JDK that was used to test this.\n     *\n     * But the methods in this class want to throw different exceptions,\n     * depending on the args, so it appears that this pattern is not directly\n     * applicable.  But we can use the ridiculous, devious trick of throwing an\n     * exception in the middle of the construction of another exception.\n     * Hotspot is fine with that.\n     */\n\n    /**\n     * Ensures that {@code index} specifies a valid <i>element</i> in an array,\n     * list or string of size {@code size}. An element index may range from zero,\n     * inclusive, to {@code size}, exclusive.\n     *\n     * @param index a user-supplied index identifying an element of an array, list\n     *     or string\n     * @param size the size of that array, list or string\n     * @return the value of {@code index}\n     * @throws IndexOutOfBoundsException if {@code index} is negative or is not\n     *     less than {@code size}\n     * @throws IllegalArgumentException if {@code size} is negative\n     */\n    public static int checkElementIndex(int index, int size) {\n        return checkElementIndex(index, size, \"index\");\n    }\n\n    /**\n     * Ensures that {@code index} specifies a valid <i>element</i> in an array,\n     * list or string of size {@code size}. 
An element index may range from zero,\n     * inclusive, to {@code size}, exclusive.\n     *\n     * @param index a user-supplied index identifying an element of an array, list\n     *     or string\n     * @param size the size of that array, list or string\n     * @param desc the text to use to describe this index in an error message\n     * @return the value of {@code index}\n     * @throws IndexOutOfBoundsException if {@code index} is negative or is not\n     *     less than {@code size}\n     * @throws IllegalArgumentException if {@code size} is negative\n     */\n    public static int checkElementIndex(\n        int index, int size, @Nullable String desc) {\n        // Carefully optimized for execution by hotspot (explanatory comment above)\n        if (index < 0 || index >= size) {\n            throw new IndexOutOfBoundsException(badElementIndex(index, size, desc));\n        }\n        return index;\n    }\n\n    private static String badElementIndex(int index, int size, String desc) {\n        if (index < 0) {\n            return format(\"%s (%s) must not be negative\", desc, index);\n        } else if (size < 0) {\n            throw new IllegalArgumentException(\"negative size: \" + size);\n        } else { // index >= size\n            return format(\"%s (%s) must be less than size (%s)\", desc, index, size);\n        }\n    }\n\n    /**\n     * Ensures that {@code index} specifies a valid <i>position</i> in an array,\n     * list or string of size {@code size}. 
A position index may range from zero\n     * to {@code size}, inclusive.\n     *\n     * @param index a user-supplied index identifying a position in an array, list\n     *     or string\n     * @param size the size of that array, list or string\n     * @return the value of {@code index}\n     * @throws IndexOutOfBoundsException if {@code index} is negative or is\n     *     greater than {@code size}\n     * @throws IllegalArgumentException if {@code size} is negative\n     */\n    public static int checkPositionIndex(int index, int size) {\n        return checkPositionIndex(index, size, \"index\");\n    }\n\n    /**\n     * Ensures that {@code index} specifies a valid <i>position</i> in an array,\n     * list or string of size {@code size}. A position index may range from zero\n     * to {@code size}, inclusive.\n     *\n     * @param index a user-supplied index identifying a position in an array, list\n     *     or string\n     * @param size the size of that array, list or string\n     * @param desc the text to use to describe this index in an error message\n     * @return the value of {@code index}\n     * @throws IndexOutOfBoundsException if {@code index} is negative or is\n     *     greater than {@code size}\n     * @throws IllegalArgumentException if {@code size} is negative\n     */\n    public static int checkPositionIndex(\n        int index, int size, @Nullable String desc) {\n        // Carefully optimized for execution by hotspot (explanatory comment above)\n        if (index < 0 || index > size) {\n            throw new IndexOutOfBoundsException(badPositionIndex(index, size, desc));\n        }\n        return index;\n    }\n\n    private static String badPositionIndex(int index, int size, String desc) {\n        if (index < 0) {\n            return format(\"%s (%s) must not be negative\", desc, index);\n        } else if (size < 0) {\n            throw new IllegalArgumentException(\"negative size: \" + size);\n        } else { // index > size\n       
     return format(\"%s (%s) must not be greater than size (%s)\",\n                desc, index, size);\n        }\n    }\n\n    /**\n     * Ensures that {@code start} and {@code end} specify valid <i>positions</i>\n     * in an array, list or string of size {@code size}, and are in order. A\n     * position index may range from zero to {@code size}, inclusive.\n     *\n     * @param start a user-supplied index identifying a starting position in an\n     *     array, list or string\n     * @param end a user-supplied index identifying an ending position in an array,\n     *     list or string\n     * @param size the size of that array, list or string\n     * @throws IndexOutOfBoundsException if either index is negative or is\n     *     greater than {@code size}, or if {@code end} is less than {@code start}\n     * @throws IllegalArgumentException if {@code size} is negative\n     */\n    public static void checkPositionIndexes(int start, int end, int size) {\n        // Carefully optimized for execution by hotspot (explanatory comment above)\n        if (start < 0 || end < start || end > size) {\n            throw new IndexOutOfBoundsException(badPositionIndexes(start, end, size));\n        }\n    }\n\n    private static String badPositionIndexes(int start, int end, int size) {\n        if (start < 0 || start > size) {\n            return badPositionIndex(start, size, \"start index\");\n        }\n        if (end < 0 || end > size) {\n            return badPositionIndex(end, size, \"end index\");\n        }\n        // end < start\n        return format(\"end index (%s) must not be less than start index (%s)\",\n            end, start);\n    }\n\n    /**\n     * Substitutes each {@code %s} in {@code template} with an argument. 
These\n     * are matched by position - the first {@code %s} gets {@code args[0]}, etc.\n     * If there are more arguments than placeholders, the unmatched arguments will\n     * be appended to the end of the formatted message in square braces.\n     *\n     * @param template a non-null string containing 0 or more {@code %s}\n     *     placeholders.\n     * @param args the arguments to be substituted into the message\n     *     template. Arguments are converted to strings using\n     *     {@link String#valueOf(Object)}. Arguments can be null.\n     */\n    static String format(String template,\n                                            @Nullable Object... args) {\n        template = String.valueOf(template); // null -> \"null\"\n\n        // start substituting the arguments into the '%s' placeholders\n        StringBuilder builder = new StringBuilder(\n            template.length() + 16 * args.length);\n        int templateStart = 0;\n        int i = 0;\n        while (i < args.length) {\n            int placeholderStart = template.indexOf(\"%s\", templateStart);\n            if (placeholderStart == -1) {\n                break;\n            }\n            builder.append(template.substring(templateStart, placeholderStart));\n            builder.append(args[i++]);\n            templateStart = placeholderStart + 2;\n        }\n        builder.append(template.substring(templateStart));\n\n        // if we run out of placeholders, append the extra args in square braces\n        if (i < args.length) {\n            builder.append(\" [\");\n            builder.append(args[i++]);\n            while (i < args.length) {\n                builder.append(\", \");\n                builder.append(args[i++]);\n            }\n            builder.append(']');\n        }\n\n        return builder.toString();\n    }\n}\n\n"
  },
  {
    "path": "src/main/java/com/audienceproject/shaded/google/common/base/Ticker.java",
    "content": "package com.audienceproject.shaded.google.common.base;\n\n/*\n * Notice:\n * This file was modified at AudienceProject ApS by Cosmin Catalin Sanda (cosmin@audienceproject.com)\n */\n\n/*\n * Copyright (C) 2011 The Guava Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n/**\n * A time source; returns a time value representing the number of nanoseconds elapsed since some\n * fixed but arbitrary point in time.\n *\n * <p><b>Warning:</b> this interface can only be used to measure elapsed time, not wall time.\n *\n * @author Kevin Bourrillion\n * @since 10.0\n *     (<a href=\"http://code.google.com/p/guava-libraries/wiki/Compatibility\"\n *     >mostly source-compatible</a> since 9.0)\n */\npublic abstract class Ticker {\n    /**\n     * Constructor for use by subclasses.\n     */\n    protected Ticker() {}\n\n    /**\n     * Returns the number of nanoseconds elapsed since this ticker's fixed\n     * point of reference.\n     */\n    public abstract long read();\n\n    /**\n     * A ticker that reads the current time using {@link System#nanoTime}.\n     *\n     * @since 10.0\n     */\n    public static Ticker systemTicker() {\n        return SYSTEM_TICKER;\n    }\n\n    private static final Ticker SYSTEM_TICKER = new Ticker() {\n        @Override\n        public long read() {\n            return System.nanoTime();\n        }\n    };\n}\n\n"
  },
  {
    "path": "src/main/java/com/audienceproject/shaded/google/common/util/concurrent/RateLimiter.java",
    "content": "package com.audienceproject.shaded.google.common.util.concurrent;\n\n/*\n * Notice:\n * This file was modified at AudienceProject ApS by Cosmin Catalin Sanda (cosmin@audienceproject.com)\n */\n\n/*\n * Copyright (C) 2012 The Guava Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport com.audienceproject.shaded.google.common.base.Preconditions;\nimport com.audienceproject.shaded.google.common.base.Ticker;\n\nimport javax.annotation.concurrent.ThreadSafe;\nimport java.util.concurrent.TimeUnit;\n\n/**\n  * A rate limiter. Conceptually, a rate limiter distributes permits at a\n  * configurable rate. Each {@link #acquire()} blocks if necessary until a permit is\n  * available, and then takes it. Once acquired, permits need not be released.\n  *\n  * <p>Rate limiters are often used to restrict the rate at which some\n  * physical or logical resource is accessed. This is in contrast to {@link\n  * java.util.concurrent.Semaphore} which restricts the number of concurrent\n  * accesses instead of the rate (note though that concurrency and rate are closely related,\n  * e.g. see <a href=\"http://en.wikipedia.org/wiki/Little's_law\">Little's Law</a>).\n  *\n  * <p>A {@code RateLimiter} is defined primarily by the rate at which permits\n  * are issued. Absent additional configuration, permits will be distributed at a\n  * fixed rate, defined in terms of permits per second. 
Permits will be distributed\n  * smoothly, with the delay between individual permits being adjusted to ensure\n  * that the configured rate is maintained.\n  *\n  * <p>It is possible to configure a {@code RateLimiter} to have a warmup\n  * period during which time the permits issued each second steadily increases until\n  * it hits the stable rate.\n  *\n  * <p>As an example, imagine that we have a list of tasks to execute, but we don't want to\n  * submit more than 2 per second:\n  *<pre>  {@code\n  *  final RateLimiter rateLimiter = RateLimiter.create(2.0); // rate is \"2 permits per second\"\n *  void submitTasks(List<Runnable> tasks, Executor executor) {\n *    for (Runnable task : tasks) {\n *      rateLimiter.acquire(); // may wait\n *      executor.execute(task);\n *    }\n  *  }\n  *}</pre>\n  *\n  * <p>As another example, imagine that we produce a stream of data, and we want to cap it\n  * at 5kb per second. This could be accomplished by requiring a permit per byte, and specifying\n  * a rate of 5000 permits per second:\n  *<pre>  {@code\n  *  final RateLimiter rateLimiter = RateLimiter.create(5000.0); // rate = 5000 permits per second\n *  void submitPacket(byte[] packet) {\n *    rateLimiter.acquire(packet.length);\n *    networkService.send(packet);\n *  }\n  *}</pre>\n  *\n  * <p>It is important to note that the number of permits requested <i>never</i>\n  * affect the throttling of the request itself (an invocation to {@code acquire(1)}\n  * and an invocation to {@code acquire(1000)} will result in exactly the same throttling, if any),\n  * but it affects the throttling of the <i>next</i> request. 
I.e., if an expensive task\n  * arrives at an idle RateLimiter, it will be granted immediately, but it is the <i>next</i>\n  * request that will experience extra throttling, thus paying for the cost of the expensive\n  * task.\n  *\n  * <p>Note: {@code RateLimiter} does not provide fairness guarantees.\n  *\n  * @author Dimitris Andreou\n  * @since 13.0\n  */\n// TODO(user): switch to nano precision. A natural unit of cost is \"bytes\", and a micro precision\n//     would mean a maximum rate of \"1MB/s\", which might be small in some cases.\n@ThreadSafe\npublic abstract class RateLimiter {\n    /*\n     * How is the RateLimiter designed, and why?\n     *\n     * The primary feature of a RateLimiter is its \"stable rate\", the maximum rate that\n     * it should allow at normal conditions. This is enforced by \"throttling\" incoming\n     * requests as needed, i.e. compute, for an incoming request, the appropriate throttle time,\n     * and make the calling thread wait as much.\n     *\n     * The simplest way to maintain a rate of QPS is to keep the timestamp of the last\n     * granted request, and ensure that (1/QPS) seconds have elapsed since then. For example,\n     * for a rate of QPS=5 (5 tokens per second), if we ensure that a request isn't granted\n     * earlier than 200ms after the last one, then we achieve the intended rate.\n     * If a request comes and the last request was granted only 100ms ago, then we wait for\n     * another 100ms. At this rate, serving 15 fresh permits (i.e. for an acquire(15) request)\n     * naturally takes 3 seconds.\n     *\n     * It is important to realize that such a RateLimiter has a very superficial memory\n     * of the past: it only remembers the last request. What if the RateLimiter was unused for\n     * a long period of time, then a request arrived and was immediately granted?\n     * This RateLimiter would immediately forget about that past underutilization. 
This may\n     * result in either underutilization or overflow, depending on the real world consequences\n     * of not using the expected rate.\n     *\n     * Past underutilization could mean that excess resources are available. Then, the RateLimiter\n     * should speed up for a while, to take advantage of these resources. This is important\n     * when the rate is applied to networking (limiting bandwidth), where past underutilization\n     * typically translates to \"almost empty buffers\", which can be filled immediately.\n     *\n     * On the other hand, past underutilization could mean that \"the server responsible for\n     * handling the request has become less ready for future requests\", i.e. its caches become\n     * stale, and requests become more likely to trigger expensive operations (a more extreme\n     * case of this example is when a server has just booted, and it is mostly busy with getting\n     * itself up to speed).\n     *\n     * To deal with such scenarios, we add an extra dimension, that of \"past underutilization\",\n     * modeled by \"storedPermits\" variable. This variable is zero when there is no\n     * underutilization, and it can grow up to maxStoredPermits, for sufficiently large\n     * underutilization. 
So, the requested permits, by an invocation acquire(permits),\n     * are served from:\n     * - stored permits (if available)\n     * - fresh permits (for any remaining permits)\n     *\n     * How this works is best explained with an example:\n     *\n     * For a RateLimiter that produces 1 token per second, every second\n     * that goes by with the RateLimiter being unused, we increase storedPermits by 1.\n     * Say we leave the RateLimiter unused for 10 seconds (i.e., we expected a request at time\n     * X, but we are at time X + 10 seconds before a request actually arrives; this is\n     * also related to the point made in the last paragraph), thus storedPermits\n     * becomes 10.0 (assuming maxStoredPermits >= 10.0). At that point, a request of acquire(3)\n     * arrives. We serve this request out of storedPermits, and reduce that to 7.0 (how this is\n     * translated to throttling time is discussed later). Immediately after, assume that an\n     * acquire(10) request arriving. We serve the request partly from storedPermits,\n     * using all the remaining 7.0 permits, and the remaining 3.0, we serve them by fresh permits\n     * produced by the rate limiter.\n     *\n     * We already know how much time it takes to serve 3 fresh permits: if the rate is\n     * \"1 token per second\", then this will take 3 seconds. But what does it mean to serve 7\n     * stored permits? As explained above, there is no unique answer. If we are primarily\n     * interested to deal with underutilization, then we want stored permits to be given out\n     * /faster/ than fresh ones, because underutilization = free resources for the taking.\n     * If we are primarily interested to deal with overflow, then stored permits could\n     * be given out /slower/ than fresh ones. 
Thus, we require a (different in each case)\n     * function that translates storedPermits to throttling time.\n     *\n     * This role is played by storedPermitsToWaitTime(double storedPermits, double permitsToTake).\n     * The underlying model is a continuous function mapping storedPermits\n     * (from 0.0 to maxStoredPermits) onto the 1/rate (i.e. intervals) that is effective at the given\n     * storedPermits. \"storedPermits\" essentially measure unused time; we spend unused time\n     * buying/storing permits. Rate is \"permits / time\", thus \"1 / rate = time / permits\".\n     * Thus, \"1/rate\" (time / permits) times \"permits\" gives time, i.e., integrals on this\n     * function (which is what storedPermitsToWaitTime() computes) correspond to minimum intervals\n     * between subsequent requests, for the specified number of requested permits.\n     *\n     * Here is an example of storedPermitsToWaitTime:\n     * If storedPermits == 10.0, and we want 3 permits, we take them from storedPermits,\n     * reducing them to 7.0, and compute the throttling for these as a call to\n     * storedPermitsToWaitTime(storedPermits = 10.0, permitsToTake = 3.0), which will\n     * evaluate the integral of the function from 7.0 to 10.0.\n     *\n     * Using integrals guarantees that the effect of a single acquire(3) is equivalent\n     * to { acquire(1); acquire(1); acquire(1); }, or { acquire(2); acquire(1); }, etc,\n     * since the integral of the function in [7.0, 10.0] is equivalent to the sum of the\n     * integrals of [7.0, 8.0], [8.0, 9.0], [9.0, 10.0] (and so on), no matter\n     * what the function is. 
This guarantees that we handle correctly requests of varying weight\n     * (permits), /no matter/ what the actual function is - so we can tweak the latter freely.\n     * (The only requirement, obviously, is that we can compute its integrals).\n     *\n     * Note well that if, for this function, we chose a horizontal line, at height of exactly\n     * (1/QPS), then the effect of the function is non-existent: we serve storedPermits at\n     * exactly the same cost as fresh ones (1/QPS is the cost for each). We use this trick later.\n     *\n     * If we pick a function that goes /below/ that horizontal line, it means that we reduce\n     * the area of the function, thus time. Thus, the RateLimiter becomes /faster/ after a\n     * period of underutilization. If, on the other hand, we pick a function that\n     * goes /above/ that horizontal line, then it means that the area (time) is increased,\n     * thus storedPermits are more costly than fresh permits, thus the RateLimiter becomes\n     * /slower/ after a period of underutilization.\n     *\n     * Last, but not least: consider a RateLimiter with rate of 1 permit per second, currently\n     * completely unused, and an expensive acquire(100) request comes. It would be nonsensical\n     * to just wait for 100 seconds, and /then/ start the actual task. Why wait without doing\n     * anything? A much better approach is to /allow/ the request right away (as if it was an\n     * acquire(1) request instead), and postpone /subsequent/ requests as needed. In this version,\n     * we allow starting the task immediately, and postpone by 100 seconds future requests,\n     * thus we allow for work to get done in the meantime instead of waiting idly.\n     *\n     * This has important consequences: it means that the RateLimiter doesn't remember the time\n     * of the _last_ request, but it remembers the (expected) time of the _next_ request. 
This\n     * also enables us to tell immediately (see tryAcquire(timeout)) whether a particular\n     * timeout is enough to get us to the point of the next scheduling time, since we always\n     * maintain that. And what we mean by \"an unused RateLimiter\" is also defined by that\n     * notion: when we observe that the \"expected arrival time of the next request\" is actually\n     * in the past, then the difference (now - past) is the amount of time that the RateLimiter\n     * was formally unused, and it is that amount of time which we translate to storedPermits.\n     * (We increase storedPermits with the amount of permits that would have been produced\n     * in that idle time). So, if rate == 1 permit per second, and arrivals come exactly\n     * one second after the previous, then storedPermits is _never_ increased -- we would only\n     * increase it for arrivals _later_ than the expected one second.\n     */\n\n    /**\n      * Creates a {@code RateLimiter} with the specified stable throughput, given as\n      * \"permits per second\" (commonly referred to as <i>QPS</i>, queries per second).\n      *\n      * <p>The returned {@code RateLimiter} ensures that on average no more than {@code\n      * permitsPerSecond} are issued during any given second, with sustained requests\n      * being smoothly spread over each second. When the incoming request rate exceeds\n      * {@code permitsPerSecond} the rate limiter will release one permit every {@code\n      * (1.0 / permitsPerSecond)} seconds. 
When the rate limiter is unused,\n      * bursts of up to {@code permitsPerSecond} permits will be allowed, with subsequent\n      * requests being smoothly limited at the stable rate of {@code permitsPerSecond}.\n      *\n      * @param permitsPerSecond the rate of the returned {@code RateLimiter}, measured in\n      *        how many permits become available per second.\n      */\n    public static RateLimiter create(double permitsPerSecond) {\n        return create(SleepingTicker.SYSTEM_TICKER, permitsPerSecond);\n    }\n\n    static RateLimiter create(SleepingTicker ticker, double permitsPerSecond) {\n        RateLimiter rateLimiter = new Bursty(ticker);\n        rateLimiter.setRate(permitsPerSecond);\n        return rateLimiter;\n    }\n\n    /**\n      * Creates a {@code RateLimiter} with the specified stable throughput, given as\n      * \"permits per second\" (commonly referred to as <i>QPS</i>, queries per second), and a\n      * <i>warmup period</i>, during which the {@code RateLimiter} smoothly ramps up its rate,\n      * until it reaches its maximum rate at the end of the period (as long as there are enough\n      * requests to saturate it). Similarly, if the {@code RateLimiter} is left <i>unused</i> for\n      * a duration of {@code warmupPeriod}, it will gradually return to its \"cold\" state,\n      * i.e. it will go through the same warming up process as when it was first created.\n      *\n      * <p>The returned {@code RateLimiter} is intended for cases where the resource that actually\n      * fulfils the requests (e.g., a remote server) needs \"warmup\" time, rather than\n      * being immediately accessed at the stable (maximum) rate.\n      *\n      * <p>The returned {@code RateLimiter} starts in a \"cold\" state (i.e. 
the warmup period\n      * will follow), and if it is left unused for long enough, it will return to that state.\n      *\n      * @param permitsPerSecond the rate of the returned {@code RateLimiter}, measured in\n      *        how many permits become available per second\n      * @param warmupPeriod the duration of the period where the {@code RateLimiter} ramps up its\n      *        rate, before reaching its stable (maximum) rate\n      * @param unit the time unit of the warmupPeriod argument\n      */\n    // TODO(user): add a burst size of 1-second-worth of permits, as in the metronome?\n    public static RateLimiter create(double permitsPerSecond, long warmupPeriod, TimeUnit unit) {\n        return create(SleepingTicker.SYSTEM_TICKER, permitsPerSecond, warmupPeriod, unit);\n    }\n\n    static RateLimiter create(\n        SleepingTicker ticker, double permitsPerSecond, long warmupPeriod, TimeUnit timeUnit) {\n        RateLimiter rateLimiter = new WarmingUp(ticker, warmupPeriod, timeUnit);\n        rateLimiter.setRate(permitsPerSecond);\n        return rateLimiter;\n    }\n\n    static RateLimiter createBursty(\n        SleepingTicker ticker, double permitsPerSecond, int maxBurstSize) {\n        Bursty rateLimiter = new Bursty(ticker);\n        rateLimiter.setRate(permitsPerSecond);\n        rateLimiter.maxPermits = maxBurstSize;\n        return rateLimiter;\n    }\n\n    /**\n      * The underlying timer; used both to measure elapsed time and sleep as necessary. 
A separate\n      * object to facilitate testing.\n      */\n    private final SleepingTicker ticker;\n\n    /**\n      * The timestamp when the RateLimiter was created; used to avoid possible overflow/time-wrapping\n      * errors.\n      */\n    private final long offsetNanos;\n\n    /**\n      * The currently stored permits.\n      */\n    double storedPermits;\n\n    /**\n      * The maximum number of stored permits.\n      */\n    double maxPermits;\n\n    /**\n      * The interval between two unit requests, at our stable rate. E.g., a stable rate of 5 permits\n      * per second has a stable interval of 200ms.\n      */\n    volatile double stableIntervalMicros;\n\n    private final Object mutex = new Object();\n\n    /**\n      * The time when the next request (no matter its size) will be granted. After granting a request,\n      * this is pushed further in the future. Large requests push this further than small requests.\n      */\n    private long nextFreeTicketMicros = 0L; // could be either in the past or future\n\n    private RateLimiter(SleepingTicker ticker) {\n        this.ticker = ticker;\n        this.offsetNanos = ticker.read();\n    }\n\n    /**\n      * Updates the stable rate of this {@code RateLimiter}, that is, the\n      * {@code permitsPerSecond} argument provided in the factory method that\n      * constructed the {@code RateLimiter}. 
Currently throttled threads will <b>not</b>\n      * be awakened as a result of this invocation, thus they do not observe the new rate;\n      * only subsequent requests will.\n      *\n      * <p>Note though that, since each request repays (by waiting, if necessary) the cost\n      * of the <i>previous</i> request, this means that the very next request\n      * after an invocation to {@code setRate} will not be affected by the new rate;\n      * it will pay the cost of the previous request, which is in terms of the previous rate.\n      *\n      * <p>The behavior of the {@code RateLimiter} is not modified in any other way,\n      * e.g. if the {@code RateLimiter} was configured with a warmup period of 20 seconds,\n      * it still has a warmup period of 20 seconds after this method invocation.\n      *\n      * @param permitsPerSecond the new stable rate of this {@code RateLimiter}.\n      */\n    public final void setRate(double permitsPerSecond) {\n        Preconditions.checkArgument(permitsPerSecond > 0.0\n            && !Double.isNaN(permitsPerSecond), \"rate must be positive\");\n        synchronized (mutex) {\n            resync(readSafeMicros());\n            double stableIntervalMicros = TimeUnit.SECONDS.toMicros(1L) / permitsPerSecond;\n            this.stableIntervalMicros = stableIntervalMicros;\n            doSetRate(permitsPerSecond, stableIntervalMicros);\n        }\n    }\n\n    abstract void doSetRate(double permitsPerSecond, double stableIntervalMicros);\n\n    /**\n      * Returns the stable rate (as {@code permits per seconds}) with which this\n      * {@code RateLimiter} is configured with. 
The initial value of this is the same as\n      * the {@code permitsPerSecond} argument passed in the factory method that produced\n      * this {@code RateLimiter}, and it is only updated after invocations\n      * to {@linkplain #setRate}.\n      */\n    public final double getRate() {\n        return TimeUnit.SECONDS.toMicros(1L) / stableIntervalMicros;\n    }\n\n    /**\n      * Acquires a permit from this {@code RateLimiter}, blocking until the request can be granted.\n      *\n      * <p>This method is equivalent to {@code acquire(1)}.\n      */\n    public void acquire() {\n        acquire(1);\n    }\n\n    /**\n      * Acquires the given number of permits from this {@code RateLimiter}, blocking until the\n      * request be granted.\n      *\n      * @param permits the number of permits to acquire\n      */\n    public void acquire(int permits) {\n        checkPermits(permits);\n        long microsToWait;\n        synchronized (mutex) {\n            microsToWait = reserveNextTicket(permits, readSafeMicros());\n        }\n        ticker.sleepMicrosUninterruptibly(microsToWait);\n    }\n\n    /**\n      * Acquires a permit from this {@code RateLimiter} if it can be obtained\n      * without exceeding the specified {@code timeout}, or returns {@code false}\n      * immediately (without waiting) if the permit would not have been granted\n      * before the timeout expired.\n      *\n      * <p>This method is equivalent to {@code tryAcquire(1, timeout, unit)}.\n      *\n      * @param timeout the maximum time to wait for the permit\n      * @param unit the time unit of the timeout argument\n      * @return {@code true} if the permit was acquired, {@code false} otherwise\n      */\n    public boolean tryAcquire(long timeout, TimeUnit unit) {\n        return tryAcquire(1, timeout, unit);\n    }\n\n    /**\n      * Acquires permits from this {@link RateLimiter} if it can be acquired immediately without delay.\n      *\n      * <p>\n      * This method is equivalent 
to {@code tryAcquire(permits, 0, anyUnit)}.\n      *\n      * @param permits the number of permits to acquire\n      * @return {@code true} if the permits were acquired, {@code false} otherwise\n      * @since 14.0\n      */\n    public boolean tryAcquire(int permits) {\n        return tryAcquire(permits, 0, TimeUnit.MICROSECONDS);\n    }\n\n    /**\n      * Acquires a permit from this {@link RateLimiter} if it can be acquired immediately without\n      * delay.\n      *\n      * <p>\n      * This method is equivalent to {@code tryAcquire(1)}.\n      *\n      * @return {@code true} if the permit was acquired, {@code false} otherwise\n      * @since 14.0\n      */\n    public boolean tryAcquire() {\n        return tryAcquire(1, 0, TimeUnit.MICROSECONDS);\n    }\n\n    /**\n      * Acquires the given number of permits from this {@code RateLimiter} if it can be obtained\n      * without exceeding the specified {@code timeout}, or returns {@code false}\n      * immediately (without waiting) if the permits would not have been granted\n      * before the timeout expired.\n      *\n      * @param permits the number of permits to acquire\n      * @param timeout the maximum time to wait for the permits\n      * @param unit the time unit of the timeout argument\n      * @return {@code true} if the permits were acquired, {@code false} otherwise\n      */\n    public boolean tryAcquire(int permits, long timeout, TimeUnit unit) {\n        long timeoutMicros = unit.toMicros(timeout);\n        checkPermits(permits);\n        long microsToWait;\n        synchronized (mutex) {\n            long nowMicros = readSafeMicros();\n            if (nextFreeTicketMicros > nowMicros + timeoutMicros) {\n                return false;\n            } else {\n                microsToWait = reserveNextTicket(permits, nowMicros);\n            }\n        }\n        ticker.sleepMicrosUninterruptibly(microsToWait);\n        return true;\n    }\n\n    private static void checkPermits(int permits) {\n   
     Preconditions.checkArgument(permits > 0, \"Requested permits must be positive\");\n    }\n\n    /**\n      * Reserves next ticket and returns the wait time that the caller must wait for.\n      */\n    private long reserveNextTicket(double requiredPermits, long nowMicros) {\n        resync(nowMicros);\n        long microsToNextFreeTicket = nextFreeTicketMicros - nowMicros;\n        double storedPermitsToSpend = Math.min(requiredPermits, this.storedPermits);\n        double freshPermits = requiredPermits - storedPermitsToSpend;\n\n        long waitMicros = storedPermitsToWaitTime(this.storedPermits, storedPermitsToSpend)\n        + (long) (freshPermits * stableIntervalMicros);\n\n        this.nextFreeTicketMicros = nextFreeTicketMicros + waitMicros;\n        this.storedPermits -= storedPermitsToSpend;\n        return microsToNextFreeTicket;\n    }\n\n    /**\n      * Translates a specified portion of our currently stored permits which we want to\n      * spend/acquire, into a throttling time. 
Conceptually, this evaluates the integral\n      * of the underlying function we use, for the range of\n      * [(storedPermits - permitsToTake), storedPermits].\n      *\n      * This always holds: {@code 0 <= permitsToTake <= storedPermits}\n      */\n    abstract long storedPermitsToWaitTime(double storedPermits, double permitsToTake);\n\n    private void resync(long nowMicros) {\n        // if nextFreeTicket is in the past, resync to now\n        if (nowMicros > nextFreeTicketMicros) {\n            storedPermits = Math.min(maxPermits,\n                storedPermits + (nowMicros - nextFreeTicketMicros) / stableIntervalMicros);\n            nextFreeTicketMicros = nowMicros;\n        }\n    }\n\n    private long readSafeMicros() {\n        return TimeUnit.NANOSECONDS.toMicros(ticker.read() - offsetNanos);\n    }\n\n    @Override\n    public String toString() {\n        return String.format(\"RateLimiter[stableRate=%3.1fqps]\", 1000000.0 / stableIntervalMicros);\n    }\n\n    /**\n      * This implements the following function:\n      *\n      *          ^ throttling\n      *          |\n      * 3*stable +                  /\n      * interval |                 /.\n      *  (cold)  |                / .\n      *          |               /  .   <-- \"warmup period\" is the area of the trapezoid between\n      * 2*stable +              /   .       halfPermits and maxPermits\n      * interval |             /    .\n      *          |            /     .\n      *          |           /      .\n      *   stable +----------/  WARM . }\n      * interval |          .   UP  . } <-- this rectangle (from 0 to maxPermits, and\n      *          |          . PERIOD. }     height == stableInterval) defines the cooldown period,\n      *          |          .       . 
}     and we want cooldownPeriod == warmupPeriod\n      *          |---------------------------------> storedPermits\n      *              (halfPermits) (maxPermits)\n      *\n      * Before going into the details of this particular function, let's keep in mind the basics:\n      * 1) The state of the RateLimiter (storedPermits) is a vertical line in this figure.\n      * 2) When the RateLimiter is not used, this goes right (up to maxPermits)\n      * 3) When the RateLimiter is used, this goes left (down to zero), since if we have storedPermits,\n      *    we serve from those first\n      * 4) When _unused_, we go right at the same speed (rate)! I.e., if our rate is\n      *    2 permits per second, and 3 unused seconds pass, we will always save 6 permits\n      *    (no matter what our initial position was), up to maxPermits.\n      *    If we invert the rate, we get the \"stableInterval\" (interval between two requests\n      *    in a perfectly spaced out sequence of requests of the given rate). Thus, if you\n      *    want to see \"how much time it will take to go from X storedPermits to X+K storedPermits?\",\n      *    the answer is always stableInterval * K. In the same example, for 2 permits per second,\n      *    stableInterval is 500ms. 
Thus to go from X storedPermits to X+6 storedPermits, we\n      *    require 6 * 500ms = 3 seconds.\n      *\n      *    In short, the time it takes to move to the right (save K permits) is equal to the\n      *    rectangle of width == K and height == stableInterval.\n      * 4) When _used_, the time it takes, as explained in the introductory class note, is\n      *    equal to the integral of our function, between X permits and X-K permits, assuming\n      *    we want to spend K saved permits.\n      *\n      *    In summary, the time it takes to move to the left (spend K permits), is equal to the\n      *    area of the function of width == K.\n      *\n      * Let's dive into this function now:\n      *\n      * When we have storedPermits <= halfPermits (the left portion of the function), then\n      * we spend them at the exact same rate that\n      * fresh permits would be generated anyway (that rate is 1/stableInterval). We size\n      * this area to be equal to _half_ the specified warmup period. Why we need this?\n      * And why half? We'll explain shortly below (after explaining the second part).\n      *\n      * Stored permits that are beyond halfPermits, are mapped to an ascending line, that goes\n      * from stableInterval to 3 * stableInterval. The average height for that part is\n      * 2 * stableInterval, and is sized appropriately to have an area _equal_ to the\n      * specified warmup period. Thus, by point (4) above, it takes \"warmupPeriod\" amount of time\n      * to go from maxPermits to halfPermits.\n      *\n      * BUT, by point (3) above, it only takes \"warmupPeriod / 2\" amount of time to return back\n      * to maxPermits, from halfPermits! (Because the trapezoid has double the area of the rectangle\n      * of height stableInterval and equivalent width). 
We decided that the \"cooldown period\"\n      * time should be equivalent to \"warmup period\", thus a fully saturated RateLimiter\n      * (with zero stored permits, serving only fresh ones) can go to a fully unsaturated\n      * (with storedPermits == maxPermits) in the same amount of time it takes for a fully\n      * unsaturated RateLimiter to return to the stableInterval -- which happens in halfPermits,\n      * since beyond that point, we use a horizontal line of \"stableInterval\" height, simulating\n      * the regular rate.\n      *\n      * Thus, we have figured all dimensions of this shape, to give all the desired\n      * properties:\n      * - the width is warmupPeriod / stableInterval, to make cooldownPeriod == warmupPeriod\n      * - the slope starts at the middle, and goes from stableInterval to 3*stableInterval so\n      *   to have halfPermits being spend in double the usual time (half the rate), while their\n      *   respective rate is steadily ramping up\n      */\n    private static class WarmingUp extends RateLimiter {\n\n        final long warmupPeriodMicros;\n        /**\n          * The slope of the line from the stable interval (when permits == 0), to the cold interval\n          * (when permits == maxPermits)\n          */\n        private double slope;\n        private double halfPermits;\n\n        WarmingUp(SleepingTicker ticker, long warmupPeriod, TimeUnit timeUnit) {\n            super(ticker);\n            this.warmupPeriodMicros = timeUnit.toMicros(warmupPeriod);\n        }\n\n        @Override\n        void doSetRate(double permitsPerSecond, double stableIntervalMicros) {\n            double oldMaxPermits = maxPermits;\n            maxPermits = warmupPeriodMicros / stableIntervalMicros;\n            halfPermits = maxPermits / 2.0;\n            // Stable interval is x, cold is 3x, so on average it's 2x. 
Double the time -> halve the rate\n            double coldIntervalMicros = stableIntervalMicros * 3.0;\n            slope = (coldIntervalMicros - stableIntervalMicros) / halfPermits;\n            if (oldMaxPermits == Double.POSITIVE_INFINITY) {\n                // if we don't special-case this, we would get storedPermits == NaN, below\n                storedPermits = 0.0;\n            } else {\n                storedPermits = (oldMaxPermits == 0.0)\n                ? maxPermits // initial state is cold\n                    : storedPermits * maxPermits / oldMaxPermits;\n            }\n        }\n\n        @Override\n        long storedPermitsToWaitTime(double storedPermits, double permitsToTake) {\n            double availablePermitsAboveHalf = storedPermits - halfPermits;\n            long micros = 0;\n            // measuring the integral on the right part of the function (the climbing line)\n            if (availablePermitsAboveHalf > 0.0) {\n                double permitsAboveHalfToTake = Math.min(availablePermitsAboveHalf, permitsToTake);\n                micros = (long) (permitsAboveHalfToTake * (permitsToTime(availablePermitsAboveHalf)\n                    + permitsToTime(availablePermitsAboveHalf - permitsAboveHalfToTake)) / 2.0);\n                permitsToTake -= permitsAboveHalfToTake;\n            }\n            // measuring the integral on the left part of the function (the horizontal line)\n            micros += (stableIntervalMicros * permitsToTake);\n            return micros;\n        }\n\n        private double permitsToTime(double permits) {\n            return stableIntervalMicros + permits * slope;\n        }\n    }\n\n    /**\n      * This implements a trivial function, where storedPermits are translated to\n      * zero throttling - thus, a client gets an infinite speedup for permits acquired out\n      * of the storedPermits pool. 
This is also used for the special case of the \"metronome\",\n      * where the width of the function is also zero; maxStoredPermits is zero, thus\n      * storedPermits and permitsToTake are always zero as well. Such a RateLimiter can\n      * not save permits when unused, thus all permits it serves are fresh, using the\n      * designated rate.\n      */\n    private static class Bursty extends RateLimiter {\n        Bursty(SleepingTicker ticker) {\n            super(ticker);\n        }\n\n        @Override\n        void doSetRate(double permitsPerSecond, double stableIntervalMicros) {\n            double oldMaxPermits = this.maxPermits;\n            /*\n             * We allow the equivalent work of up to one second to be granted with zero waiting, if the\n             * rate limiter has been unused for as much. This is to avoid potentially producing tiny\n             * wait interval between subsequent requests for sufficiently large rates, which would\n             * unnecessarily overconstrain the thread scheduler.\n             */\n            maxPermits = permitsPerSecond; // one second worth of permits\n            storedPermits = (oldMaxPermits == 0.0)\n            ? 0.0 // initial state\n                : storedPermits * maxPermits / oldMaxPermits;\n        }\n\n        @Override\n        long storedPermitsToWaitTime(double storedPermits, double permitsToTake) {\n            return 0L;\n        }\n    }\n}\n\nabstract class SleepingTicker extends Ticker {\n    abstract void sleepMicrosUninterruptibly(long micros);\n\n    static final SleepingTicker SYSTEM_TICKER = new SleepingTicker() {\n        @Override\n        public long read() {\n            return systemTicker().read();\n        }\n\n        @Override\n        public void sleepMicrosUninterruptibly(long micros) {\n            if (micros > 0) {\n                Uninterruptibles.sleepUninterruptibly(micros, TimeUnit.MICROSECONDS);\n            }\n        }\n    };\n}\n"
  },
  {
    "path": "src/main/java/com/audienceproject/shaded/google/common/util/concurrent/Uninterruptibles.java",
    "content": "package com.audienceproject.shaded.google.common.util.concurrent;\n\n/*\n * Notice:\n * This file was modified at AudienceProject ApS by Cosmin Catalin Sanda (cosmin@audienceproject.com)\n */\n\n/*\n * Copyright (C) 2011 The Guava Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport com.audienceproject.shaded.google.common.base.Preconditions;\n\nimport java.util.concurrent.*;\n\nimport static java.util.concurrent.TimeUnit.NANOSECONDS;\n\n/**\n * Utilities for treating interruptible operations as uninterruptible.\n * In all cases, if a thread is interrupted during such a call, the call\n * continues to block until the result is available or the timeout elapses,\n * and only then re-interrupts the thread.\n *\n * @author Anthony Zana\n * @since 10.0\n */\npublic final class Uninterruptibles {\n\n    // Implementation Note: As of 3-7-11, the logic for each blocking/timeout\n    // methods is identical, save for method being invoked.\n\n    /**\n     * Invokes {@code latch.}{@link CountDownLatch#await() await()}\n     * uninterruptibly.\n     */\n    public static void awaitUninterruptibly(CountDownLatch latch) {\n        boolean interrupted = false;\n        try {\n            while (true) {\n                try {\n                    latch.await();\n                    return;\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                }\n            }\n        } finally 
{\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes\n     * {@code latch.}{@link CountDownLatch#await(long, TimeUnit)\n     * await(timeout, unit)} uninterruptibly.\n     */\n    public static boolean awaitUninterruptibly(CountDownLatch latch,\n                                               long timeout, TimeUnit unit) {\n        boolean interrupted = false;\n        try {\n            long remainingNanos = unit.toNanos(timeout);\n            long end = System.nanoTime() + remainingNanos;\n\n            while (true) {\n                try {\n                    // CountDownLatch treats negative timeouts just like zero.\n                    return latch.await(remainingNanos, NANOSECONDS);\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                    remainingNanos = end - System.nanoTime();\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes {@code toJoin.}{@link Thread#join() join()} uninterruptibly.\n     */\n    public static void joinUninterruptibly(Thread toJoin) {\n        boolean interrupted = false;\n        try {\n            while (true) {\n                try {\n                    toJoin.join();\n                    return;\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes {@code future.}{@link Future#get() get()} uninterruptibly.\n     * To get uninterruptibility and remove checked exceptions.\n     *\n     * <p>If instead, you wish to treat {@link InterruptedException} uniformly\n     * with other exceptions.\n     *\n     * 
@throws ExecutionException if the computation threw an exception\n     * @throws CancellationException if the computation was cancelled\n     */\n    public static <V> V getUninterruptibly(Future<V> future)\n        throws ExecutionException {\n        boolean interrupted = false;\n        try {\n            while (true) {\n                try {\n                    return future.get();\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes\n     * {@code future.}{@link Future#get(long, TimeUnit) get(timeout, unit)}\n     * uninterruptibly.\n     *\n     * <p>If instead, you wish to treat {@link InterruptedException} uniformly\n     * with other exceptions.\n     *\n     * @throws ExecutionException if the computation threw an exception\n     * @throws CancellationException if the computation was cancelled\n     * @throws TimeoutException if the wait timed out\n     */\n    public static <V> V getUninterruptibly(\n        Future<V> future, long timeout,  TimeUnit unit)\n        throws ExecutionException, TimeoutException {\n        boolean interrupted = false;\n        try {\n            long remainingNanos = unit.toNanos(timeout);\n            long end = System.nanoTime() + remainingNanos;\n\n            while (true) {\n                try {\n                    // Future treats negative timeouts just like zero.\n                    return future.get(remainingNanos, NANOSECONDS);\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                    remainingNanos = end - System.nanoTime();\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes\n 
    * {@code unit.}{@link TimeUnit#timedJoin(Thread, long)\n     * timedJoin(toJoin, timeout)} uninterruptibly.\n     */\n    public static void joinUninterruptibly(Thread toJoin,\n                                           long timeout, TimeUnit unit) {\n        Preconditions.checkNotNull(toJoin);\n        boolean interrupted = false;\n        try {\n            long remainingNanos = unit.toNanos(timeout);\n            long end = System.nanoTime() + remainingNanos;\n            while (true) {\n                try {\n                    // TimeUnit.timedJoin() treats negative timeouts just like zero.\n                    NANOSECONDS.timedJoin(toJoin, remainingNanos);\n                    return;\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                    remainingNanos = end - System.nanoTime();\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes {@code queue.}{@link BlockingQueue#take() take()} uninterruptibly.\n     */\n    public static <E> E takeUninterruptibly(BlockingQueue<E> queue) {\n        boolean interrupted = false;\n        try {\n            while (true) {\n                try {\n                    return queue.take();\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    /**\n     * Invokes {@code queue.}{@link BlockingQueue#put(Object) put(element)}\n     * uninterruptibly.\n     *\n     * @throws ClassCastException if the class of the specified element prevents\n     *     it from being added to the given queue\n     * @throws IllegalArgumentException if some property of the specified element\n     *     prevents it from being added 
to the given queue\n     */\n    public static <E> void putUninterruptibly(BlockingQueue<E> queue, E element) {\n        boolean interrupted = false;\n        try {\n            while (true) {\n                try {\n                    queue.put(element);\n                    return;\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    // TODO(user): Support Sleeper somehow (wrapper or interface method)?\n    /**\n     * Invokes {@code unit.}{@link TimeUnit#sleep(long) sleep(sleepFor)}\n     * uninterruptibly.\n     */\n    public static void sleepUninterruptibly(long sleepFor, TimeUnit unit) {\n        boolean interrupted = false;\n        try {\n            long remainingNanos = unit.toNanos(sleepFor);\n            long end = System.nanoTime() + remainingNanos;\n            while (true) {\n                try {\n                    // TimeUnit.sleep() treats negative timeouts just like zero.\n                    NANOSECONDS.sleep(remainingNanos);\n                    return;\n                } catch (InterruptedException e) {\n                    interrupted = true;\n                    remainingNanos = end - System.nanoTime();\n                }\n            }\n        } finally {\n            if (interrupted) {\n                Thread.currentThread().interrupt();\n            }\n        }\n    }\n\n    // TODO(user): Add support for waitUninterruptibly.\n\n    private Uninterruptibles() {}\n}\n\n"
  },
  {
    "path": "src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister",
    "content": "com.audienceproject.spark.dynamodb.datasource.DefaultSource\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/attribute.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport scala.annotation.StaticAnnotation\n\nfinal case class attribute(name: String) extends StaticAnnotation\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/catalyst/JavaConverter.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.catalyst\n\nimport java.util\n\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.catalyst.util.{ArrayData, MapData}\nimport org.apache.spark.sql.types._\nimport org.apache.spark.unsafe.types.UTF8String\n\nimport scala.collection.JavaConverters._\n\nobject JavaConverter {\n\n    def convertRowValue(row: InternalRow, index: Int, elementType: DataType): Any = {\n        elementType match {\n            case ArrayType(innerType, _) => convertArray(row.getArray(index), innerType)\n            case MapType(keyType, valueType, _) => convertMap(row.getMap(index), keyType, valueType)\n            case StructType(fields) => convertStruct(row.getStruct(index, fields.length), fields)\n            case StringType => row.getString(index)\n            case LongType => row.getLong(index)\n            case t: DecimalType => row.getDecimal(index, t.precision, t.scale).toBigDecimal\n            case _ => row.get(index, elementType)\n        }\n    }\n\n    def convertArray(array: ArrayData, elementType: 
DataType): Any = {\n        elementType match {\n            case ArrayType(innerType, _) => array.toSeq[ArrayData](elementType).map(convertArray(_, innerType)).asJava\n            case MapType(keyType, valueType, _) => array.toSeq[MapData](elementType).map(convertMap(_, keyType, valueType)).asJava\n            case structType: StructType => array.toSeq[InternalRow](structType).map(convertStruct(_, structType.fields)).asJava\n            case StringType => convertStringArray(array).asJava\n            case _ => array.toSeq[Any](elementType).asJava\n        }\n    }\n\n    def convertMap(map: MapData, keyType: DataType, valueType: DataType): util.Map[String, Any] = {\n        if (keyType != StringType) throw new IllegalArgumentException(\n            s\"Invalid Map key type '${keyType.typeName}'. DynamoDB only supports String as Map key type.\")\n        val keys = convertStringArray(map.keyArray())\n        val values = valueType match {\n            case ArrayType(innerType, _) => map.valueArray().toSeq[ArrayData](valueType).map(convertArray(_, innerType))\n            case MapType(innerKeyType, innerValueType, _) => map.valueArray().toSeq[MapData](valueType).map(convertMap(_, innerKeyType, innerValueType))\n            case structType: StructType => map.valueArray().toSeq[InternalRow](structType).map(convertStruct(_, structType.fields))\n            case StringType => convertStringArray(map.valueArray())\n            case _ => map.valueArray().toSeq[Any](valueType)\n        }\n        val kvPairs = for (i <- 0 until map.numElements()) yield keys(i) -> values(i)\n        Map(kvPairs: _*).asJava\n    }\n\n    def convertStruct(row: InternalRow, fields: Seq[StructField]): util.Map[String, Any] = {\n        val kvPairs = for (i <- 0 until row.numFields) yield\n            if (row.isNullAt(i)) fields(i).name -> null\n            else fields(i).name -> convertRowValue(row, i, fields(i).dataType)\n        Map(kvPairs: _*).asJava\n    }\n\n    def 
convertStringArray(array: ArrayData): Seq[String] =\n        array.toSeq[UTF8String](StringType).map(_.toString)\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/ColumnSchema.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport org.apache.spark.sql.types.{DataType, StructType}\n\nprivate[dynamodb] class ColumnSchema(keySchema: KeySchema,\n                                     sparkSchema: StructType) {\n\n    type Attr = (String, Int, DataType)\n\n    private val columnNames = sparkSchema.map(_.name)\n\n    private val keyIndices = keySchema match {\n        case KeySchema(hashKey, None) =>\n            val hashKeyIndex = columnNames.indexOf(hashKey)\n            val hashKeyType = sparkSchema(hashKey).dataType\n            Left(hashKey, hashKeyIndex, hashKeyType)\n        case KeySchema(hashKey, Some(rangeKey)) =>\n            val hashKeyIndex = columnNames.indexOf(hashKey)\n            val rangeKeyIndex = columnNames.indexOf(rangeKey)\n            val hashKeyType = sparkSchema(hashKey).dataType\n            val rangeKeyType = sparkSchema(rangeKey).dataType\n            Right((hashKey, hashKeyIndex, hashKeyType), (rangeKey, rangeKeyIndex, rangeKeyType))\n    }\n\n    private val attributeIndices = 
columnNames.zipWithIndex.filterNot({\n        case (name, _) => keySchema match {\n            case KeySchema(hashKey, None) => name == hashKey\n            case KeySchema(hashKey, Some(rangeKey)) => name == hashKey || name == rangeKey\n        }\n    }).map({\n        case (name, index) => (name, index, sparkSchema(name).dataType)\n    })\n\n    def keys(): Either[Attr, (Attr, Attr)] = keyIndices\n\n    def attributes(): Seq[Attr] = attributeIndices\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/DynamoConnector.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.auth.profile.ProfileCredentialsProvider\nimport com.amazonaws.auth.{AWSCredentialsProvider, AWSStaticCredentialsProvider, BasicSessionCredentials, DefaultAWSCredentialsProviderChain}\nimport com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration\nimport com.amazonaws.services.dynamodbv2.document.{DynamoDB, ItemCollection, ScanOutcome}\nimport com.amazonaws.services.dynamodbv2.{AmazonDynamoDB, AmazonDynamoDBAsync, AmazonDynamoDBAsyncClientBuilder, AmazonDynamoDBClientBuilder}\nimport com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder\nimport com.amazonaws.services.securitytoken.model.AssumeRoleRequest\nimport org.apache.spark.sql.sources.Filter\n\nprivate[dynamodb] trait DynamoConnector {\n\n    @transient private lazy val properties = sys.props\n\n    def getDynamoDB(region: Option[String] = None, roleArn: Option[String] = None, providerClassName: Option[String] = None): DynamoDB = {\n        val client: AmazonDynamoDB = 
getDynamoDBClient(region, roleArn, providerClassName)\n        new DynamoDB(client)\n    }\n\n    private def getDynamoDBClient(region: Option[String] = None,\n                                  roleArn: Option[String] = None,\n                                  providerClassName: Option[String]): AmazonDynamoDB = {\n        val chosenRegion = region.getOrElse(properties.getOrElse(\"aws.dynamodb.region\", \"us-east-1\"))\n        val credentials = getCredentials(chosenRegion, roleArn, providerClassName)\n\n        properties.get(\"aws.dynamodb.endpoint\").map(endpoint => {\n            AmazonDynamoDBClientBuilder.standard()\n                .withCredentials(credentials)\n                .withEndpointConfiguration(new EndpointConfiguration(endpoint, chosenRegion))\n                .build()\n        }).getOrElse(\n            AmazonDynamoDBClientBuilder.standard()\n                .withCredentials(credentials)\n                .withRegion(chosenRegion)\n                .build()\n        )\n    }\n\n    def getDynamoDBAsyncClient(region: Option[String] = None,\n                               roleArn: Option[String] = None,\n                               providerClassName: Option[String] = None): AmazonDynamoDBAsync = {\n        val chosenRegion = region.getOrElse(properties.getOrElse(\"aws.dynamodb.region\", \"us-east-1\"))\n        val credentials = getCredentials(chosenRegion, roleArn, providerClassName)\n\n        properties.get(\"aws.dynamodb.endpoint\").map(endpoint => {\n            AmazonDynamoDBAsyncClientBuilder.standard()\n                .withCredentials(credentials)\n                .withEndpointConfiguration(new EndpointConfiguration(endpoint, chosenRegion))\n                .build()\n        }).getOrElse(\n            AmazonDynamoDBAsyncClientBuilder.standard()\n                .withCredentials(credentials)\n                .withRegion(chosenRegion)\n                .build()\n        )\n    }\n\n    /**\n     * Get credentials from an instantiated object 
of the class name given\n     * or a passed in arn\n     * or from profile\n     * or return the default credential provider\n     **/\n    private def getCredentials(chosenRegion: String, roleArn: Option[String], providerClassName: Option[String]) = {\n        providerClassName.map(providerClass => {\n            Class.forName(providerClass).newInstance.asInstanceOf[AWSCredentialsProvider]\n        }).orElse(roleArn.map(arn => {\n            val stsClient = properties.get(\"aws.sts.endpoint\").map(endpoint => {\n                AWSSecurityTokenServiceClientBuilder\n                    .standard()\n                    .withCredentials(new DefaultAWSCredentialsProviderChain)\n                    .withEndpointConfiguration(new EndpointConfiguration(endpoint, chosenRegion))\n                    .build()\n            }).getOrElse(\n                // STS without an endpoint will sign from the region, but use the global endpoint\n                AWSSecurityTokenServiceClientBuilder\n                    .standard()\n                    .withCredentials(new DefaultAWSCredentialsProviderChain)\n                    .withRegion(chosenRegion)\n                    .build()\n            )\n            val assumeRoleResult = stsClient.assumeRole(\n                new AssumeRoleRequest()\n                    .withRoleSessionName(\"DynamoDBAssumed\")\n                    .withRoleArn(arn)\n            )\n            val stsCredentials = assumeRoleResult.getCredentials\n            val assumeCreds = new BasicSessionCredentials(\n                stsCredentials.getAccessKeyId,\n                stsCredentials.getSecretAccessKey,\n                stsCredentials.getSessionToken\n            )\n            new AWSStaticCredentialsProvider(assumeCreds)\n        })).orElse(properties.get(\"aws.profile\").map(new ProfileCredentialsProvider(_)))\n            .getOrElse(new DefaultAWSCredentialsProviderChain)\n    }\n\n    val keySchema: KeySchema\n\n    val readLimit: Double\n\n    val 
itemLimit: Int\n\n    val totalSegments: Int\n\n    val filterPushdownEnabled: Boolean\n\n    def scan(segmentNum: Int, columns: Seq[String], filters: Seq[Filter]): ItemCollection[ScanOutcome]\n\n    def isEmpty: Boolean = itemLimit == 0\n\n    def nonEmpty: Boolean = !isEmpty\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/DynamoWritable.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.services.dynamodbv2.document.DynamoDB\nimport com.audienceproject.shaded.google.common.util.concurrent.RateLimiter\nimport org.apache.spark.sql.catalyst.InternalRow\n\nprivate[dynamodb] trait DynamoWritable {\n\n    val writeLimit: Double\n\n    def putItems(columnSchema: ColumnSchema, items: Seq[InternalRow])\n                (client: DynamoDB, rateLimiter: RateLimiter): Unit\n\n    def updateItem(columnSchema: ColumnSchema, item: InternalRow)\n                  (client: DynamoDB, rateLimiter: RateLimiter): Unit\n\n    def deleteItems(columnSchema: ColumnSchema, itema: Seq[InternalRow])\n                   (client: DynamoDB, rateLimiter: RateLimiter): Unit\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/FilterPushdown.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder.{BOOL => newBOOL, N => newN, S => newS, _}\nimport com.amazonaws.services.dynamodbv2.xspec._\nimport org.apache.spark.sql.sources._\n\nprivate[dynamodb] object FilterPushdown {\n\n    def apply(filters: Seq[Filter]): Condition =\n        filters.map(buildCondition).map(parenthesize).reduce[Condition](_ and _)\n\n    /**\n      * Accepts only filters that would be considered valid input to FilterPushdown.apply()\n      *\n      * @param filters input list which may contain both valid and invalid filters\n      * @return a (valid, invalid) partitioning of the input filters\n      */\n    def acceptFilters(filters: Array[Filter]): (Array[Filter], Array[Filter]) =\n        filters.partition(checkFilter)\n\n    private def checkFilter(filter: Filter): Boolean = filter match {\n        case _: StringEndsWith => false\n        case And(left, right) => checkFilter(left) && checkFilter(right)\n        case Or(left, right) => checkFilter(left) 
&& checkFilter(right)\n        case Not(f) => checkFilter(f)\n        case _ => true\n    }\n\n    private def buildCondition(filter: Filter): Condition = filter match {\n        case EqualTo(path, value: Boolean) => newBOOL(path).eq(value)\n        case EqualTo(path, value) => coerceAndApply(_ eq _, _ eq _)(path, value)\n\n        case GreaterThan(path, value) => coerceAndApply(_ gt _, _ gt _)(path, value)\n        case GreaterThanOrEqual(path, value) => coerceAndApply(_ ge _, _ ge _)(path, value)\n\n        case LessThan(path, value) => coerceAndApply(_ lt _, _ lt _)(path, value)\n        case LessThanOrEqual(path, value) => coerceAndApply(_ le _, _ le _)(path, value)\n\n        case In(path, values) =>\n            val valueList = values.toList\n            valueList match {\n                case (_: String) :: _ => newS(path).in(valueList.asInstanceOf[List[String]]: _*)\n                case (_: Boolean) :: _ => newBOOL(path).in(valueList.asInstanceOf[List[Boolean]]: _*)\n                case (_: Int) :: _ => newN(path).in(valueList.map(_.asInstanceOf[Number]): _*)\n                case (_: Long) :: _ => newN(path).in(valueList.map(_.asInstanceOf[Number]): _*)\n                case (_: Short) :: _ => newN(path).in(valueList.map(_.asInstanceOf[Number]): _*)\n                case (_: Float) :: _ => newN(path).in(valueList.map(_.asInstanceOf[Number]): _*)\n                case (_: Double) :: _ => newN(path).in(valueList.map(_.asInstanceOf[Number]): _*)\n                case Nil => throw new IllegalArgumentException(\"Unable to apply `In` filter with empty value list\")\n                case _ => throw new IllegalArgumentException(s\"Type of values supplied to `In` filter on attribute $path not supported by filter pushdown\")\n            }\n\n        case IsNull(path) => attribute_not_exists(path)\n        case IsNotNull(path) => attribute_exists(path)\n\n        case StringStartsWith(path, value) => newS(path).beginsWith(value)\n        case StringContains(path, 
value) => newS(path).contains(value)\n        case StringEndsWith(_, _) => throw new UnsupportedOperationException(\"Filter `StringEndsWith` is not supported by DynamoDB\")\n\n        case And(left, right) => parenthesize(buildCondition(left)) and parenthesize(buildCondition(right))\n        case Or(left, right) => parenthesize(buildCondition(left)) or parenthesize(buildCondition(right))\n        case Not(f) => parenthesize(buildCondition(f)).negate()\n    }\n\n    private def coerceAndApply(stringOp: (S, String) => Condition, numOp: (N, Number) => Condition)\n                              (path: String, value: Any): Condition = value match {\n        case string: String => stringOp(newS(path), string)\n        case number: Int => numOp(newN(path), number)\n        case number: Long => numOp(newN(path), number)\n        case number: Short => numOp(newN(path), number)\n        case number: Float => numOp(newN(path), number)\n        case number: Double => numOp(newN(path), number)\n        case _ => throw new IllegalArgumentException(s\"Type of operand given to filter on attribute $path not supported by filter pushdown\")\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/KeySchema.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.services.dynamodbv2.model.{KeySchemaElement, KeyType}\n\nprivate[dynamodb] case class KeySchema(hashKeyName: String, rangeKeyName: Option[String])\n\nprivate[dynamodb] object KeySchema {\n\n    def fromDescription(keySchemaElements: Seq[KeySchemaElement]): KeySchema = {\n        val hashKeyName = keySchemaElements.find(_.getKeyType == KeyType.HASH.toString).get.getAttributeName\n        val rangeKeyName = keySchemaElements.find(_.getKeyType == KeyType.RANGE.toString).map(_.getAttributeName)\n        KeySchema(hashKeyName, rangeKeyName)\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/TableConnector.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.services.dynamodbv2.document._\nimport com.amazonaws.services.dynamodbv2.document.spec.{BatchWriteItemSpec, ScanSpec, UpdateItemSpec}\nimport com.amazonaws.services.dynamodbv2.model.ReturnConsumedCapacity\nimport com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder\nimport com.audienceproject.shaded.google.common.util.concurrent.RateLimiter\nimport com.audienceproject.spark.dynamodb.catalyst.JavaConverter\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.sources.Filter\n\nimport scala.annotation.tailrec\nimport scala.collection.JavaConverters._\n\nprivate[dynamodb] class TableConnector(tableName: String, parallelism: Int, parameters: Map[String, String])\n    extends DynamoConnector with DynamoWritable with Serializable {\n\n    private val consistentRead = parameters.getOrElse(\"stronglyconsistentreads\", \"false\").toBoolean\n    private val filterPushdown = parameters.getOrElse(\"filterpushdown\", \"true\").toBoolean\n    private val region 
= parameters.get(\"region\")\n    private val roleArn = parameters.get(\"rolearn\")\n    private val providerClassName = parameters.get(\"providerclassname\")\n\n    override val filterPushdownEnabled: Boolean = filterPushdown\n\n    override val (keySchema, readLimit, writeLimit, itemLimit, totalSegments) = {\n        val table = getDynamoDB(region, roleArn, providerClassName).getTable(tableName)\n        val desc = table.describe()\n\n        // Key schema.\n        val keySchema = KeySchema.fromDescription(desc.getKeySchema.asScala)\n\n        // User parameters.\n        val bytesPerRCU = parameters.getOrElse(\"bytesperrcu\", \"4000\").toInt\n        val maxPartitionBytes = parameters.getOrElse(\"maxpartitionbytes\", \"128000000\").toInt\n        val targetCapacity = parameters.getOrElse(\"targetcapacity\", \"1\").toDouble\n        val readFactor = if (consistentRead) 1 else 2\n\n        // Table parameters.\n        val tableSize = desc.getTableSizeBytes\n        val itemCount = desc.getItemCount\n\n        // Partitioning calculation.\n        val numPartitions = parameters.get(\"readpartitions\").map(_.toInt).getOrElse({\n            val sizeBased = (tableSize / maxPartitionBytes).toInt max 1\n            val remainder = sizeBased % parallelism\n            if (remainder > 0) sizeBased + (parallelism - remainder)\n            else sizeBased\n        })\n\n        // Provisioned or on-demand throughput.\n        val readThroughput = parameters.getOrElse(\"throughput\", Option(desc.getProvisionedThroughput.getReadCapacityUnits)\n            .filter(_ > 0).map(_.longValue().toString)\n            .getOrElse(\"100\")).toLong\n        val writeThroughput = parameters.getOrElse(\"throughput\", Option(desc.getProvisionedThroughput.getWriteCapacityUnits)\n            .filter(_ > 0).map(_.longValue().toString)\n            .getOrElse(\"100\")).toLong\n\n        // Rate limit calculation.\n        val avgItemSize = tableSize.toDouble / itemCount\n        val 
readCapacity = readThroughput * targetCapacity\n        val writeCapacity = writeThroughput * targetCapacity\n\n        val readLimit = readCapacity / parallelism\n        val itemLimit = ((bytesPerRCU / avgItemSize * readLimit).toInt * readFactor) max 1\n\n        val writeLimit = writeCapacity / parallelism\n\n        (keySchema, readLimit, writeLimit, itemLimit, numPartitions)\n    }\n\n    override def scan(segmentNum: Int, columns: Seq[String], filters: Seq[Filter]): ItemCollection[ScanOutcome] = {\n        val scanSpec = new ScanSpec()\n            .withSegment(segmentNum)\n            .withTotalSegments(totalSegments)\n            .withMaxPageSize(itemLimit)\n            .withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)\n            .withConsistentRead(consistentRead)\n\n        if (columns.nonEmpty) {\n            val xspec = new ExpressionSpecBuilder().addProjections(columns: _*)\n\n            if (filters.nonEmpty && filterPushdown) {\n                xspec.withCondition(FilterPushdown(filters))\n            }\n\n            scanSpec.withExpressionSpec(xspec.buildForScan())\n        }\n\n        getDynamoDB(region, roleArn, providerClassName).getTable(tableName).scan(scanSpec)\n    }\n\n    override def putItems(columnSchema: ColumnSchema, items: Seq[InternalRow])\n                         (client: DynamoDB, rateLimiter: RateLimiter): Unit = {\n        // For each batch.\n        val batchWriteItemSpec = new BatchWriteItemSpec().withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)\n        batchWriteItemSpec.withTableWriteItems(new TableWriteItems(tableName).withItemsToPut(\n            // Map the items.\n            items.map(row => {\n                val item = new Item()\n\n                // Map primary key.\n                columnSchema.keys() match {\n                    case Left((hashKey, hashKeyIndex, hashKeyType)) =>\n                        item.withPrimaryKey(hashKey, JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType))\n 
                   case Right(((hashKey, hashKeyIndex, hashKeyType), (rangeKey, rangeKeyIndex, rangeKeyType))) =>\n                        val hashKeyValue = JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType)\n                        val rangeKeyValue = JavaConverter.convertRowValue(row, rangeKeyIndex, rangeKeyType)\n                        item.withPrimaryKey(hashKey, hashKeyValue, rangeKey, rangeKeyValue)\n                }\n\n                // Map remaining columns.\n                columnSchema.attributes().foreach({\n                    case (name, index, dataType) if !row.isNullAt(index) =>\n                        item.`with`(name, JavaConverter.convertRowValue(row, index, dataType))\n                    case _ =>\n                })\n\n                item\n            }): _*\n        ))\n\n        val response = client.batchWriteItem(batchWriteItemSpec)\n        handleBatchWriteResponse(client, rateLimiter)(response)\n    }\n\n    override def updateItem(columnSchema: ColumnSchema, row: InternalRow)\n                           (client: DynamoDB, rateLimiter: RateLimiter): Unit = {\n        val updateItemSpec = new UpdateItemSpec().withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)\n\n        // Map primary key.\n        columnSchema.keys() match {\n            case Left((hashKey, hashKeyIndex, hashKeyType)) =>\n                updateItemSpec.withPrimaryKey(hashKey, JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType))\n            case Right(((hashKey, hashKeyIndex, hashKeyType), (rangeKey, rangeKeyIndex, rangeKeyType))) =>\n                val hashKeyValue = JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType)\n                val rangeKeyValue = JavaConverter.convertRowValue(row, rangeKeyIndex, rangeKeyType)\n                updateItemSpec.withPrimaryKey(hashKey, hashKeyValue, rangeKey, rangeKeyValue)\n        }\n\n        // Map remaining columns.\n        val attributeUpdates = columnSchema.attributes().collect({\n   
         case (name, index, dataType) if !row.isNullAt(index) =>\n                new AttributeUpdate(name).put(JavaConverter.convertRowValue(row, index, dataType))\n        })\n\n        updateItemSpec.withAttributeUpdate(attributeUpdates: _*)\n\n        // Update item and rate limit on write capacity.\n        val response = client.getTable(tableName).updateItem(updateItemSpec)\n        Option(response.getUpdateItemResult.getConsumedCapacity)\n            .foreach(cap => rateLimiter.acquire(cap.getCapacityUnits.toInt max 1))\n    }\n\n    override def deleteItems(columnSchema: ColumnSchema, items: Seq[InternalRow])\n                            (client: DynamoDB, rateLimiter: RateLimiter): Unit = {\n        // For each batch.\n        val batchWriteItemSpec = new BatchWriteItemSpec().withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)\n\n        val tableWriteItems = new TableWriteItems(tableName)\n        val tableWriteItemsWithItems: TableWriteItems =\n        // Check if hash key only or also range key.\n            columnSchema.keys() match {\n                case Left((hashKey, hashKeyIndex, hashKeyType)) =>\n                    val hashKeys = items.map(row =>\n                        JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType).asInstanceOf[AnyRef])\n                    tableWriteItems.withHashOnlyKeysToDelete(hashKey, hashKeys: _*)\n                case Right(((hashKey, hashKeyIndex, hashKeyType), (rangeKey, rangeKeyIndex, rangeKeyType))) =>\n                    val alternatingHashAndRangeKeys = items.flatMap { row =>\n                        val hashKeyValue = JavaConverter.convertRowValue(row, hashKeyIndex, hashKeyType)\n                        val rangeKeyValue = JavaConverter.convertRowValue(row, rangeKeyIndex, rangeKeyType)\n                        Seq(hashKeyValue.asInstanceOf[AnyRef], rangeKeyValue.asInstanceOf[AnyRef])\n                    }\n                    tableWriteItems.withHashAndRangeKeysToDelete(hashKey, rangeKey, 
alternatingHashAndRangeKeys: _*)\n            }\n\n        batchWriteItemSpec.withTableWriteItems(tableWriteItemsWithItems)\n\n        val response = client.batchWriteItem(batchWriteItemSpec)\n        handleBatchWriteResponse(client, rateLimiter)(response)\n    }\n\n    @tailrec\n    private def handleBatchWriteResponse(client: DynamoDB, rateLimiter: RateLimiter)\n                                        (response: BatchWriteItemOutcome): Unit = {\n        // Rate limit on write capacity.\n        if (response.getBatchWriteItemResult.getConsumedCapacity != null) {\n            response.getBatchWriteItemResult.getConsumedCapacity.asScala.map(cap => {\n                cap.getTableName -> cap.getCapacityUnits.toInt\n            }).toMap.get(tableName).foreach(units => rateLimiter.acquire(units max 1))\n        }\n        // Retry unprocessed items.\n        if (response.getUnprocessedItems != null && !response.getUnprocessedItems.isEmpty) {\n            val newResponse = client.batchWriteItemUnprocessed(response.getUnprocessedItems)\n            handleBatchWriteResponse(client, rateLimiter)(newResponse)\n        }\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/connector/TableIndexConnector.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.connector\n\nimport com.amazonaws.services.dynamodbv2.document.spec.ScanSpec\nimport com.amazonaws.services.dynamodbv2.document.{ItemCollection, ScanOutcome}\nimport com.amazonaws.services.dynamodbv2.model.ReturnConsumedCapacity\nimport com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder\nimport org.apache.spark.sql.sources.Filter\n\nimport scala.collection.JavaConverters._\n\nprivate[dynamodb] class TableIndexConnector(tableName: String, indexName: String, parallelism: Int, parameters: Map[String, String])\n    extends DynamoConnector with Serializable {\n\n    private val consistentRead = parameters.getOrElse(\"stronglyConsistentReads\", \"false\").toBoolean\n    private val filterPushdown = parameters.getOrElse(\"filterPushdown\", \"true\").toBoolean\n    private val region = parameters.get(\"region\")\n    private val roleArn = parameters.get(\"roleArn\")\n    private val providerClassName = parameters.get(\"providerclassname\")\n\n    override val filterPushdownEnabled: Boolean = filterPushdown\n\n  
  override val (keySchema, readLimit, itemLimit, totalSegments) = {\n        val table = getDynamoDB(region, roleArn, providerClassName).getTable(tableName)\n        val indexDesc = table.describe().getGlobalSecondaryIndexes.asScala.find(_.getIndexName == indexName).get\n\n        // Key schema.\n        val keySchema = KeySchema.fromDescription(indexDesc.getKeySchema.asScala)\n\n        // User parameters.\n        val bytesPerRCU = parameters.getOrElse(\"bytesperrcu\", \"4000\").toInt\n        val maxPartitionBytes = parameters.getOrElse(\"maxpartitionbytes\", \"128000000\").toInt\n        val targetCapacity = parameters.getOrElse(\"targetcapacity\", \"1\").toDouble\n        val readFactor = if (consistentRead) 1 else 2\n\n        // Table parameters.\n        val indexSize = indexDesc.getIndexSizeBytes\n        val itemCount = indexDesc.getItemCount\n\n        // Partitioning calculation.\n        val numPartitions = parameters.get(\"readpartitions\").map(_.toInt).getOrElse({\n            val sizeBased = (indexSize / maxPartitionBytes).toInt max 1\n            val remainder = sizeBased % parallelism\n            if (remainder > 0) sizeBased + (parallelism - remainder)\n            else sizeBased\n        })\n\n        // Provisioned or on-demand throughput.\n        val readThroughput = parameters.getOrElse(\"throughput\", Option(indexDesc.getProvisionedThroughput.getReadCapacityUnits)\n            .filter(_ > 0).map(_.longValue().toString)\n            .getOrElse(\"100\")).toLong\n\n        // Rate limit calculation.\n        val avgItemSize = indexSize.toDouble / itemCount\n        val readCapacity = readThroughput * targetCapacity\n\n        val rateLimit = readCapacity / parallelism\n        val itemLimit = ((bytesPerRCU / avgItemSize * rateLimit).toInt * readFactor) max 1\n\n        (keySchema, rateLimit, itemLimit, numPartitions)\n    }\n\n    override def scan(segmentNum: Int, columns: Seq[String], filters: Seq[Filter]): ItemCollection[ScanOutcome] = {\n  
      val scanSpec = new ScanSpec()\n            .withSegment(segmentNum)\n            .withTotalSegments(totalSegments)\n            .withMaxPageSize(itemLimit)\n            .withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL)\n            .withConsistentRead(consistentRead)\n\n        if (columns.nonEmpty) {\n            val xspec = new ExpressionSpecBuilder().addProjections(columns: _*)\n\n            if (filters.nonEmpty && filterPushdown) {\n                xspec.withCondition(FilterPushdown(filters))\n            }\n\n            scanSpec.withExpressionSpec(xspec.buildForScan())\n        }\n\n        getDynamoDB(region, roleArn, providerClassName).getTable(tableName).getIndex(indexName).scan(scanSpec)\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DefaultSource.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport java.util\n\nimport org.apache.spark.sql.connector.catalog.{Table, TableProvider}\nimport org.apache.spark.sql.connector.expressions.Transform\nimport org.apache.spark.sql.sources.DataSourceRegister\nimport org.apache.spark.sql.types.StructType\nimport org.apache.spark.sql.util.CaseInsensitiveStringMap\n\nclass DefaultSource extends TableProvider with DataSourceRegister {\n\n    override def getTable(schema: StructType,\n                          partitioning: Array[Transform],\n                          properties: util.Map[String, String]): Table = {\n        new DynamoTable(new CaseInsensitiveStringMap(properties), Some(schema))\n    }\n\n    override def inferSchema(options: CaseInsensitiveStringMap): StructType = {\n        new DynamoTable(options).schema()\n    }\n\n    override def supportsExternalMetadata(): Boolean = true\n\n    override def shortName(): String = \"dynamodb\"\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoBatchReader.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.audienceproject.spark.dynamodb.connector.DynamoConnector\nimport org.apache.spark.sql.connector.read._\nimport org.apache.spark.sql.connector.read.partitioning.Partitioning\nimport org.apache.spark.sql.sources.Filter\nimport org.apache.spark.sql.types.StructType\n\nclass DynamoBatchReader(connector: DynamoConnector,\n                        filters: Array[Filter],\n                        schema: StructType)\n    extends Scan with Batch with SupportsReportPartitioning {\n\n    override def readSchema(): StructType = schema\n\n    override def toBatch: Batch = this\n\n    override def planInputPartitions(): Array[InputPartition] = {\n        val requiredColumns = schema.map(_.name)\n        Array.tabulate(connector.totalSegments)(new ScanPartition(_, requiredColumns, filters))\n    }\n\n    override def createReaderFactory(): PartitionReaderFactory =\n        new DynamoReaderFactory(connector, schema)\n\n    override val outputPartitioning: Partitioning = new 
OutputPartitioning(connector.totalSegments)\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoDataDeleteWriter.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2020 AudienceProject. All rights reserved.\n  */\n\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.amazonaws.services.dynamodbv2.document.DynamoDB\nimport com.audienceproject.spark.dynamodb.connector.{ColumnSchema, TableConnector}\n\nclass DynamoDataDeleteWriter(batchSize: Int,\n                             columnSchema: ColumnSchema,\n                             connector: TableConnector,\n                             client: DynamoDB)\n    extends DynamoDataWriter(batchSize, columnSchema, connector, client) {\n\n    protected override def flush(): Unit = {\n        if (buffer.nonEmpty) {\n            connector.deleteItems(columnSchema, buffer)(client, rateLimiter)\n            buffer.clear()\n        }\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoDataUpdateWriter.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.amazonaws.services.dynamodbv2.document.DynamoDB\nimport com.audienceproject.shaded.google.common.util.concurrent.RateLimiter\nimport com.audienceproject.spark.dynamodb.connector.{ColumnSchema, TableConnector}\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage}\n\nclass DynamoDataUpdateWriter(columnSchema: ColumnSchema,\n                             connector: TableConnector,\n                             client: DynamoDB)\n    extends DataWriter[InternalRow] {\n\n    private val rateLimiter = RateLimiter.create(connector.writeLimit)\n\n    override def write(record: InternalRow): Unit = {\n        connector.updateItem(columnSchema, record)(client, rateLimiter)\n    }\n\n    override def commit(): WriterCommitMessage = new WriterCommitMessage {}\n\n    override def abort(): Unit = {}\n\n    override def close(): Unit = client.shutdown()\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoDataWriter.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.amazonaws.services.dynamodbv2.document.DynamoDB\nimport com.audienceproject.shaded.google.common.util.concurrent.RateLimiter\nimport com.audienceproject.spark.dynamodb.connector.{ColumnSchema, TableConnector}\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage}\n\nimport scala.collection.mutable.ArrayBuffer\n\nclass DynamoDataWriter(batchSize: Int,\n                       columnSchema: ColumnSchema,\n                       connector: TableConnector,\n                       client: DynamoDB)\n    extends DataWriter[InternalRow] {\n\n    protected val buffer: ArrayBuffer[InternalRow] = new ArrayBuffer[InternalRow](batchSize)\n    protected val rateLimiter: RateLimiter = RateLimiter.create(connector.writeLimit)\n\n    override def write(record: InternalRow): Unit = {\n        buffer += record.copy()\n        if (buffer.size == batchSize) {\n            flush()\n        }\n    }\n\n    override def commit(): 
WriterCommitMessage = {\n        flush()\n        new WriterCommitMessage {}\n    }\n\n    override def abort(): Unit = {}\n\n    override def close(): Unit = client.shutdown()\n\n    protected def flush(): Unit = {\n        if (buffer.nonEmpty) {\n            connector.putItems(columnSchema, buffer)(client, rateLimiter)\n            buffer.clear()\n        }\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoReaderFactory.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.amazonaws.services.dynamodbv2.document.Item\nimport com.audienceproject.shaded.google.common.util.concurrent.RateLimiter\nimport com.audienceproject.spark.dynamodb.connector.DynamoConnector\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory}\nimport org.apache.spark.sql.types.{StructField, StructType}\n\nimport scala.collection.JavaConverters._\n\nclass DynamoReaderFactory(connector: DynamoConnector,\n                          schema: StructType)\n    extends PartitionReaderFactory {\n\n    override def createReader(partition: InputPartition): PartitionReader[InternalRow] = {\n        if (connector.isEmpty) new EmptyReader\n        else new ScanPartitionReader(partition.asInstanceOf[ScanPartition])\n    }\n\n    private class EmptyReader extends PartitionReader[InternalRow] {\n        override def next(): Boolean = false\n\n        override def get(): InternalRow = throw new 
IllegalStateException(\"Unable to call get() on empty iterator\")\n\n        override def close(): Unit = {}\n    }\n\n    private class ScanPartitionReader(scanPartition: ScanPartition) extends PartitionReader[InternalRow] {\n\n        import scanPartition._\n\n        private val pageIterator = connector.scan(partitionIndex, requiredColumns, filters).pages().iterator().asScala\n        private val rateLimiter = RateLimiter.create(connector.readLimit)\n\n        private var innerIterator: Iterator[InternalRow] = Iterator.empty\n\n        private var currentRow: InternalRow = _\n        private var proceed = false\n\n        private val typeConversions = schema.collect({\n            case StructField(name, dataType, _, _) => name -> TypeConversion(name, dataType)\n        }).toMap\n\n        override def next(): Boolean = {\n            proceed = true\n            innerIterator.hasNext || {\n                if (pageIterator.hasNext) {\n                    nextPage()\n                    next()\n                }\n                else false\n            }\n        }\n\n        override def get(): InternalRow = {\n            if (proceed) {\n                currentRow = innerIterator.next()\n                proceed = false\n            }\n            currentRow\n        }\n\n        override def close(): Unit = {}\n\n        private def nextPage(): Unit = {\n            val page = pageIterator.next()\n            val result = page.getLowLevelResult\n            Option(result.getScanResult.getConsumedCapacity).foreach(cap => rateLimiter.acquire(cap.getCapacityUnits.toInt max 1))\n            innerIterator = result.getItems.iterator().asScala.map(itemToRow(requiredColumns))\n        }\n\n        private def itemToRow(requiredColumns: Seq[String])(item: Item): InternalRow =\n            if (requiredColumns.nonEmpty) InternalRow.fromSeq(requiredColumns.map(columnName => typeConversions(columnName)(item)))\n            else 
InternalRow.fromSeq(item.asMap().asScala.values.toSeq.map(_.toString))\n\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoScanBuilder.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.audienceproject.spark.dynamodb.connector.{DynamoConnector, FilterPushdown}\nimport org.apache.spark.sql.connector.read._\nimport org.apache.spark.sql.sources.Filter\nimport org.apache.spark.sql.types._\n\nclass DynamoScanBuilder(connector: DynamoConnector, schema: StructType)\n    extends ScanBuilder\n        with SupportsPushDownRequiredColumns\n        with SupportsPushDownFilters {\n\n    private var acceptedFilters: Array[Filter] = Array.empty\n    private var currentSchema: StructType = schema\n\n    override def build(): Scan = new DynamoBatchReader(connector, pushedFilters(), currentSchema)\n\n    override def pruneColumns(requiredSchema: StructType): Unit = {\n        val keyFields = Seq(Some(connector.keySchema.hashKeyName), connector.keySchema.rangeKeyName).flatten\n            .flatMap(keyName => currentSchema.fields.find(_.name == keyName))\n        val requiredFields = keyFields ++ requiredSchema.fields\n        val newFields = 
currentSchema.fields.filter(requiredFields.contains)\n        currentSchema = StructType(newFields)\n    }\n\n    override def pushFilters(filters: Array[Filter]): Array[Filter] = {\n        if (connector.filterPushdownEnabled) {\n            val (acceptedFilters, postScanFilters) = FilterPushdown.acceptFilters(filters)\n            this.acceptedFilters = acceptedFilters\n            postScanFilters // Return filters that need to be evaluated after scanning.\n        } else filters\n    }\n\n    override def pushedFilters(): Array[Filter] = acceptedFilters\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoTable.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport java.util\n\nimport com.audienceproject.spark.dynamodb.connector.{TableConnector, TableIndexConnector}\nimport org.apache.spark.sql.SparkSession\nimport org.apache.spark.sql.connector.catalog._\nimport org.apache.spark.sql.connector.read.ScanBuilder\nimport org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}\nimport org.apache.spark.sql.types._\nimport org.apache.spark.sql.util.CaseInsensitiveStringMap\nimport org.slf4j.LoggerFactory\n\nimport scala.collection.JavaConverters._\n\nclass DynamoTable(options: CaseInsensitiveStringMap,\n                  userSchema: Option[StructType] = None)\n    extends Table\n        with SupportsRead\n        with SupportsWrite {\n\n    private val logger = LoggerFactory.getLogger(this.getClass)\n\n    private val dynamoConnector = {\n        val indexName = Option(options.get(\"indexname\"))\n        val defaultParallelism = Option(options.get(\"defaultparallelism\")).map(_.toInt).getOrElse(getDefaultParallelism)\n        val optionsMap = 
Map(options.asScala.toSeq: _*)\n\n        if (indexName.isDefined) new TableIndexConnector(name(), indexName.get, defaultParallelism, optionsMap)\n        else new TableConnector(name(), defaultParallelism, optionsMap)\n    }\n\n    override def name(): String = options.get(\"tablename\")\n\n    override def schema(): StructType = userSchema.getOrElse(inferSchema())\n\n    override def capabilities(): util.Set[TableCapability] =\n        Set(TableCapability.BATCH_READ, TableCapability.BATCH_WRITE, TableCapability.ACCEPT_ANY_SCHEMA).asJava\n\n    override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {\n        new DynamoScanBuilder(dynamoConnector, schema())\n    }\n\n    override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {\n        val parameters = Map(info.options().asScala.toSeq: _*)\n        dynamoConnector match {\n            case tableConnector: TableConnector => new DynamoWriteBuilder(tableConnector, parameters, info.schema())\n            case _ => throw new RuntimeException(\"Unable to write to a GSI, please omit `indexName` option.\")\n        }\n    }\n\n    private def getDefaultParallelism: Int =\n        SparkSession.getActiveSession match {\n            case Some(spark) => spark.sparkContext.defaultParallelism\n            case None =>\n                logger.warn(\"Unable to read defaultParallelism from SparkSession.\" +\n                    \" Parallelism will be 1 unless overwritten with option `defaultParallelism`\")\n                1\n        }\n\n    private def inferSchema(): StructType = {\n        val inferenceItems =\n            if (dynamoConnector.nonEmpty && options.getBoolean(\"inferSchema\",true)) dynamoConnector.scan(0, Seq.empty, Seq.empty).firstPage().getLowLevelResult.getItems.asScala\n            else Seq.empty\n\n        val typeMapping = inferenceItems.foldLeft(Map[String, DataType]())({\n            case (map, item) => map ++ item.asMap().asScala.mapValues(inferType)\n        })\n      
  val typeSeq = typeMapping.map({ case (name, sparkType) => StructField(name, sparkType) }).toSeq\n\n        if (typeSeq.size > 100) throw new RuntimeException(\"Schema inference not possible, too many attributes in table.\")\n\n        StructType(typeSeq)\n    }\n\n    private def inferType(value: Any): DataType = value match {\n        case number: java.math.BigDecimal =>\n            if (number.scale() == 0) {\n                if (number.precision() < 10) IntegerType\n                else if (number.precision() < 19) LongType\n                else DataTypes.createDecimalType(number.precision(), number.scale())\n            }\n            else DoubleType\n        case list: java.util.ArrayList[_] =>\n            if (list.isEmpty) ArrayType(StringType)\n            else ArrayType(inferType(list.get(0)))\n        case set: java.util.Set[_] =>\n            if (set.isEmpty) ArrayType(StringType)\n            else ArrayType(inferType(set.iterator().next()))\n        case map: java.util.Map[String, _] =>\n            val mapFields = (for ((fieldName, fieldValue) <- map.asScala) yield {\n                StructField(fieldName, inferType(fieldValue))\n            }).toSeq\n            StructType(mapFields)\n        case _: java.lang.Boolean => BooleanType\n        case _: Array[Byte] => BinaryType\n        case _ => StringType\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoWriteBuilder.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.audienceproject.spark.dynamodb.connector.TableConnector\nimport org.apache.spark.sql.connector.write._\nimport org.apache.spark.sql.types.StructType\n\nclass DynamoWriteBuilder(connector: TableConnector, parameters: Map[String, String], schema: StructType)\n    extends WriteBuilder {\n\n    override def buildForBatch(): BatchWrite = new BatchWrite {\n        override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory =\n            new DynamoWriterFactory(connector, parameters, schema)\n\n        override def commit(messages: Array[WriterCommitMessage]): Unit = {}\n\n        override def abort(messages: Array[WriterCommitMessage]): Unit = {}\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/DynamoWriterFactory.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.audienceproject.spark.dynamodb.connector.{ColumnSchema, TableConnector}\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory}\nimport org.apache.spark.sql.types.StructType\n\nclass DynamoWriterFactory(connector: TableConnector,\n                          parameters: Map[String, String],\n                          schema: StructType)\n    extends DataWriterFactory {\n\n    private val batchSize = parameters.getOrElse(\"writebatchsize\", \"25\").toInt\n    private val update = parameters.getOrElse(\"update\", \"false\").toBoolean\n    private val delete = parameters.getOrElse(\"delete\", \"false\").toBoolean\n\n    private val region = parameters.get(\"region\")\n    private val roleArn = parameters.get(\"rolearn\")\n    private val providerClassName = parameters.get(\"providerclassname\")\n\n    override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] = {\n        val columnSchema = new 
ColumnSchema(connector.keySchema, schema)\n        val client = connector.getDynamoDB(region, roleArn, providerClassName)\n        if (update) {\n            assert(!delete, \"Please provide exactly one of 'update' or 'delete' options.\")\n            new DynamoDataUpdateWriter(columnSchema, connector, client)\n        } else if (delete) {\n            new DynamoDataDeleteWriter(batchSize, columnSchema, connector, client)\n        } else {\n            new DynamoDataWriter(batchSize, columnSchema, connector, client)\n        }\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/OutputPartitioning.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport org.apache.spark.sql.connector.read.partitioning.{Distribution, Partitioning}\n\nclass OutputPartitioning(override val numPartitions: Int) extends Partitioning {\n\n    override def satisfy(distribution: Distribution): Boolean = false\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/ScanPartition.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport org.apache.spark.sql.connector.read.InputPartition\nimport org.apache.spark.sql.sources.Filter\n\nclass ScanPartition(val partitionIndex: Int,\n                    val requiredColumns: Seq[String],\n                    val filters: Array[Filter])\n    extends InputPartition\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/datasource/TypeConversion.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2019 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.datasource\n\nimport com.amazonaws.services.dynamodbv2.document.{IncompatibleTypeException, Item}\nimport org.apache.spark.sql.catalyst.InternalRow\nimport org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}\nimport org.apache.spark.sql.types._\nimport org.apache.spark.unsafe.types.UTF8String\n\nimport scala.collection.JavaConverters._\n\nprivate[dynamodb] object TypeConversion {\n\n    def apply(attrName: String, sparkType: DataType): Item => Any =\n\n        sparkType match {\n            case BooleanType => nullableGet(_.getBOOL)(attrName)\n            case StringType => nullableGet(item => attrName => UTF8String.fromString(item.getString(attrName)))(attrName)\n            case IntegerType => nullableGet(_.getInt)(attrName)\n            case LongType => nullableGet(_.getLong)(attrName)\n            case DoubleType => nullableGet(_.getDouble)(attrName)\n            case FloatType => nullableGet(_.getFloat)(attrName)\n            case BinaryType => nullableGet(_.getBinary)(attrName)\n         
   case DecimalType() => nullableGet(_.getNumber)(attrName)\n            case ArrayType(innerType, _) =>\n                nullableGet(_.getList)(attrName).andThen(extractArray(convertValue(innerType)))\n            case MapType(keyType, valueType, _) =>\n                if (keyType != StringType) throw new IllegalArgumentException(s\"Invalid Map key type '${keyType.typeName}'. DynamoDB only supports String as Map key type.\")\n                nullableGet(_.getRawMap)(attrName).andThen(extractMap(convertValue(valueType)))\n            case StructType(fields) =>\n                val nestedConversions = fields.collect({ case StructField(name, dataType, _, _) => name -> convertValue(dataType) })\n                nullableGet(_.getRawMap)(attrName).andThen(extractStruct(nestedConversions))\n            case _ => throw new IllegalArgumentException(s\"Spark DataType '${sparkType.typeName}' could not be mapped to a corresponding DynamoDB data type.\")\n        }\n\n    private val stringConverter = (value: Any) => UTF8String.fromString(value.asInstanceOf[String])\n\n    private def convertValue(sparkType: DataType): Any => Any =\n\n        sparkType match {\n            case IntegerType => nullableConvert(_.intValue())\n            case LongType => nullableConvert(_.longValue())\n            case DoubleType => nullableConvert(_.doubleValue())\n            case FloatType => nullableConvert(_.floatValue())\n            case DecimalType() => nullableConvert(identity)\n            case ArrayType(innerType, _) => extractArray(convertValue(innerType))\n            case MapType(keyType, valueType, _) =>\n                if (keyType != StringType) throw new IllegalArgumentException(s\"Invalid Map key type '${keyType.typeName}'. 
DynamoDB only supports String as Map key type.\")\n                extractMap(convertValue(valueType))\n            case StructType(fields) =>\n                val nestedConversions = fields.collect({ case StructField(name, dataType, _, _) => name -> convertValue(dataType) })\n                extractStruct(nestedConversions)\n            case BooleanType => {\n                case boolean: Boolean => boolean\n                case _ => null\n            }\n            case StringType => {\n                case string: String => UTF8String.fromString(string)\n                case _ => null\n            }\n            case BinaryType => {\n                case byteArray: Array[Byte] => byteArray\n                case _ => null\n            }\n            case _ => throw new IllegalArgumentException(s\"Spark DataType '${sparkType.typeName}' could not be mapped to a corresponding DynamoDB data type.\")\n        }\n\n    private def nullableGet(getter: Item => String => Any)(attrName: String): Item => Any = {\n        case item if item.hasAttribute(attrName) => try getter(item)(attrName) catch {\n            case _: NumberFormatException => null\n            case _: IncompatibleTypeException => null\n        }\n        case _ => null\n    }\n\n    private def nullableConvert(converter: java.math.BigDecimal => Any): Any => Any = {\n        case item: java.math.BigDecimal => converter(item)\n        case _ => null\n    }\n\n    private def extractArray(converter: Any => Any): Any => Any = {\n        case list: java.util.List[_] => new GenericArrayData(list.asScala.map(converter))\n        case set: java.util.Set[_] => new GenericArrayData(set.asScala.map(converter).toSeq)\n        case _ => null\n    }\n\n    private def extractMap(converter: Any => Any): Any => Any = {\n        case map: java.util.Map[_, _] => ArrayBasedMapData(map, stringConverter, converter)\n        case _ => null\n    }\n\n    private def extractStruct(conversions: Seq[(String, Any => Any)]): Any => 
Any = {\n        case map: java.util.Map[_, _] => InternalRow.fromSeq(conversions.map({\n            case (name, conv) => conv(map.get(name))\n        }))\n        case _ => null\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/implicits.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.audienceproject.spark.dynamodb.reflect.SchemaAnalysis\nimport org.apache.spark.sql._\nimport org.apache.spark.sql.catalyst.encoders.ExpressionEncoder\nimport org.apache.spark.sql.functions.col\n\nimport scala.reflect.ClassTag\nimport scala.reflect.runtime.universe.TypeTag\n\nobject implicits {\n\n    implicit class DynamoDBDataFrameReader(reader: DataFrameReader) {\n\n        def dynamodb(tableName: String): DataFrame =\n            getDynamoDBSource(tableName).load()\n\n        def dynamodb(tableName: String, indexName: String): DataFrame =\n            getDynamoDBSource(tableName).option(\"indexName\", indexName).load()\n\n        def dynamodbAs[T <: Product : ClassTag : TypeTag](tableName: String): Dataset[T] = {\n            implicit val encoder: Encoder[T] = ExpressionEncoder()\n            val (schema, aliasMap) = SchemaAnalysis[T]\n            getColumnsAlias(getDynamoDBSource(tableName).schema(schema).load(), aliasMap).as\n        }\n\n        def dynamodbAs[T <: Product : ClassTag : 
TypeTag](tableName: String, indexName: String): Dataset[T] = {\n            implicit val encoder: Encoder[T] = ExpressionEncoder()\n            val (schema, aliasMap) = SchemaAnalysis[T]\n            getColumnsAlias(\n                getDynamoDBSource(tableName).option(\"indexName\", indexName).schema(schema).load(), aliasMap).as\n        }\n\n        private def getDynamoDBSource(tableName: String): DataFrameReader =\n            reader.format(\"com.audienceproject.spark.dynamodb.datasource\").option(\"tableName\", tableName)\n\n        private def getColumnsAlias(dataFrame: DataFrame, aliasMap: Map[String, String]): DataFrame = {\n            if (aliasMap.isEmpty) dataFrame\n            else {\n                val columnsAlias = dataFrame.columns.map({\n                    case name if aliasMap.isDefinedAt(name) => col(name) as aliasMap(name)\n                    case name => col(name)\n                })\n                dataFrame.select(columnsAlias: _*)\n            }\n        }\n\n    }\n\n    implicit class DynamoDBDataFrameWriter[T](writer: DataFrameWriter[T]) {\n\n        def dynamodb(tableName: String): Unit =\n            writer.format(\"com.audienceproject.spark.dynamodb.datasource\")\n                .mode(SaveMode.Append)\n                .option(\"tableName\", tableName)\n                .save()\n\n    }\n\n}\n"
  },
  {
    "path": "src/main/scala/com/audienceproject/spark/dynamodb/reflect/SchemaAnalysis.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb.reflect\n\nimport com.audienceproject.spark.dynamodb.attribute\nimport org.apache.spark.sql.catalyst.ScalaReflection\nimport org.apache.spark.sql.types.{StructField, StructType}\n\nimport scala.reflect.ClassTag\nimport scala.reflect.runtime.{universe => ru}\n\n/**\n  * Uses reflection to perform a static analysis that can derive a Spark schema from a case class of type `T`.\n  */\nprivate[dynamodb] object SchemaAnalysis {\n\n    def apply[T <: Product : ClassTag : ru.TypeTag]: (StructType, Map[String, String]) = {\n\n        val runtimeMirror = ru.runtimeMirror(getClass.getClassLoader)\n\n        val classObj = scala.reflect.classTag[T].runtimeClass\n        val classSymbol = runtimeMirror.classSymbol(classObj)\n\n        val params = classSymbol.primaryConstructor.typeSignature.paramLists.head\n        val (sparkFields, aliasMap) = params.foldLeft((List.empty[StructField], Map.empty[String, String]))({\n            case ((list, map), field) =>\n                val sparkType = 
ScalaReflection.schemaFor(field.typeSignature).dataType\n\n                // Black magic from here:\n                // https://stackoverflow.com/questions/23046958/accessing-an-annotation-value-in-scala\n                val attrName = field.annotations.collectFirst({\n                    case ann: ru.AnnotationApi if ann.tree.tpe =:= ru.typeOf[attribute] =>\n                        ann.tree.children.tail.collectFirst({\n                            case ru.Literal(ru.Constant(name: String)) => name\n                        })\n                }).flatten\n\n                if (attrName.isDefined) {\n                    val sparkField = StructField(attrName.get, sparkType, nullable = true)\n                    (list :+ sparkField, map + (attrName.get -> field.name.toString))\n                } else {\n                    val sparkField = StructField(field.name.toString, sparkType, nullable = true)\n                    (list :+ sparkField, map)\n                }\n        })\n\n        (StructType(sparkFields), aliasMap)\n    }\n\n}\n"
  },
  {
    "path": "src/test/resources/log4j2.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Configuration status=\"WARN\" name=\"Log4j2 configuration\">\n    <Appenders>\n        <Console target=\"SYSTEM_OUT\" name=\"console\">\n            <PatternLayout pattern=\"%highlight{[%-5level][%d{HH:mm:ss.SSS}][%logger{36}]} %msg%n\" />\n        </Console>\n        <Console target=\"SYSTEM_OUT\" name=\"simple-console\">\n            <PatternLayout pattern=\"%msg%n\" />\n        </Console>\n    </Appenders>\n    <Loggers>\n        <Root level=\"INFO\">\n            <AppenderRef ref=\"console\" />\n        </Root>\n        <logger name=\"org.apache.spark\" level=\"WARN\">\n            <AppenderRef ref=\"simple-console\"/>\n        </logger>\n        <logger name=\"org.spark_project.jetty\" level=\"WARN\">\n            <AppenderRef ref=\"simple-console\"/>\n        </logger>\n        <logger name=\"com.amazonaws.services.dynamodbv2.local\" level=\"WARN\">\n            <AppenderRef ref=\"simple-console\"/>\n        </logger>\n        <logger name=\"com.amazonaws.auth.profile.internal.BasicProfileConfigLoader\" level=\"ERROR\">\n            <AppenderRef ref=\"simple-console\"/>\n        </logger>\n        <Logger name=\"MessageOnly\" level=\"INFO\" additivity=\"false\">\n            <AppenderRef ref=\"simple-console\"/>\n        </Logger>\n    </Loggers>\n</Configuration>\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/AbstractInMemoryTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration\nimport com.amazonaws.services.dynamodbv2.document.{DynamoDB, Item}\nimport com.amazonaws.services.dynamodbv2.local.main.ServerRunner\nimport com.amazonaws.services.dynamodbv2.local.server.DynamoDBProxyServer\nimport com.amazonaws.services.dynamodbv2.model.{AttributeDefinition, CreateTableRequest, KeySchemaElement, ProvisionedThroughput}\nimport com.amazonaws.services.dynamodbv2.{AmazonDynamoDB, AmazonDynamoDBClientBuilder}\nimport org.apache.spark.sql.SparkSession\nimport org.scalatest.{BeforeAndAfterAll, FunSuite}\n\nclass AbstractInMemoryTest extends FunSuite with BeforeAndAfterAll {\n\n    val server: DynamoDBProxyServer = ServerRunner.createServerFromCommandLineArgs(Array(\"-inMemory\"))\n\n    val client: AmazonDynamoDB = AmazonDynamoDBClientBuilder.standard()\n        .withEndpointConfiguration(new EndpointConfiguration(System.getProperty(\"aws.dynamodb.endpoint\"), \"us-east-1\"))\n        .build()\n    val dynamoDB: DynamoDB = 
new DynamoDB(client)\n\n    val spark: SparkSession = SparkSession.builder\n        .master(\"local\")\n        .appName(this.getClass.getName)\n        .getOrCreate()\n\n    spark.sparkContext.setLogLevel(\"ERROR\")\n\n    override def beforeAll(): Unit = {\n        server.start()\n\n        // Create a test table.\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"TestFruit\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        // Populate with test data.\n        val table = dynamoDB.getTable(\"TestFruit\")\n        for ((name, color, weight) <- Seq(\n            (\"apple\", \"red\", 0.2), (\"banana\", \"yellow\", 0.15), (\"watermelon\", \"red\", 0.5),\n            (\"grape\", \"green\", 0.01), (\"pear\", \"green\", 0.2), (\"kiwi\", \"green\", 0.05),\n            (\"blackberry\", \"purple\", 0.01), (\"blueberry\", \"purple\", 0.01), (\"plum\", \"purple\", 0.1)\n        )) {\n            table.putItem(new Item()\n                .withString(\"name\", name)\n                .withString(\"color\", color)\n                .withDouble(\"weightKg\", weight))\n        }\n    }\n\n    override def afterAll(): Unit = {\n        client.deleteTable(\"TestFruit\")\n        server.stop()\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/DefaultSourceTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.audienceproject.spark.dynamodb.implicits._\nimport com.audienceproject.spark.dynamodb.structs.TestFruit\nimport org.apache.spark.sql.functions._\n\nimport scala.collection.JavaConverters._\n\nclass DefaultSourceTest extends AbstractInMemoryTest {\n\n    test(\"Table count is 9\") {\n        val count = spark.read.dynamodb(\"TestFruit\")\n        count.show()\n        assert(count.count() === 9)\n    }\n\n    test(\"Column sum is 27\") {\n        val result = spark.read.dynamodb(\"TestFruit\").collectAsList().asScala\n        val numCols = result.map(_.length).sum\n        assert(numCols === 27)\n    }\n\n    test(\"Select only first two columns\") {\n        val result = spark.read.dynamodb(\"TestFruit\").select(\"name\", \"color\").collectAsList().asScala\n        val numCols = result.map(_.length).sum\n        assert(numCols === 18)\n    }\n\n    test(\"The least occurring color is yellow\") {\n        import spark.implicits._\n        val itemWithLeastOccurringColor = 
spark.read.dynamodb(\"TestFruit\")\n            .groupBy($\"color\").agg(count($\"color\").as(\"countColor\"))\n            .orderBy($\"countColor\")\n            .takeAsList(1).get(0)\n        assert(itemWithLeastOccurringColor.getAs[String](\"color\") === \"yellow\")\n    }\n\n    test(\"Test of attribute name alias\") {\n        import spark.implicits._\n        val itemApple = spark.read.dynamodbAs[TestFruit](\"TestFruit\")\n            .filter($\"primaryKey\" === \"apple\")\n            .takeAsList(1).get(0)\n        assert(itemApple.primaryKey === \"apple\")\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/FilterPushdownTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.audienceproject.spark.dynamodb.implicits._\n\nclass FilterPushdownTest extends AbstractInMemoryTest {\n\n    test(\"Count of red fruit is 2 (`EqualTo` filter)\") {\n        import spark.implicits._\n        val fruitCount = spark.read.dynamodb(\"TestFruit\").where($\"color\" === \"red\").count()\n        assert(fruitCount === 2)\n    }\n\n    test(\"Count of yellow and green fruit is 4 (`In` filter)\") {\n        import spark.implicits._\n        val fruitCount = spark.read.dynamodb(\"TestFruit\")\n            .where($\"color\" isin(\"yellow\", \"green\"))\n            .count()\n        assert(fruitCount === 4)\n    }\n\n    test(\"Count of 0.01 weight fruit is 4 (`In` filter)\") {\n        import spark.implicits._\n        val fruitCount = spark.read.dynamodb(\"TestFruit\")\n            .where($\"weightKg\" isin 0.01)\n            .count()\n        assert(fruitCount === 3)\n    }\n\n    test(\"Only 'banana' starts with a 'b' and is >0.01 kg (`StringStartsWith`, `GreaterThan`, `And` filters)\") {\n   
     import spark.implicits._\n        val fruit = spark.read.dynamodb(\"TestFruit\")\n            .where(($\"name\" startsWith \"b\") && ($\"weightKg\" > 0.01))\n            .collectAsList().get(0)\n        assert(fruit.getAs[String](\"name\") === \"banana\")\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/NestedDataStructuresTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.amazonaws.services.dynamodbv2.model.{AttributeDefinition, CreateTableRequest, KeySchemaElement, ProvisionedThroughput}\nimport com.audienceproject.spark.dynamodb.implicits._\nimport com.audienceproject.spark.dynamodb.structs.{TestFruitProperties, TestFruitWithProperties}\nimport org.apache.spark.sql.Row\nimport org.apache.spark.sql.functions.struct\nimport org.apache.spark.sql.types._\n\nclass NestedDataStructuresTest extends AbstractInMemoryTest {\n\n    test(\"Insert ArrayType\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"InsertTestList\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val fruitSchema = StructType(\n            Seq(\n                StructField(\"name\", StringType, nullable = false),\n                
StructField(\"color\", StringType, nullable = false),\n                StructField(\"weight\", DoubleType, nullable = false),\n                StructField(\"properties\", ArrayType(StringType, containsNull = false), nullable = false)\n            ))\n\n        val rows = spark.sparkContext.parallelize(Seq(\n            Row(\"lemon\", \"yellow\", 0.1, Seq(\"fresh\", \"2 dkk\")),\n            Row(\"orange\", \"orange\", 0.2, Seq(\"too ripe\", \"1 dkk\")),\n            Row(\"pomegranate\", \"red\", 0.2, Seq(\"freshness\", \"4 dkk\"))\n        ))\n\n        val newItemsDs = spark.createDataFrame(rows, fruitSchema)\n\n        newItemsDs.printSchema()\n        newItemsDs.show(false)\n\n        newItemsDs.write.dynamodb(\"InsertTestList\")\n\n        println(\"Writing successful.\")\n\n        val validationDs = spark.read.dynamodb(\"InsertTestList\")\n        assert(validationDs.count() === 3)\n        assert(validationDs.select($\"properties\".as[Seq[String]]).collect().forall(Seq(\n            Seq(\"fresh\", \"2 dkk\"),\n            Seq(\"too ripe\", \"1 dkk\"),\n            Seq(\"freshness\", \"4 dkk\")\n        ) contains _))\n    }\n\n    test(\"Insert MapType\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"InsertTestMap\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val fruitSchema = StructType(\n            Seq(\n                StructField(\"name\", StringType, nullable = false),\n                StructField(\"color\", StringType, nullable = false),\n                StructField(\"weight\", DoubleType, nullable = false),\n                StructField(\"properties\", MapType(StringType, StringType, valueContainsNull = false))\n            ))\n\n        val rows = 
spark.sparkContext.parallelize(Seq(\n            Row(\"lemon\", \"yellow\", 0.1, Map(\"freshness\" -> \"fresh\", \"eco\" -> \"yes\", \"price\" -> \"2 dkk\")),\n            Row(\"orange\", \"orange\", 0.2, Map(\"freshness\" -> \"too ripe\", \"eco\" -> \"no\", \"price\" -> \"1 dkk\")),\n            Row(\"pomegranate\", \"red\", 0.2, Map(\"freshness\" -> \"green\", \"eco\" -> \"yes\", \"price\" -> \"4 dkk\"))\n        ))\n\n        val newItemsDs = spark.createDataFrame(rows, fruitSchema)\n\n        newItemsDs.printSchema()\n        newItemsDs.show(false)\n\n        newItemsDs.write.dynamodb(\"InsertTestMap\")\n\n        println(\"Writing successful.\")\n\n        val validationDs = spark.read.schema(fruitSchema).dynamodb(\"InsertTestMap\")\n        validationDs.show(false)\n        assert(validationDs.count() === 3)\n        assert(validationDs.select($\"properties\".as[Map[String, String]]).collect().forall(Seq(\n            Map(\"freshness\" -> \"fresh\", \"eco\" -> \"yes\", \"price\" -> \"2 dkk\"),\n            Map(\"freshness\" -> \"too ripe\", \"eco\" -> \"no\", \"price\" -> \"1 dkk\"),\n            Map(\"freshness\" -> \"green\", \"eco\" -> \"yes\", \"price\" -> \"4 dkk\")\n        ) contains _))\n    }\n\n    test(\"Insert ArrayType with nested MapType\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"InsertTestListMap\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val fruitSchema = StructType(\n            Seq(\n                StructField(\"name\", StringType, nullable = false),\n                StructField(\"color\", StringType, nullable = false),\n                StructField(\"weight\", DoubleType, nullable = false),\n                StructField(\"properties\", ArrayType(MapType(StringType, 
StringType, valueContainsNull = false), containsNull = false), nullable = false)\n            ))\n\n        val rows = spark.sparkContext.parallelize(Seq(\n            Row(\"lemon\", \"yellow\", 0.1, Seq(Map(\"freshness\" -> \"fresh\", \"eco\" -> \"yes\", \"price\" -> \"2 dkk\"))),\n            Row(\"orange\", \"orange\", 0.2, Seq(Map(\"freshness\" -> \"too ripe\", \"eco\" -> \"no\", \"price\" -> \"1 dkk\"))),\n            Row(\"pomegranate\", \"red\", 0.2, Seq(Map(\"freshness\" -> \"green\", \"eco\" -> \"yes\", \"price\" -> \"4 dkk\")))\n        ))\n\n        val newItemsDs = spark.createDataFrame(rows, fruitSchema)\n\n        newItemsDs.printSchema()\n        newItemsDs.show(false)\n\n        newItemsDs.write.dynamodb(\"InsertTestListMap\")\n\n        println(\"Writing successful.\")\n\n        val validationDs = spark.read.schema(fruitSchema).dynamodb(\"InsertTestListMap\")\n        validationDs.show(false)\n        assert(validationDs.count() === 3)\n        assert(validationDs.select($\"properties\".as[Seq[Map[String, String]]]).collect().forall(Seq(\n            Seq(Map(\"freshness\" -> \"fresh\", \"eco\" -> \"yes\", \"price\" -> \"2 dkk\")),\n            Seq(Map(\"freshness\" -> \"too ripe\", \"eco\" -> \"no\", \"price\" -> \"1 dkk\")),\n            Seq(Map(\"freshness\" -> \"green\", \"eco\" -> \"yes\", \"price\" -> \"4 dkk\"))\n        ) contains _))\n    }\n\n    test(\"Insert StructType\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"InsertTestStruct\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val fruitSchema = StructType(\n            Seq(\n                StructField(\"name\", StringType, nullable = false),\n                StructField(\"color\", StringType, nullable = false),\n     
           StructField(\"weight\", DoubleType, nullable = false),\n                StructField(\"freshness\", StringType, nullable = false),\n                StructField(\"eco\", BooleanType, nullable = false),\n                StructField(\"price\", DoubleType, nullable = false)\n            ))\n\n        val rows = spark.sparkContext.parallelize(Seq(\n            Row(\"lemon\", \"yellow\", 0.1, \"fresh\", true, 2.0),\n            Row(\"pomegranate\", \"red\", 0.2, \"green\", true, 4.0)\n        ))\n\n        val newItemsDs = spark.createDataFrame(rows, fruitSchema).select(\n            $\"name\",\n            $\"color\",\n            $\"weight\",\n            struct($\"freshness\", $\"eco\", $\"price\") as \"properties\"\n        )\n\n        newItemsDs.printSchema()\n        newItemsDs.show(false)\n\n        newItemsDs.write.dynamodb(\"InsertTestStruct\")\n\n        println(\"Writing successful.\")\n\n        val validationDs = spark.read.dynamodbAs[TestFruitWithProperties](\"InsertTestStruct\")\n        assert(validationDs.count() === 2)\n        assert(validationDs.select($\"properties\".as[TestFruitProperties]).collect().forall(Seq(\n            TestFruitProperties(\"fresh\", eco = true, 2.0),\n            TestFruitProperties(\"green\", eco = true, 4.0)\n        ) contains _))\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/NullBooleanTest.scala",
    "content": "package com.audienceproject.spark.dynamodb\n\nimport com.amazonaws.services.dynamodbv2.document.Item\nimport com.amazonaws.services.dynamodbv2.model.{\n    AttributeDefinition,\n    CreateTableRequest,\n    KeySchemaElement,\n    ProvisionedThroughput\n}\nimport com.audienceproject.spark.dynamodb.implicits._\n\nclass NullBooleanTest extends AbstractInMemoryTest {\n    test(\"Test Null\") {\n        dynamoDB.createTable(\n            new CreateTableRequest()\n                .withTableName(\"TestNullBoolean\")\n                .withAttributeDefinitions(new AttributeDefinition(\"Pk\", \"S\"))\n                .withKeySchema(new KeySchemaElement(\"Pk\", \"HASH\"))\n                .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L))\n        )\n\n        val table = dynamoDB.getTable(\"TestNullBoolean\")\n\n        for ((_pk, _type, _value) <- Seq(\n            (\"id1\", \"type1\", true),\n            (\"id2\", \"type2\", null)\n        )) {\n            if (_type != \"type2\") {\n                table.putItem(\n                    new Item()\n                        .withString(\"Pk\", _pk)\n                        .withString(\"Type\", _type)\n                        .withBoolean(\"Value\", _value.asInstanceOf[Boolean])\n                )\n            } else {\n                table.putItem(\n                    new Item()\n                        .withString(\"Pk\", _pk)\n                        .withString(\"Type\", _type)\n                        .withNull(\"Value\")\n                )\n            }\n        }\n\n        val df = spark.read.dynamodbAs[BooleanClass](\"TestNullBoolean\")\n\n        import spark.implicits._\n        df.where($\"Type\" === \"type2\").show()\n        client.deleteTable(\"TestNullBoolean\")\n    }\n}\n\ncase class BooleanClass(Pk: String, Type: String, Value: Boolean)\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/NullValuesTest.scala",
    "content": "package com.audienceproject.spark.dynamodb\n\nimport com.amazonaws.services.dynamodbv2.model.{AttributeDefinition, CreateTableRequest, KeySchemaElement, ProvisionedThroughput}\nimport com.audienceproject.spark.dynamodb.implicits._\nimport org.apache.spark.sql.Row\nimport org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}\n\nclass NullValuesTest extends AbstractInMemoryTest {\n\n    test(\"Insert nested StructType with null values\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"NullTest\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        val schema = StructType(\n            Seq(\n                StructField(\"name\", StringType, nullable = false),\n                StructField(\"info\", StructType(\n                    Seq(\n                        StructField(\"age\", IntegerType, nullable = true),\n                        StructField(\"address\", StringType, nullable = true)\n                    )\n                ), nullable = true)\n            )\n        )\n\n        val rows = spark.sparkContext.parallelize(Seq(\n            Row(\"one\", Row(30, \"Somewhere\")),\n            Row(\"two\", null),\n            Row(\"three\", Row(null, null))\n        ))\n\n        val newItemsDs = spark.createDataFrame(rows, schema)\n\n        newItemsDs.write.dynamodb(\"NullTest\")\n\n        val validationDs = spark.read.dynamodb(\"NullTest\")\n\n        validationDs.show(false)\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/RegionTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration\nimport com.amazonaws.services.dynamodbv2.{AmazonDynamoDB, AmazonDynamoDBClientBuilder}\nimport com.amazonaws.services.dynamodbv2.document.DynamoDB\nimport com.amazonaws.services.dynamodbv2.model.{AttributeDefinition, CreateTableRequest, KeySchemaElement, ProvisionedThroughput}\nimport com.audienceproject.spark.dynamodb.implicits._\n\nclass RegionTest extends AbstractInMemoryTest {\n\n    test(\"Inserting from a local Dataset\") {\n        val tableName = \"RegionTest1\"\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(tableName)\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n        val client: AmazonDynamoDB = AmazonDynamoDBClientBuilder.standard()\n            .withEndpointConfiguration(new 
EndpointConfiguration(System.getProperty(\"aws.dynamodb.endpoint\"), \"eu-central-1\"))\n            .build()\n        val dynamoDBEU: DynamoDB = new DynamoDB(client)\n        dynamoDBEU.createTable(new CreateTableRequest()\n            .withTableName(tableName)\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = spark.createDataset(Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"red\", 0.2)\n        ))\n            .withColumnRenamed(\"_1\", \"name\")\n            .withColumnRenamed(\"_2\", \"color\")\n            .withColumnRenamed(\"_3\", \"weight\")\n        newItemsDs.write.option(\"region\",\"eu-central-1\").dynamodb(tableName)\n\n        val validationDs = spark.read.dynamodb(tableName)\n        assert(validationDs.count() === 0)\n        val validationDsEU = spark.read.option(\"region\",\"eu-central-1\").dynamodb(tableName)\n        assert(validationDsEU.count() === 3)\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/WriteRelationTest.scala",
    "content": "/**\n  * Licensed to the Apache Software Foundation (ASF) under one\n  * or more contributor license agreements.  See the NOTICE file\n  * distributed with this work for additional information\n  * regarding copyright ownership.  The ASF licenses this file\n  * to you under the Apache License, Version 2.0 (the\n  * \"License\"); you may not use this file except in compliance\n  * with the License.  You may obtain a copy of the License at\n  *\n  * http://www.apache.org/licenses/LICENSE-2.0\n  *\n  * Unless required by applicable law or agreed to in writing,\n  * software distributed under the License is distributed on an\n  * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n  * KIND, either express or implied.  See the License for the\n  * specific language governing permissions and limitations\n  * under the License.\n  *\n  * Copyright © 2018 AudienceProject. All rights reserved.\n  */\npackage com.audienceproject.spark.dynamodb\n\nimport java.util\n\nimport collection.JavaConverters._\nimport com.amazonaws.services.dynamodbv2.model.{AttributeDefinition, CreateTableRequest, KeySchemaElement, KeyType, ProvisionedThroughput}\nimport com.audienceproject.spark.dynamodb.implicits._\nimport org.apache.spark.sql.functions.{lit, when, length => sqlLength}\nimport org.scalatest.Matchers\n\nclass WriteRelationTest extends AbstractInMemoryTest with Matchers {\n\n    test(\"Inserting from a local Dataset\") {\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(\"InsertTest1\")\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = spark.createDataset(Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"red\", 0.2)\n   
     ))\n            .withColumnRenamed(\"_1\", \"name\")\n            .withColumnRenamed(\"_2\", \"color\")\n            .withColumnRenamed(\"_3\", \"weight\")\n        newItemsDs.write.dynamodb(\"InsertTest1\")\n\n        val validationDs = spark.read.dynamodb(\"InsertTest1\")\n        assert(validationDs.count() === 3)\n        assert(validationDs.select(\"name\").as[String].collect().forall(Seq(\"lemon\", \"orange\", \"pomegranate\") contains _))\n        assert(validationDs.select(\"color\").as[String].collect().forall(Seq(\"yellow\", \"orange\", \"red\") contains _))\n        assert(validationDs.select(\"weight\").as[Double].collect().forall(Seq(0.1, 0.2, 0.2) contains _))\n    }\n\n    test(\"Deleting from a local Dataset with a HashKey only\") {\n        val tablename = \"DeleteTest1\"\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(tablename)\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"red\", 0.2)\n        ).toDF(\"name\", \"color\", \"weight\")\n        newItemsDs.write.dynamodb(tablename)\n\n        val toDelete = Seq(\n            (\"lemon\", \"yellow\"),\n            (\"orange\", \"blue\"),\n            (\"doesn't exist\", \"black\")\n        ).toDF(\"name\", \"color\")\n        toDelete.write.option(\"delete\", \"true\").dynamodb(tablename)\n\n        val validationDs = spark.read.dynamodb(tablename)\n        validationDs.count() shouldEqual 1\n        val rec = validationDs.first\n        rec.getString(rec.fieldIndex(\"name\")) shouldEqual \"pomegranate\"\n        rec.getString(rec.fieldIndex(\"color\")) shouldEqual \"red\"\n        
rec.getDouble(rec.fieldIndex(\"weight\")) shouldEqual 0.2\n    }\n\n    test(\"Deleting from a local Dataset with a HashKey and RangeKey\") {\n        val tablename = \"DeleteTest2\"\n\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(tablename)\n            .withAttributeDefinitions(Seq(\n                new AttributeDefinition(\"name\", \"S\"),\n                new AttributeDefinition(\"weight\", \"N\")\n            ).asJavaCollection)\n            .withKeySchema(Seq(\n                new KeySchemaElement(\"name\", KeyType.HASH),\n                // also test that non-string key works\n                new KeySchemaElement(\"weight\", KeyType.RANGE)\n            ).asJavaCollection)\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"lemon\", \"blue\", 4.0),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"red\", 0.2)\n        ).toDF(\"name\", \"color\", \"weight\")\n        newItemsDs.write.dynamodb(tablename)\n\n        val toDelete = Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"shouldn'tdelete\", 0.5)\n        ).toDF(\"name\", \"color\", \"weight\")\n        toDelete.write.option(\"delete\", \"true\").dynamodb(tablename)\n\n        val validationDs = spark.read.dynamodb(tablename)\n        validationDs.show\n        validationDs.count() shouldEqual 2\n        validationDs.select(\"name\").as[String].collect should contain theSameElementsAs Seq(\"lemon\", \"pomegranate\")\n        validationDs.select(\"color\").as[String].collect should contain theSameElementsAs Seq(\"blue\", \"red\")\n    }\n\n    test(\"Updating from a local Dataset with new and only some previous columns\") {\n        val tablename = \"UpdateTest1\"\n        dynamoDB.createTable(new CreateTableRequest()\n  
          .withTableName(tablename)\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            (\"pomegranate\", \"red\", 0.2)\n        ).toDF(\"name\", \"color\", \"weight\")\n        newItemsDs.write.dynamodb(tablename)\n\n        newItemsDs\n            .withColumn(\"size\", sqlLength($\"color\"))\n            .drop(\"color\")\n            .withColumn(\"weight\", $\"weight\" * 2)\n            .write.option(\"update\", \"true\").dynamodb(tablename)\n\n        val validationDs = spark.read.dynamodb(tablename)\n        validationDs.show\n        assert(validationDs.count() === 3)\n        assert(validationDs.select(\"name\").as[String].collect().forall(Seq(\"lemon\", \"orange\", \"pomegranate\") contains _))\n        assert(validationDs.select(\"color\").as[String].collect().forall(Seq(\"yellow\", \"orange\", \"red\") contains _))\n        assert(validationDs.select(\"weight\").as[Double].collect().forall(Seq(0.2, 0.4, 0.4) contains _))\n        assert(validationDs.select(\"size\").as[Long].collect().forall(Seq(6, 3) contains _))\n    }\n\n    test(\"Updating from a local Dataset with null values\") {\n        val tablename = \"UpdateTest2\"\n        dynamoDB.createTable(new CreateTableRequest()\n            .withTableName(tablename)\n            .withAttributeDefinitions(new AttributeDefinition(\"name\", \"S\"))\n            .withKeySchema(new KeySchemaElement(\"name\", \"HASH\"))\n            .withProvisionedThroughput(new ProvisionedThroughput(5L, 5L)))\n\n        import spark.implicits._\n\n        val newItemsDs = Seq(\n            (\"lemon\", \"yellow\", 0.1),\n            (\"orange\", \"orange\", 0.2),\n            
(\"pomegranate\", \"red\", 0.2)\n        ).toDF(\"name\", \"color\", \"weight\")\n        newItemsDs.write.dynamodb(tablename)\n\n        val alteredDs = newItemsDs\n            .withColumn(\"weight\", when($\"weight\" < 0.2, $\"weight\").otherwise(lit(null)))\n        alteredDs.show\n        alteredDs.write.option(\"update\", \"true\").dynamodb(tablename)\n\n        val validationDs = spark.read.dynamodb(tablename)\n        validationDs.show\n        assert(validationDs.count() === 3)\n        assert(validationDs.select(\"name\").as[String].collect().forall(Seq(\"lemon\", \"orange\", \"pomegranate\") contains _))\n        assert(validationDs.select(\"color\").as[String].collect().forall(Seq(\"yellow\", \"orange\", \"red\") contains _))\n        assert(validationDs.select(\"weight\").as[Double].collect().forall(Seq(0.2, 0.1) contains _))\n    }\n\n}\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/structs/TestFruit.scala",
    "content": "package com.audienceproject.spark.dynamodb.structs\n\nimport com.audienceproject.spark.dynamodb.attribute\n\ncase class TestFruit(@attribute(\"name\") primaryKey: String,\n                     color: String,\n                     weightKg: Double)\n"
  },
  {
    "path": "src/test/scala/com/audienceproject/spark/dynamodb/structs/TestFruitWithProperties.scala",
    "content": "package com.audienceproject.spark.dynamodb.structs\n\ncase class TestFruitProperties(freshness: String,\n                               eco: Boolean,\n                               price: Double)\n\ncase class TestFruitWithProperties(name: String,\n                                   color: String,\n                                   weight: Double,\n                                   properties: TestFruitProperties)\n"
  },
  {
    "path": "wercker.yml",
    "content": "box:\n    id: audienceproject/jvm\n    username: $DOCKERHUB_ACCOUNT\n    password: $DOCKERHUB_PASSWORD\n    tag: latest\n\nbuild:\n    steps:\n    - script:\n          name: Compile\n          code: sbt clean compile\n    - audienceproject/aws-cli-assume-role@1.0.2:\n        aws-access-key-id: $AWS_ACCESS_KEY\n        aws-secret-access-key: $AWS_SECRET_KEY\n        role-arn: arn:aws:iam::$AWS_ACCOUNT_ID:role/build-$WERCKER_GIT_REPOSITORY\n    - script:\n          name: Test\n          code: sbt clean compile test\n    - script:\n          name: Clean again\n          code: sbt clean\n\npublish-snapshot:\n    steps:\n    - audienceproject/sbt-to-maven-central@2.0.0:\n          user: $NEXUS_USER\n          password: $NEXUS_PASSWORD\n          private-key: $NEXUS_PK\n          passphrase: $NEXUS_PASSPHRASE\n\npublish-release:\n    steps:\n    - audienceproject/sbt-to-maven-central@2.0.0:\n          user: $NEXUS_USER\n          password: $NEXUS_PASSWORD\n          private-key: $NEXUS_PK\n          passphrase: $NEXUS_PASSPHRASE\n          destination: RELEASE\n"
  }
]