[
  {
    "path": ".gitattributes",
    "content": ".travis.yml merge=ours\n"
  },
  {
    "path": ".gitignore",
    "content": ".DS_Store\n.pio_store\n*.swp\n*.swo\nconf/pio-env.sh\ntarget/\nsbt/sbt-launch-*.jar\n..sxr/\n*.class\ncore/data\n*.orig\nexamples/data/ml-*\nfs/\nsupervisord.conf\n/dist\npio.log\n*.tar.gz\n*.pyc\n# Ignore source files whose name prefixed with \"Private\"\nPrivate*.scala\nquickstartapp/\n# Eclipse\n.project\n.classpath\n.settings/\n# IntelliJ\n*.iml\n.idea/\n.templates-cache\n/vendors\n/docs/manual/source/gallery/template-gallery.html.md\ntest-reports/\napache-rat-0.11.jar\ntests/dist\ntests/docker-files/*.jar\ntests/docker-files/*.tgz\nassembly/*.jar\nassembly/src/universal/\n"
  },
  {
    "path": ".travis.yml",
    "content": "##########\n# This is .travis.yml configuration file specifically for master and develop branch.\n# The travis job should contains only unit and integration tests.\n#\n# To avoid this file from being overwritten by .travis.yml from other branches,\n# please add the following to your local git config:\n#   git config merge.ours.driver true\n##########\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nbranches:\n  except:\n    - livedoc\n\nlanguage: scala\n\njdk:\n  - openjdk8\n\nservices:\n  - docker\n\nsudo: required\n\ncache:\n  directories:\n    - $HOME/.ivy2/cache\n    - $HOME/.sbt/boot\n    - $HOME/.sbt/launchers\n\nenv:\n  matrix:\n    - BUILD_TYPE=Unit\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n    - BUILD_TYPE=Integration\n      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3\n      PIO_ELASTICSEARCH_VERSION=6.8.1\n    - BUILD_TYPE=Integration\n      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS\n      PIO_HBASE_VERSION=1.2.6\n\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.0.2\n      PIO_HADOOP_VERSION=2.6.5\n    - 
BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.1.3\n      PIO_HADOOP_VERSION=2.6.5\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.2.3\n      PIO_HADOOP_VERSION=2.6.5\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.3.3\n      PIO_HADOOP_VERSION=2.6.5\n\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.0.2\n      PIO_HADOOP_VERSION=2.7.7\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.1.3\n      PIO_HADOOP_VERSION=2.7.7\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.2.3\n      PIO_HADOOP_VERSION=2.7.7\n    - BUILD_TYPE=Integration\n      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS\n      PIO_SCALA_VERSION=2.11.12\n      PIO_SPARK_VERSION=2.4.0\n      PIO_HADOOP_VERSION=2.7.7\n\n    - BUILD_TYPE=LicenseCheck\n\nbefore_install:\n  - unset SBT_OPTS JVM_OPTS\n  - sudo rm /usr/local/bin/docker-compose\n  - travis_retry curl -L https://github.com/docker/compose/releases/download/1.11.1/docker-compose-`uname -s`-`uname -m` > docker-compose\n  - chmod +x docker-compose\n  - sudo mv docker-compose /usr/local/bin\n\nbefore_script:\n  - sudo sysctl -w vm.max_map_count=262144\n  - docker-compose -v\n  - travis_retry ./tests/before_script.travis.sh\n\nscript:\n  - travis_retry ./tests/script.travis.sh\n\nafter_script:\n  - ./tests/after_script.travis.sh\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThank you for your interest in contributing to Apache PredictionIO.\nOur mission is to enable developers to build scalable machine learning applications easily.\nHere is how you can help with the project development. If you have any\nquestion regarding development at anytime, please free to subscribe and post to\nthe Development Mailing List <mailto:dev-subscribe@predictionio.apache.org>.\n\nFor code contribution, please follow guidelines at\nhttp://predictionio.apache.org/community/contribute-code/.\n\nFor documentation contribution, please follow guidelines at\nhttp://predictionio.apache.org/community/contribute-documentation/.\n"
  },
  {
    "path": "Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. AT\n#          THIS POINT, THIS IS ONLY INTENDED FOR USE IN AUTOMATED TESTS. IF YOU\n#          ARE LOOKING TO DEPLOY PREDICTIONIO WITH DOCKER, PLEASE REFER TO\n#          http://predictionio.apache.org/community/projects/#docker-installation-for-predictionio\n\nFROM predictionio/pio-testing-base\n\n# Include the entire code tree\nENV PIO_HOME /PredictionIO\nENV PATH ${PIO_HOME}/bin/:${PATH}\nADD . ${PIO_HOME}\n"
  },
  {
    "path": "KEYS",
    "content": "This file contains the PGP keys of various developers.\nPlease don't use them for email unless you have to. Their main\npurpose is code signing.\n\nUsers: pgp < KEYS\n       gpg --import KEYS\nDevelopers:\n        pgp -kxa <your name> and append it to this file.\n        (pgpk -ll <your name> && pgpk -xa <your name>) >> this file.\n        (gpg --list-sigs <your name>\n             && gpg --armor --export <your name>) >> this file.\n\n--------------------------------------------------------------------------------------------\n\npub   4096R/D3541808 2014-01-09\nuid       [ultimate] Suneel Marthi (CODE SIGNING KEY) <smarthi@apache.org>\nsig 3        D3541808 2014-01-09  Suneel Marthi (CODE SIGNING KEY) <smarthi@apache.org>\nsub   4096R/AF46E2DE 2014-01-09\nsig          D3541808 2014-01-09  Suneel Marthi (CODE SIGNING KEY) <smarthi@apache.org>\n\n-----BEGIN PGP PUBLIC KEY BLOCK-----\nComment: GPGTools - https://gpgtools.org\n\nmQINBFLPJmEBEAC9d/dUZCXeyhB0fVGmJAjdjXfLebav4VqGdNZC+M1T9C3dcVsh\nX/JGme5bjJeIgVwiH5UsdNceYn1+hyxs8jXuRAWEWKP76gD+pNrp8Az0ZdBkJoAy\nzCywOPtJV2PCOz7+S5ri2nUA2+1Kgcu6IlSLMmYAGO0IAmRrjBEzxy9iGaxiNGTc\nLvQt/iVtIXWkKKI8yvpoJ8iFf3TGhpjgaC/h7cJP3zpy0SScmhJJASLXRsfocLv9\nsle6ndN9IPbDtRW8cL7Fk3VQlzp1ToVjmnQTyZZ6S1WafsjzCZ9hLN+k++o8VbvY\nv3icY6Sy0BKz0J6KwaxTkuZ6w1K7oUkVOQboKaWFIEdO+jwrEmU+Puyd8Np8jLnF\nQ0Y5GPfyMlqM3S/zaDm1t4D1eb5FLciStkxfg5wPVK6TkqB325KVD3aio5C7E7kt\naQechHxaJXCQOtCtVY4X+L4iClnMSuk+hcSc8W8MYRTSVansItK0vI9eQZXMnpan\nw9/jk5rS4Gts1rHB7+kdjT3QRJmkyk6fEFT0fz5tfMC7N8waeEUhCaRW6lAoiqDW\nNW1h+0UGxJw+9YcGxBC0kkt3iofNOWQWmuf/BS3DHPKT7XV/YtBHe44wW0sF5L5P\nnfQUHpnA3pcZ0En6bXAvepKVZTNdOWWJqMyHV+436DA+33h45QL6lWb/GwARAQAB\ntDVTdW5lZWwgTWFydGhpIChDT0RFIFNJR05JTkcgS0VZKSA8c21hcnRoaUBhcGFj\naGUub3JnPokCNwQTAQoAIQUCUs8mYQIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIX\ngAAKCRC08czE01QYCOKKEAChRtHBoYNTX+RZbFO0Kl1GlN+i1Ik0shEm5ZJ56XHv\nAnFx/gRK7CfZzJswWo7kf2s/dvJiFfs+rrolYVuO6E8gNhAaTEomSuvWQAMHdPcR\n9G5APRKCSkbZYugElqplEbSphk78FKoFO+sml52M7Pr9jj88ApBjoFV
VY8njdnNq\n6DVlaDsg8YninCD78Z7PNFnRGwxyZ8Qd4Dh0rG+MUTfAWopZu6/MxpQxU7QpeVeX\nSIMLg7ClFrGfXnZcszYF4dnav1aa0i7W88PAdYNPko7tC5qz5yv2ep7t2gRbcYKf\nRXhYC2FHQey3wPhMKjA8V436lAqmfYnY/YdmhEy9Xq/1EdX1nHsQ7OEkfgXK14WM\nF+rnqXRAl/0cwiyb41eocdg5kpZFIKgCYT02usLWxwNnd3jOCe109Ze3y3acN/G8\n+xOf9YRfNVAe6pD8H6ieRbv9gRjBmsbz9bXQCmxFnDqxNri5Me6gBAQPNmYTJD0h\njgJTK6o0vJ0pwjBLauasJsLu+1tR3Cb0dxPE+JVaTF26FCd7pM7W6KdVfod9ZfrN\ncSyJ/cECc2KvYVGmTjQNVo1dYG0awBachlWnYNt+0Qx4opLsczZOLtPKtFY4BJA7\naZoXT4Qf9yB8km7x2/cgNExVbFummToJ/IP3M39/EaryspsQQuM5Qu5Q5lZp8Qnn\nybkCDQRSzyZhARAA7bAawFzbJaghYnm6mTZyGG5hQmfAynbF6cPAE+g2SnXcNQjP\n6kjYx3tSpb7rEzmjQqs46ztqdec6PIVBMhakON6z27Zz+IviAtO/TcaZHWNuCAjw\nFXVQZ+tYsSeiKInttfkrQc8jXAHWwSkSjLqNpvQpBdBEX80MYkFB6ZPOeON2+/Ta\nGC1H/HU2YngF0qQSmG33KKG6ezihBJdKxU6t2tsQfTlCmZW6R6MGpS9fVurYMKBk\nvR+7RGZ/H6dSjWPcpxhusGg92J9uz7r5SopN1wSdyPMUCMAFGeyoxcAuBDl38quU\nH/ENG3x5LDPq2aEH2AJ6yvZfIXbeJ1zmXf2cAHv+HbmvZaTSp0XIjq8Yxh8NkYEC\nZdfRWmsGLIpU16TkBijpK3Dn9MDXjHGT3V8/qfdpURtMvIaL8WFrq9ejcy/vGRFn\nmCYqxIIPH+vLiMXKWtuMc61GN3ES21msKQH6IuQxxfQLyhK44L/pv7FpF4E+6LaE\n8uRwAex5HIDpR1v4aJq089rRtye9VXTJJLZ7lYs0HctdZ30QbBRWT4jS9d9rj3cr\nHgQ7mIGO9TAfK2kWc6AJN/EvxPWNbOwptsTUzAF/adiy9ax8C18iw7nKczC+2eN6\nUcbxXiPdytuKYK7O9A8S9e1w89GwpxYN7Xfn2o6QfpSbL9cLKiinOeV+xikAEQEA\nAYkCHwQYAQoACQUCUs8mYQIbDAAKCRC08czE01QYCG7yD/471dmyOD+go8cZkdqR\n3CHhjH03odtI0EJNVy4VGEC0r9paz3BWYTy18LqWYkw3ygphOIU1r8/7QK3H5Ke3\nc4yCSUxaMk5SlAJ+iVRek5TABkR8+zI+ZN5pQtqRH+ya5JxV4F/Sx5Q3KWMzpvgY\nn6AgSSc3hEfkgdI7SalIeyLaLDWv+RFdGZ5JU5gD28C0G8BeH8L62x6sixZcqoGT\noy9rwkjs45/ZmmvBZhd1wLvC/au8l2Ecou6O8+8m26W8Z7vCuGKxuWn0KV3DLLWe\n66uchDVlakGoMJSPIK06JWYUlE+gL0CW+U2ekt/v2qb8hGgMVET3CBAMq+bFWuJ6\njuX7hJd7wHtCFfjnFDDAkdp2IIIZAlBW6FZGv7pJ82xsW6pSAg0A7VrV6nTtMtDv\nT8esOfo/t4t0gaL7bivy9DVVdATbUBcJJFpoVoe5MxiyjptveqPzIRwzt04n52Ph\nordVWAnX5AokXWTg+Glem/EWEuf7jUuZArfqCSl/sZoQdXGTjR7G4iFscispji4+\nkNjVQsItqFbgDpuc6n+GcFxlKQ7YMCnu5MVtTV01U4lFs0qy0NTUqsuR35DM4z14\nDkFmj1upWAayCoXTpKzsHBvJZPC+Wqf9Pl3O47apelg7KxU3S011YfXpVPvCTKBv\nkD2o/5GKW
S5QkSUEUXXY1oDiLg==\n=f8kJ\n-----END PGP PUBLIC KEY BLOCK-----\n\npub   4096R/8BF4ABEB 2016-08-01\nuid                  Donald Szeto (CODE SIGNING KEY) <donald@apache.org>\nsig 3        8BF4ABEB 2016-08-01  Donald Szeto (CODE SIGNING KEY) <donald@apache.org>\nsub   4096R/D8AB5D20 2016-08-01\nsig          8BF4ABEB 2016-08-01  Donald Szeto (CODE SIGNING KEY) <donald@apache.org>\n\n-----BEGIN PGP PUBLIC KEY BLOCK-----\nVersion: GnuPG v1\n\nmQINBFefie0BEAC8RvYKQJ7xOeqaBKAi+PpcRvLxvpO9G8HIXDiw/6GCO3/tBHJ8\nZ2NMfGtFx351R+YpAd2KsiInU4iB25YoTeUqCrwR81zBnXPuNsKs6FXqSLlOZrYq\nO+a9wLkBY7bh6ABRc3OI3kGTpFMSqq8tlaJyLHvQIREHtQFckjSONMOjSnR0EAfn\n4DQS3xgVZNAUbpLeJUdc3B5XYAIzMnkFBPSXEQkBmA97kkDrgaoPpeUdGW4Cqsfz\nekUjkjxcax9Dp/OjhLKWmLabHdiVp161Td0x6e24rBaGSVNRlpNLHXfBCBW/+iml\niGEh8OGtW/Fc8b4V4HEhTXPbVLpvgt22T17OTIYKyueUGvSd+AIS0053asLlO9kQ\nX4Y00sH8nnCtJgeTDwwLiudCENvYmE5PvX6Kwiq3tOZJN/onFRKnOHrssXbPd87m\n+82yDx8/oKYKEoA23bz8f7yMPeqmiedgRr4/1b+ToVtiKSUGtnyzLiXbC2c0sxAZ\n/L8qFMEWQmO/iDMq5+JmMvZld+Ns4AO81gg+WiWoCaE0YB3kqo1L3yP+D0FDETke\n5Ky0i2RtVlCzoM9aXz0zQkHx7vhN24h2IJdCADhGAloykmNVxIqlsbDxx02SsNgV\nIuZQ+jq9zwL/VR3UUm8uJ+o55XcgBDjBPALvilMTnUG+tB99ip9H/p3l2QARAQAB\ntDNEb25hbGQgU3pldG8gKENPREUgU0lHTklORyBLRVkpIDxkb25hbGRAYXBhY2hl\nLm9yZz6JAjcEEwEKACEFAlefie0CGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AA\nCgkQbHTsq4v0q+ub4xAArvZBq7K1FjtqiKdwuOqOXGLuQC7Eq8e7mYUvac08nNsq\nrkvr2RtCDN9VaPbYh9TNJ/7BdcwG6IXOmOsW24FsAnrLSueGaw3zuaAhz8Q/vn+b\n7VPcJ3OQEHbNpHlVkAur2NzZobznNhWGK4M9LQnXrVxEMTTDTd5MJqdDKAPNZ/TE\nAav0AiAlOd56U8ONswBHgjqlXoS/xHvsUI9UrJIFGkdz96I1ohdcjmjkDiCYFJRt\n2NvSWgGEtiN8oykYSCjU1qlyPgcIkdHu5E5xy5fXvVdQEV3bc/Y0Ghf02W6Nb8RY\nfuq8qBtBVhbi6T0xqwnuh5iuuO4k1BAJUUC2H+c68VTUWWJ0b6Wzz1x54MClUwQV\nu1hrBFbMGubRTo6uuB9hKMzwXfl3WY6iBXQvb1eY+Y0Fu/NEnNSSSVFq+qyaluUq\n9RQn5u0+VCULomzr0TME4Etd+UbIliiylVFg+mtalvha0z5CrE4EJOJ/c+efI/JP\nvN/WOSJ44JDXUocvp05cZ0GZGyBcfTEepb/gR1dpidoYBnvScWkBak7P4trkadCS\nvpjbMPtWOOEa7hVP+vZg1MvmelZ2o+VvuyWvGMHryimpV6tFFtbiGR03ltC3cN1t\nHmjYjSb9rIsXGIN2c+b1LLuR0zxaK46y8UKNMWrwI
++9Iqbl4BrBN4oofD78p4W5\nAg0EV5+J7QEQAKkBVsL1zSlOuh/GAeXBs3aIwfY+eQD3PIeo4DsCD1J1M8Dn5xa7\nSBHqM+aql7t9hw2iIdqioS8P9ScN1uyWi/MppxDVdTR526ViBR8+739EeprzWPn2\nk+cTGoTeisxQjgLgC/C48taCHDPcztDUh+rLnmcKxKJA+dfqswtiAK7qxCHrT+jq\n5ru78lDqzbHbJU6BqsEyzP9rwtMnGzjbevNC8YLddkZ8iF8KQgSlr93EXlDj6KaU\npZs1AUkPg4UEEkuHJv+pjDhqNfoRSM1vqFyilEe4dWFW+MOKt6yiexVmHB2kgXuV\n7J0PjFi4V2tlgInDhimrvW/6gg2b0EPNF3hPIAo9dBUDpW5EEj1HG+CFPEvLBOHX\nV3LLi5NLDSnj8iL4eiJV+l0/pQToAxDjV7VwFQ3T7gLyKM1YwkOXtS8GsjtqCsJ8\nxIGVWsgmaZWpZSIRuEWhgSLwSPOjZjo2YvXQA3WMdslrx+5/ZF5ElFPBKCYXxpUB\nDkSQ/jTLRSitAXH/0rHrsdw+TZdaW8GPsx7tzdXQVK1PajEpBIx5r+Ix/uhQarwr\n9T2yBQ75rMfPPCccHbAI5g4aGpAEpDNnXfjiYi2fEffB5mEiaDaftiHqjOcoMOBw\nOpN7Y6V+IWniaaqEfWfGMBjM+G/m6veLIgQhAv91TmWvdMksunM9sTAFABEBAAGJ\nAh8EGAEKAAkFAlefie0CGwwACgkQbHTsq4v0q+v1rQ//bp0a6vBrRYJU5RKTZ5me\nUx93RT5BxZqf0wX4deCz0GiaD9G8fJ9HZyv1jedygIeBiSU/dkrkemGA8j0fchca\nAn8yt4tNamo4AAdkvmPa9c7Z4qpHQvKpKDqrVT+ztB+a6qGFjx9cw3iioji3HW9m\nykOPYYk03q9H8h0dW2sa4jaVlXNq/3b2t5cWJ14GGk9XkraSfd+0ZEIT8ffT6u45\nB5l35FuzBdxjyNh91T9UGrREjo1e5sgB0WSss/EJVBAJq+xDbWeOgE/azXQ1MhT+\nZ+BwKfMfvI35SIHG4Ngr/OSirZbQy4s+OktFBbGhBy7dlmWbS2A9SMn7pt9e/i9K\nY44sFGC1xAjq2gnVhbkal0mvT0iDLwIe4/sWuMZJHG05wUGJNMqf8au9KjVj5NzT\niDo8roJi4jFolm6YmH6FRFCeYmLON0pXdNFnCe+cLanrQDI59TBgHh/XPFweOjla\nLqMCEjGpfSLvnopusA1SJUnenwjWWecdfnChnSB8EVIkBiAah1DP1wk+Qy9YfZlj\nMV2jufd24oqFWQmddOCCkIPYeDOavZvcxYevdftY3LX1B4WbiZOMHqh/HDH2NkyW\n6YQIPrw2fEHG2av55RCzCKVcW0PKgW05zjUWwFVo7gEaj5CHK/SeYv0Fpc6QQKls\n93sbykwywNMHkdFPE0109cM=\n=QAQh\n-----END PGP PUBLIC KEY BLOCK-----\npub   4096R/4719A8F4 2017-09-12 [expires: 2021-09-12]\nuid       [ultimate] Chan Lee <chanlee@apache.org>\nsig 3        4719A8F4 2017-09-12  Chan Lee <chanlee@apache.org>\nsub   4096R/A18B1E8F 2017-09-12 [expires: 2021-09-12]\nsig          4719A8F4 2017-09-12  Chan Lee <chanlee@apache.org>\n\n-----BEGIN PGP PUBLIC KEY 
BLOCK-----\n\nmQINBFm4YoUBEAC3CFA/xIiqn+NoqSB8ya+mgnwfuL7XoRcR0gaZQ5BNXMjRZcqw\n5On1v2TTcXo6LcD/g7oxdBYWzUaubsCSmZzMp+cT4w4bmLr3bSZol0akNq4n2MgT\nq7jXOhXDhMdKzIdxuJe7wGtFGLjm8Macc6576MEYq1AUtdDNYuEMWr5PmQwsbNQQ\nCBkH8007trDlPygvzh+w/tLOHNbIv3ynHCIYeY/vYpm3XLHPEF0CjV05sDUMdHkb\nl5s9xYFOfPT6JqXEC1gnjanSvQ20MlLwk1D9AYlkTir+a17/igi2S5YGBUHXvxvU\n7xJrNfN9xDklUMUkHcyZNNWgIr1U7U//4hKm/3D2ele0AmMtL+Y0MmnSKvqXMiRf\nPTON/6rBwK4WdKsYtq1e2nUWv5Btfzb/oEovh8nmJwrpWUi4oBo8q7lBj9PEb1yY\nQXBnSjzg2C4zx/z8O1aHx0zb8njTMwknH0Ii/0ukMyCvC1y9yoDTEcD01ctivxwq\nh4I547vuzVQA7LrPob8fa003R5jR95+bvfedR4beJlDb4uTIS7654Usk58xzCyYt\nhVz7YrD8Zn1u841YYqw52pQ+d29OpVC6z/IaEN06v77TIW6uJVPxdQU/tUJM+fvT\nkFVVU8qcGo5SjPUFyc66s9eJ3NRBdoRQO8ls3YO9JxMNSd4wtSysj9vKOwARAQAB\ntB1DaGFuIExlZSA8Y2hhbmxlZUBhcGFjaGUub3JnPokCPQQTAQoAJwUCWbhihQIb\nAwUJB4YfgAULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRB+I2PYRxmo9Eq3D/9Q\nTg45t1DXEq911JAfCDA/nIFNBaziPt3FIdKeWR3EGxwTie/KrSH7sL6ou40+GnSb\nDcWxfq3CobFkTyaAAF7NGt3cmP5e7H02EaEqrSfewUnV0eZlZOb7qoc4qWoG8vGe\nOwQQEY48DFxmzBzSZSyUvtgJQBX08mlE4jtEMh0QngkxyH5qGi+NEfbvCZaRWf/3\nnfErYSfFenQqqRmXol69zGJ+MPXDZLCBdpayTjNZRRs9Bw4B/VVm3zGgdm2GZ/2U\nG4aPNNBnqDBi6WDBqC8RwOtWcyvHKCXXu2suHTN79SIWhksuPQBILI0lMEfrdsdk\n76WUlgHLvdUAjb/g4a+cV7XkbSstGbMvKi7foM5V2o5FppaL7BApMdIZUN5xTAsh\nzbJ/Wyru69SgNYm/4byWOFyzZBvZLtodqhT/BUHerwOO2ZNtRijwct8Q6o7PePbu\nK/35KTCTmU5D+91igIa72981ePnShuG1qx1v9a3wpoENYRt6YPchW9vhnErKdh3L\nnMfgcobI0HESQDx2Si8n8YGYIwXNeUAdDhkuhULXs22XIAay503R5tYiBm3mTaRR\nq25Tl1uSBeMeNGRoGOfHl7s8oIx/kH54PzgmEbLtEUs4SaUUW1l92ShQgkj87lKX\nZytqx27HUTwkUz3AIxBHs9VloJ2Sh/cmMXB70uTV1LkCDQRZuGKFARAAzPli/9oU\nQLC5Bp75UDIRLQk4kDqyaSfCgcNTuqsXxL0UR/IoICz8S4qo1UccwO6LoUNH5L7j\nEFHdeG5P2a/Y/WQUIduwGs6M4Btb8vC9Q6kw2P+6R7gCoq9L6QKGOWjOwCbd7H1e\nYnxB7IizTYzgHq8llr3V7VXXkBSytCMOXdCxOMwoSyNqJdQtrrV1XdzPSwaHZ5vn\ni+VMuKVM3XlS3cB/KhJTJckWfaQzHMWmCYZytz5GwSJuMz37/YYFKTBPvYfk6z3F\nkVnpYTK/TZ9ZalbVUQY39Xk9MRhF22FbY1/kdmTQaA9+lxEONvmULXo/vmZIrJlS\nua+DdUhrbmz8E6zLvqsH5HcPqJx7nbv/wcjcp4jRPk8vUNu3OO7a4ClFBymJzwL3\nk9bgCo
VEY/fo71jVSpeajzWR3psJjOFLIOufNortYx3AjdVR2TkyKko0k0nFJ3FV\nP2MN86nwCh89kU581Q/XLDmZFrlrqFek6V+J2fu4l42Bu4/rWlML4AGst7p7UsYU\ny/TbF9wJhHgpSV/70gcS2jW26xZpcjNM09fgGlaJFzeqCrxkuIfx5CbdJzkAeFFC\n3Nq9o6rcLZXAqVGlLJLl2hGF7mnrzm+7RpZkoIXwX0RyJVVLaz4e6TYXXygYTaD3\nkQK6fNabA1Yo+SvRiQLBGM9oAaNoe8ajfWEAEQEAAYkCJQQYAQoADwUCWbhihQIb\nDAUJB4YfgAAKCRB+I2PYRxmo9KlYD/0aP3YrtNjHdlmLMit9z1jS4hATMdIohS2M\n5woZkNogmol2SUXLWz0jGNSKlsPvmlFfD7BEJtzoal27GL5/LTu53a7XXtKMdPrT\nRF8ZHQd4SkvAmNySPAZo10utskWnkEBtVntHT4/T4KYaum+f+9DbmVLmBIvOMzWU\nqN5QIYI1HcXw/flY/7F/jH3pZWR1IZ+lgF8AERldYnMUYn+PmEo0YCsu2MulLkfr\n2puXx5k0U/dO2Ljnrai878maLuSH100T8nFnY0lJEvwTb1xns4Hszum//9JOk1kJ\nMpaD7iWKW8BtkN5bouWxT95Igi9I+9nxpDTF47s5fqCbhG7EGI7jeAJYaPUYkzof\nP/k4K44d4jdmPyxCDxcA2KhxU+8QaYYuKgKX51UZzYR2Xx9YKG5IJTsbZNeKvUku\n2T7F+k4B2Z/XbvjzBtsfdQx0hqiDwfw2ZOMTFiaRQKzTbuJ4jb9QoNZazAiuR6gO\nA9pyEhk6ZBWq9zw00ym3W5IGy/YTS/3buDOfgLL82Axp9TEEJo7vk6JN0I4nJScN\nCDBXY/trGluhnOmV1mQ2lJnq4FJ2sJSIVcbYTNahAD3cJ8ub+XFgyeMno0OTlrVg\nLxEW+096aYvYBaxyb9GdKZzUplRHOZWg89DjGM5EVIy8yST1XDyCCMQeK/XglWCA\ngslTn9eM5A==\n=Jacn\n-----END PGP PUBLIC KEY BLOCK-----\npub   rsa4096/9F4FBC3D 2018-09-11 [SC]\nuid         [ultimate] Takako Shimamoto (CODE SIGNING KEY) <shimamoto@apache.org>\nsig 3        9F4FBC3D 2018-09-11  Takako Shimamoto (CODE SIGNING KEY) <shimamoto@apache.org>\nsub   rsa4096/EC151981 2018-09-11 [E]\nsig          9F4FBC3D 2018-09-11  Takako Shimamoto (CODE SIGNING KEY) <shimamoto@apache.org>\n\n-----BEGIN PGP PUBLIC KEY 
BLOCK-----\n\nmQINBFuXgEQBEADA2p47lLog6fWkm3yXB7+jcvzzhZVkLweePBkEi5I0QBOX7PpL\nCFNGGDdJW0L6p+8PhHWkzEeCdzYEJj74TGuMT9pZ+ibbjw3BLw3CvFaJa24/g83j\n1jfoKOBLL7xdsvYyrMr/U3ZZYOpD6UkW4LjMWooGYcthlQgpuTXhmLswrym+b1YA\n9xJbjFFL7gktB9O/XPf80nKDv+/duCtCcLKsWRuVsfFmnabJGJsok17wT9j5gjjc\nGfADZvsQdXJDYFS8Z7Os5fczPzx+xpIKioqLUN1bmXDuwF1+e+hgQuK4WS3RfOu6\nN9bp2R/cnYOcPWIGi955wKkjbUo6ujFFg7ICxhWTEqALZuXXScDoA0SkjtD/E8u6\nd8L43Hb64v4TA6qc7sTzyUGsKjzs/j8iTCFu7H9rl+MvpTZj6BnovHKcqufWO9Aj\nndPPdVsnNse8MoBL7yxZ/eohVILA0LdHu/AnolfQTHtKkKFiCke18OhnS7x8Vg8j\nQ1rUDllsG77LhyA3EwecR//E518nOrxreD4PVXzQvkQz3HK9V2LBAeKrAzCtn0h8\nfpBfCgcN3r5+f8eA34Y3f4b6SGgyRRYYQtIwJ1w0CLGCWm06CKK8rKBK1wa988QA\n3W4r+vCNVKEKUjPBltOvSpebk3DS4Ymk6plRXxLWedS8c89UJE7jb9wF3wARAQAB\ntDpUYWtha28gU2hpbWFtb3RvIChDT0RFIFNJR05JTkcgS0VZKSA8c2hpbWFtb3Rv\nQGFwYWNoZS5vcmc+iQJOBBMBCgA4FiEEGRmUSApCN8H1BfYxFyTwJ59PvD0FAluX\ngEQCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQFyTwJ59PvD3quw/+KJ7k\nlNPkF0ogvBW48bn9HZgm3M6I/fxmHoqqEF1q8uCjnSXuHboEb1LhQO+BKLA4WH+F\nfXUAlAIdzbGrUVIvuExr5QMhVY5oDofUMbUOouJSPG/1JXjikjnS3UP2eZYKyWst\nbNZH7OrMiXqKtGaF8HT0BgVsNYxIEeKAKo4N0QWaUS0n1ep/GqdCBKuFGfXnxH7e\nqMzJCEyuRhXu+S7t7EGdBUGz/2kYAHcfsuAj4y24xzyQOUYYox1wBGoMwg4MGzIy\nWmVflIO7Unqz33dquEfNrOopK6kAbHmI1MBloHcNOVclF8sDTBH2kkkBfnFPy+H7\nmunXFMg4Dtk+4fMsdPp8+wkvQd6J67ao67D9KBN/jnUSohw4bEuzOl6cy+dVlTf9\nXI+t/vKNJq+N8gRRTHvbaDfT9j5JgH51abFnh7Y9UDILU9JmvIFbjkBIQPpUg/ZV\nCPaNzafFPvTn3G/KTVkpc75IDMjgEX4i9scPosvJL8rpGJnfMIEjt7tSwlLJ8lWf\nDsW7XkWo6KpKdlve0e36KV23EdyUAAZ3+Oy05nAdZo/DhaC9LmmSlJwg9l/dwoUG\nHEdPFdEWquoqkAQCbw3JXNuISugrpq7l6gD2cckAEOMg4ub5nGUVCLgojrWa3vGN\neb0YLBR5HlcRHhCnAQj69l7jgID4/VLNCUMjKfi5Ag0EW5eARAEQANf2H038aioy\n0wFO27pERyVbPQrDr9kmN3AX4QoJBQ8U67jEkO+/vjz8S33H/Y/x8crMm8QCly6l\nECukdPCFmne+gloHlJm2pL2Qre/6YKuEDHMq/wut1/HDfKrkA86zfKkX5aut+Vtx\njdh7awf8ub6XcSTmmACBk+g/bvVoKCBD21bdw6h1I1/rYR+X+cCSCTSzvDLlv8ye\nJeMEeLdld0/lTDUHXUyYf/z8AVr2IrtcFJFlA27ixtCqU3getyhBT1zGhlSFtQar\n4tBzQ7UMOKQcxczHOCHM7lyB58zRsDv2PSr5LCV9tysR/CzNHdxIxvsK8qCf4wNp\nYQLqwZ
a0bMdk/1vfiHf2f9L5PIWvfXUcfLFEUqG0GvARia1JC/YCA2vFEluVRPPg\nEZgMsrUWoBOj+4qPaABAYFe8Tv7WW+vHKB9/+sETzUmo3TV0wcZz9qKiQwYoxWZQ\n/hME5q7SLWHd7kLfyTbHBZrYtIvYLRFBljH9RZQR7i+VMThG9yWPrmpiY9Pcxxfv\nsefGISNwlNWmXbK55+LuFHIcjPI+FTUQBF0SqvxJuCXjMmTsxjP3w+BiCrKgJEt4\nZVWN7VI8LYHP6IIctg65TPNti/rkd1AU4MDpSESN0o8b5R+04dIb05s2oEIGPY2R\ntr2WUZY6YwuEmcKQ/igsYq0TwOtIqEtdABEBAAGJAjYEGAEKACAWIQQZGZRICkI3\nwfUF9jEXJPAnn0+8PQUCW5eARAIbDAAKCRAXJPAnn0+8PenjEADAIkw4B/AC7cP+\nxJzjYsE0kVOKLRYo+2+5jRvyoLWffcU6WMs44sF9XI4BRDHAGgDC0xvK03LPeh0d\nmfhIMVEiqG8Zz+6Qkt4upCkXDuJ5QnjKZh4SWXNpW/avzOwCOX2f8JWz11Qoq+J4\nVnd0BbIjjI+rDiBbccr5Kc6tN43QhihLclJ5hO2QpdWIHGFjCaekIX6zWVYAkkFk\nUB4vHG/eghXJA44lP2kUtVb0Ay5Fl23G8bNqC30/DoswE5bJZjgEbKWUGHBCAA+q\nGtYDG6ttj1AzQuwhaW3mjCaspRHYbPp76Jqh/dw1mRuwWsgw/rYhw/Ptydpzeyad\nW3NlHFj0NweMmstfmvwMd6VOP9cXni87Ynra8pWUvzCO2kzCL+IpUylPFAOvsSHe\nM0exU5/K+ClKlsL5J9oL+6Nc02LDgk56H3aQimKc4sO7/TWqIhHtvYMHJj2PK+Da\nP0qc0vJo6f3wYNU3VhzX/IZg/94luTeMiQgCBie5jXrv3EtBqCg/B/+TzOIrl6pO\nTPFJ9Q5iQiLgoJNrBc2AjV34YaCa8esLRLTZQYoVX/9pN4ECcskNX3TxF45zExUi\n8LMAftwE+fVt7zsJe3oZhPwtLN1RZoLJI/zvXJhXKAP/1LK57Ezrws0AwFSazc0I\nwEvmHAxp9J8DJY97zR+oCp/Or62cNw==\n=8icF\n-----END PGP PUBLIC KEY BLOCK-----\n"
  },
  {
    "path": "LICENSE.txt",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n\n\n================================================================================\nPredictionIO Subcomponents:\n\nThe Apache PredictionIO project contains subcomponents with\nseparate copyright notices and license terms. Your use of the source code for\nthese subcomponents is subject to the terms and conditions of the following\nlicenses.\n\n\n================================================================================\nFor semver.sh in bin/:\n================================================================================\nCopyright (c) 2013, Ray Bejjani\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n2. 
Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\nON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\nThe views and conclusions contained in the software and documentation are those\nof the authors and should not be interpreted as representing official policies,\neither expressed or implied, of the FreeBSD Project.\n\n\n================================================================================\nFor sbt in sbt/:\n================================================================================\n\n// Generated from http://www.opensource.org/licenses/bsd-license.php\nCopyright (c) 2011, Paul Phillips.\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n    * Redistributions of source code must retain the above copyright notice,\n      this list of conditions and the following disclaimer.\n    * Redistributions in binary form must reproduce the above copyright notice,\n      this list of conditions and the following disclaimer in the documentation\n      and/or other materials 
provided with the distribution.\n    * Neither the name of the author nor the names of its contributors may be\n      used to endorse or promote products derived from this software without\n      specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\nARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE\nLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\nEVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n================================================================================\nFor binary distribution:\n================================================================================\nBinary distribution bundles\n\n  javax.servlet # servlet-api # 2.5\n  javax.servlet.jsp # jsp-api # 2.1\n  javax.activation # activation # 1.1\n  javax.xml.stream # stax-api # 1.0-2\n\n  which are available under the CDDL v1.0 license (https://glassfish.java.net/public/CDDLv1.0.text)\n\nCOMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0\n\n1. Definitions.\n\n1.1. Contributor means each individual or entity that creates or contributes to the creation of Modifications.\n\n1.2. Contributor Version means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor.\n\n1.3. 
Covered Software means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof.\n\n1.4. Executable means the Covered Software in any form other than Source Code.\n\n1.5. Initial Developer means the individual or entity that first makes Original Software available under this License.\n\n1.6. Larger Work means a work which combines Covered Software or portions thereof with code not governed by the terms of this License.\n\n1.7. License means this document.\n\n1.8. Licensable means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein.\n\n1.9. Modifications means the Source Code and Executable form of any of the following:\n\nA. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications;\n\nB. Any new file that contains any part of the Original Software or previous Modification; or\n\nC. Any new file that is contributed or otherwise made available under the terms of this License.\n\n1.10. Original Software means the Source Code and Executable form of computer software code that is originally released under this License.\n\n1.11. Patent Claims means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor.\n\n1.12. Source Code means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code.\n\n1.13. You (or Your) means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, You includes any entity which controls, is controlled by, or is under common control with You. 
For purposes of this definition, control means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity.\n\n2. License Grants.\n\n2.1. The Initial Developer Grant.\nConditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license:\n(a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and\n(b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof).\n(c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License.\n(d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices.\n\n2.2. 
Contributor Grant.\nConditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license:\n(a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and\n(b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination).\n(c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party.\n(d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor.\n\n3. Distribution Obligations.\n\n3.1. 
Availability of Source Code.\n\nAny Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange.\n\n3.2. Modifications.\n\nThe Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License.\n\n3.3. Required Notices.\nYou must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer.\n\n3.4. Application of Additional Terms.\nYou may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer.\n\n3.5. Distribution of Executable Versions.\nYou may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer.\n\n3.6. Larger Works.\nYou may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software.\n\n4. Versions of the License.\n\n4.1. New Versions.\nSun Microsystems, Inc. is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License.\n\n4.2. 
Effect of New Versions.\n\nYou may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward.\n\n4.3. Modified Versions.\n\nWhen You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License.\n\n5. DISCLAIMER OF WARRANTY.\n\nCOVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN \"AS IS\" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.\n\n6. TERMINATION.\n\n6.1. 
This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive.\n\n6.2. If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as Participant) alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant.\n\n6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination.\n\n7. 
LIMITATION OF LIABILITY.\n\nUNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.\n\n8. U.S. GOVERNMENT END USERS.\n\nThe Covered Software is a commercial item, as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and commercial computer software documentation as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License.\n\n9. MISCELLANEOUS.\n\nThis License represents the complete agreement concerning subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 
This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software.\n\n10. RESPONSIBILITY FOR CLAIMS.\n\nAs between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. 
Nothing herein is intended or shall be deemed to constitute any admission of liability.\n\nNOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)\nThe GlassFish code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions).\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  com.sun.jersey # jersey-core # 1.9 (https://github.com/jersey/jersey-1.x)\n  com.sun.jersey # jersey-json # 1.9 (https://github.com/jersey/jersey-1.x)\n  com.sun.jersey # jersey-server # 1.9 (https://github.com/jersey/jersey-1.x)\n  javax.xml.bind # jaxb-api # 2.2.2\n  com.sun.xml.bind # jaxb-impl # 2.2.3-1\n\n  which are available under the CDDL v1.1 license (https://glassfish.java.net/public/CDDL+GPL_1_1.html)\n  \nCOMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1 \n\n1. Definitions. \n\n    1.1. \"Contributor\" means each individual or entity that creates or \n    contributes to the creation of Modifications. \n\n    1.2. \"Contributor Version\" means the combination of the Original \n    Software, prior Modifications used by a Contributor (if any), and the \n    Modifications made by that particular Contributor. \n\n    1.3. \"Covered Software\" means (a) the Original Software, or (b) \n    Modifications, or (c) the combination of files containing Original \n    Software with files containing Modifications, in each case including \n    portions thereof. \n\n    1.4. \"Executable\" means the Covered Software in any form other than \n    Source Code. \n\n    1.5. \"Initial Developer\" means the individual or entity that first makes \n    Original Software available under this License. \n\n    1.6. \"Larger Work\" means a work which combines Covered Software or \n    portions thereof with code not governed by the terms of this License. \n\n    1.7. \"License\" means this document. \n\n    1.8. 
\"Licensable\" means having the right to grant, to the maximum extent \n    possible, whether at the time of the initial grant or subsequently \n    acquired, any and all of the rights conveyed herein. \n\n    1.9. \"Modifications\" means the Source Code and Executable form of any of \n    the following: \n\n    A. Any file that results from an addition to, deletion from or \n    modification of the contents of a file containing Original Software or \n    previous Modifications; \n\n    B. Any new file that contains any part of the Original Software or \n    previous Modification; or \n\n    C. Any new file that is contributed or otherwise made available under \n    the terms of this License. \n\n    1.10. \"Original Software\" means the Source Code and Executable form of \n    computer software code that is originally released under this License. \n\n    1.11. \"Patent Claims\" means any patent claim(s), now owned or hereafter \n    acquired, including without limitation, method, process, and apparatus \n    claims, in any patent Licensable by grantor. \n\n    1.12. \"Source Code\" means (a) the common form of computer software code \n    in which modifications are made and (b) associated documentation \n    included in or with such code. \n\n    1.13. \"You\" (or \"Your\") means an individual or a legal entity exercising \n    rights under, and complying with all of the terms of, this License. For \n    legal entities, \"You\" includes any entity which controls, is controlled \n    by, or is under common control with You. For purposes of this \n    definition, \"control\" means (a) the power, direct or indirect, to cause \n    the direction or management of such entity, whether by contract or \n    otherwise, or (b) ownership of more than fifty percent (50%) of the \n    outstanding shares or beneficial ownership of such entity. \n\n2. License Grants. \n\n    2.1. The Initial Developer Grant. 
\n\n    Conditioned upon Your compliance with Section 3.1 below and subject to \n    third party intellectual property claims, the Initial Developer hereby \n    grants You a world-wide, royalty-free, non-exclusive license: \n\n    (a) under intellectual property rights (other than patent or trademark) \n    Licensable by Initial Developer, to use, reproduce, modify, display, \n    perform, sublicense and distribute the Original Software (or portions \n    thereof), with or without Modifications, and/or as part of a Larger \n    Work; and \n\n    (b) under Patent Claims infringed by the making, using or selling of \n    Original Software, to make, have made, use, practice, sell, and offer \n    for sale, and/or otherwise dispose of the Original Software (or portions \n    thereof). \n\n    (c) The licenses granted in Sections 2.1(a) and (b) are effective on the \n    date Initial Developer first distributes or otherwise makes the Original \n    Software available to a third party under the terms of this License. \n\n    (d) Notwithstanding Section 2.1(b) above, no patent license is granted: \n    (1) for code that You delete from the Original Software, or (2) for \n    infringements caused by: (i) the modification of the Original Software, \n    or (ii) the combination of the Original Software with other software or \n    devices. \n\n    2.2. Contributor Grant. 
\n\n    Conditioned upon Your compliance with Section 3.1 below and subject to \n    third party intellectual property claims, each Contributor hereby grants \n    You a world-wide, royalty-free, non-exclusive license: \n\n    (a) under intellectual property rights (other than patent or trademark) \n    Licensable by Contributor to use, reproduce, modify, display, perform, \n    sublicense and distribute the Modifications created by such Contributor \n    (or portions thereof), either on an unmodified basis, with other \n    Modifications, as Covered Software and/or as part of a Larger Work; and \n\n    (b) under Patent Claims infringed by the making, using, or selling of \n    Modifications made by that Contributor either alone and/or in \n    combination with its Contributor Version (or portions of such \n    combination), to make, use, sell, offer for sale, have made, and/or \n    otherwise dispose of: (1) Modifications made by that Contributor (or \n    portions thereof); and (2) the combination of Modifications made by that \n    Contributor with its Contributor Version (or portions of such \n    combination). \n\n    (c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on \n    the date Contributor first distributes or otherwise makes the \n    Modifications available to a third party. \n\n    (d) Notwithstanding Section 2.2(b) above, no patent license is granted: \n    (1) for any code that Contributor has deleted from the Contributor \n    Version; (2) for infringements caused by: (i) third party modifications \n    of Contributor Version, or (ii) the combination of Modifications made by \n    that Contributor with other software (except as part of the Contributor \n    Version) or other devices; or (3) under Patent Claims infringed by \n    Covered Software in the absence of Modifications made by that \n    Contributor. \n\n3. Distribution Obligations. \n\n    3.1. Availability of Source Code. 
\n\n    Any Covered Software that You distribute or otherwise make available in \n    Executable form must also be made available in Source Code form and that \n    Source Code form must be distributed only under the terms of this \n    License. You must include a copy of this License with every copy of the \n    Source Code form of the Covered Software You distribute or otherwise \n    make available. You must inform recipients of any such Covered Software \n    in Executable form as to how they can obtain such Covered Software in \n    Source Code form in a reasonable manner on or through a medium \n    customarily used for software exchange. \n\n    3.2. Modifications. \n\n    The Modifications that You create or to which You contribute are \n    governed by the terms of this License. You represent that You believe \n    Your Modifications are Your original creation(s) and/or You have \n    sufficient rights to grant the rights conveyed by this License. \n\n    3.3. Required Notices. \n\n    You must include a notice in each of Your Modifications that identifies \n    You as the Contributor of the Modification. You may not remove or alter \n    any copyright, patent or trademark notices contained within the Covered \n    Software, or any notices of licensing or any descriptive text giving \n    attribution to any Contributor or the Initial Developer. \n\n    3.4. Application of Additional Terms. \n\n    You may not offer or impose any terms on any Covered Software in Source \n    Code form that alters or restricts the applicable version of this \n    License or the recipients' rights hereunder. You may choose to offer, \n    and to charge a fee for, warranty, support, indemnity or liability \n    obligations to one or more recipients of Covered Software. However, you \n    may do so only on Your own behalf, and not on behalf of the Initial \n    Developer or any Contributor. 
You must make it absolutely clear that any \n    such warranty, support, indemnity or liability obligation is offered by \n    You alone, and You hereby agree to indemnify the Initial Developer and \n    every Contributor for any liability incurred by the Initial Developer or \n    such Contributor as a result of warranty, support, indemnity or \n    liability terms You offer. \n\n    3.5. Distribution of Executable Versions. \n\n    You may distribute the Executable form of the Covered Software under the \n    terms of this License or under the terms of a license of Your choice, \n    which may contain terms different from this License, provided that You \n    are in compliance with the terms of this License and that the license \n    for the Executable form does not attempt to limit or alter the \n    recipient's rights in the Source Code form from the rights set forth in \n    this License. If You distribute the Covered Software in Executable form \n    under a different license, You must make it absolutely clear that any \n    terms which differ from this License are offered by You alone, not by \n    the Initial Developer or Contributor. You hereby agree to indemnify the \n    Initial Developer and every Contributor for any liability incurred by \n    the Initial Developer or such Contributor as a result of any such terms \n    You offer. \n\n    3.6. Larger Works. \n\n    You may create a Larger Work by combining Covered Software with other \n    code not governed by the terms of this License and distribute the Larger \n    Work as a single product. In such a case, You must make sure the \n    requirements of this License are fulfilled for the Covered Software. \n\n4. Versions of the License. \n\n    4.1. New Versions. \n\n    Oracle is the initial license steward and may publish revised and/or new \n    versions of this License from time to time. Each version will be given a \n    distinguishing version number. 
Except as provided in Section 4.3, no one \n    other than the license steward has the right to modify this License. \n\n    4.2. Effect of New Versions. \n\n    You may always continue to use, distribute or otherwise make the Covered \n    Software available under the terms of the version of the License under \n    which You originally received the Covered Software. If the Initial \n    Developer includes a notice in the Original Software prohibiting it from \n    being distributed or otherwise made available under any subsequent \n    version of the License, You must distribute and make the Covered \n    Software available under the terms of the version of the License under \n    which You originally received the Covered Software. Otherwise, You may \n    also choose to use, distribute or otherwise make the Covered Software \n    available under the terms of any subsequent version of the License \n    published by the license steward. \n\n    4.3. Modified Versions. \n\n    When You are an Initial Developer and You want to create a new license \n    for Your Original Software, You may create and use a modified version of \n    this License if You: (a) rename the license and remove any references to \n    the name of the license steward (except to note that the license differs \n    from this License); and (b) otherwise make it clear that the license \n    contains terms which differ from this License. \n\n5. DISCLAIMER OF WARRANTY. \n\n    COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN \"AS IS\" BASIS, \n    WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, \n    WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF \n    DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. \n    THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED \n    SOFTWARE IS WITH YOU. 
SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY \n    RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME \n    THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS \n    DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO \n    USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS \n    DISCLAIMER. \n\n6. TERMINATION. \n\n    6.1. This License and the rights granted hereunder will terminate \n    automatically if You fail to comply with terms herein and fail to cure \n    such breach within 30 days of becoming aware of the breach. Provisions \n    which, by their nature, must remain in effect beyond the termination of \n    this License shall survive. \n\n    6.2. If You assert a patent infringement claim (excluding declaratory \n    judgment actions) against Initial Developer or a Contributor (the \n    Initial Developer or Contributor against whom You assert such claim is \n    referred to as \"Participant\") alleging that the Participant Software \n    (meaning the Contributor Version where the Participant is a Contributor \n    or the Original Software where the Participant is the Initial Developer) \n    directly or indirectly infringes any patent, then any and all rights \n    granted directly or indirectly to You by such Participant, the Initial \n    Developer (if the Initial Developer is not the Participant) and all \n    Contributors under Sections 2.1 and/or 2.2 of this License shall, upon \n    60 days notice from Participant terminate prospectively and \n    automatically at the expiration of such 60 day notice period, unless if \n    within such 60 day period You withdraw Your claim with respect to the \n    Participant Software against such Participant either unilaterally or \n    pursuant to a written agreement with Participant. \n\n    6.3. 
If You assert a patent infringement claim against Participant \n    alleging that the Participant Software directly or indirectly infringes \n    any patent where such claim is resolved (such as by license or \n    settlement) prior to the initiation of patent infringement litigation, \n    then the reasonable value of the licenses granted by such Participant \n    under Sections 2.1 or 2.2 shall be taken into account in determining the \n    amount or value of any payment or license. \n\n    6.4. In the event of termination under Sections 6.1 or 6.2 above, all \n    end user licenses that have been validly granted by You or any \n    distributor hereunder prior to termination (excluding licenses granted \n    to You by any distributor) shall survive termination. \n\n7. LIMITATION OF LIABILITY. \n\n    UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT \n    (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL \n    DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED \n    SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY \n    PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES \n    OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF \n    GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL \n    OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN \n    INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF \n    LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY \n    RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW \n    PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION \n    OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION \n    AND LIMITATION MAY NOT APPLY TO YOU. \n\n8. U.S. GOVERNMENT END USERS. \n\n    The Covered Software is a \"commercial item,\" as that term is defined in \n    48 C.F.R. 2.101 (Oct. 
1995), consisting of \"commercial computer \n    software\" (as that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) \n    and \"commercial computer software documentation\" as such terms are used \n    in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and \n    48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government \n    End Users acquire Covered Software with only those rights set forth \n    herein. This U.S. Government Rights clause is in lieu of, and \n    supersedes, any other FAR, DFAR, or other clause or provision that \n    addresses Government rights in computer software under this License. \n\n9. MISCELLANEOUS. \n\n    This License represents the complete agreement concerning subject matter \n    hereof. If any provision of this License is held to be unenforceable, \n    such provision shall be reformed only to the extent necessary to make it \n    enforceable. This License shall be governed by the law of the \n    jurisdiction specified in a notice contained within the Original \n    Software (except to the extent applicable law, if any, provides \n    otherwise), excluding such jurisdiction's conflict-of-law provisions. \n    Any litigation relating to this License shall be subject to the \n    jurisdiction of the courts located in the jurisdiction and venue \n    specified in a notice contained within the Original Software, with the \n    losing party responsible for costs, including, without limitation, court \n    costs and reasonable attorneys' fees and expenses. The application of \n    the United Nations Convention on Contracts for the International Sale of \n    Goods is expressly excluded. Any law or regulation which provides that \n    the language of a contract shall be construed against the drafter shall \n    not apply to this License. 
You agree that You alone are responsible for \n    compliance with the United States export administration regulations (and \n    the export control laws and regulation of any other countries) when You \n    use, distribute or otherwise make available any Covered Software. \n\n10. RESPONSIBILITY FOR CLAIMS. \n\n    As between Initial Developer and the Contributors, each party is \n    responsible for claims and damages arising, directly or indirectly, out \n    of its utilization of rights under this License and You agree to work \n    with Initial Developer and Contributors to distribute such \n    responsibility on an equitable basis. Nothing herein is intended or \n    shall be deemed to constitute any admission of liability. \n\nNOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION \nLICENSE (CDDL) \n\nThe code released under the CDDL shall be governed by the laws of the \nState of California (excluding conflict-of-law provisions). Any \nlitigation relating to this License shall be subject to the jurisdiction \nof the Federal Courts of the Northern District of California and the \nstate courts of the State of California, with venue lying in Santa Clara \nCounty, California. \n\n\n\n\nThe GNU General Public License (GPL) Version 2, June 1991 \n\nCopyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, \nSuite 330, Boston, MA 02111-1307 USA \n\nEveryone is permitted to copy and distribute verbatim copies of this \nlicense document, but changing it is not allowed. \n\nPreamble \n\nThe licenses for most software are designed to take away your freedom to \nshare and change it. By contrast, the GNU General Public License is \nintended to guarantee your freedom to share and change free software--to \nmake sure the software is free for all its users. This General Public \nLicense applies to most of the Free Software Foundation's software and \nto any other program whose authors commit to using it. 
(Some other Free \nSoftware Foundation software is covered by the GNU Library General \nPublic License instead.) You can apply it to your programs, too. \n\nWhen we speak of free software, we are referring to freedom, not price. \nOur General Public Licenses are designed to make sure that you have the \nfreedom to distribute copies of free software (and charge for this \nservice if you wish), that you receive source code or can get it if you \nwant it, that you can change the software or use pieces of it in new \nfree programs; and that you know you can do these things. \n\nTo protect your rights, we need to make restrictions that forbid anyone \nto deny you these rights or to ask you to surrender the rights. These \nrestrictions translate to certain responsibilities for you if you \ndistribute copies of the software, or if you modify it. \n\nFor example, if you distribute copies of such a program, whether gratis \nor for a fee, you must give the recipients all the rights that you have. \nYou must make sure that they, too, receive or can get the source code. \nAnd you must show them these terms so they know their rights. \n\nWe protect your rights with two steps: (1) copyright the software, and \n(2) offer you this license which gives you legal permission to copy, \ndistribute and/or modify the software. \n\nAlso, for each author's protection and ours, we want to make certain \nthat everyone understands that there is no warranty for this free \nsoftware. If the software is modified by someone else and passed on, we \nwant its recipients to know that what they have is not the original, so \nthat any problems introduced by others will not reflect on the original \nauthors' reputations. \n\nFinally, any free program is threatened constantly by software patents. \nWe wish to avoid the danger that redistributors of a free program will \nindividually obtain patent licenses, in effect making the program \nproprietary. 
To prevent this, we have made it clear that any patent must \nbe licensed for everyone's free use or not licensed at all. \n\nThe precise terms and conditions for copying, distribution and \nmodification follow. \n\nTERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION \n\n0. This License applies to any program or other work which contains a \nnotice placed by the copyright holder saying it may be distributed under \nthe terms of this General Public License. The \"Program\", below, refers \nto any such program or work, and a \"work based on the Program\" means \neither the Program or any derivative work under copyright law: that is \nto say, a work containing the Program or a portion of it, either \nverbatim or with modifications and/or translated into another language. \n(Hereinafter, translation is included without limitation in the term \n\"modification\".) Each licensee is addressed as \"you\". \n\nActivities other than copying, distribution and modification are not \ncovered by this License; they are outside its scope. The act of running \nthe Program is not restricted, and the output from the Program is \ncovered only if its contents constitute a work based on the Program \n(independent of having been made by running the Program). Whether that \nis true depends on what the Program does. \n\n1. You may copy and distribute verbatim copies of the Program's source \ncode as you receive it, in any medium, provided that you conspicuously \nand appropriately publish on each copy an appropriate copyright notice \nand disclaimer of warranty; keep intact all the notices that refer to \nthis License and to the absence of any warranty; and give any other \nrecipients of the Program a copy of this License along with the Program. \n\nYou may charge a fee for the physical act of transferring a copy, and \nyou may at your option offer warranty protection in exchange for a fee. \n\n2. 
You may modify your copy or copies of the Program or any portion of \nit, thus forming a work based on the Program, and copy and distribute \nsuch modifications or work under the terms of Section 1 above, provided \nthat you also meet all of these conditions: \n\n    a) You must cause the modified files to carry prominent notices stating \n    that you changed the files and the date of any change. \n\n    b) You must cause any work that you distribute or publish, that in whole \n    or in part contains or is derived from the Program or any part thereof, \n    to be licensed as a whole at no charge to all third parties under the \n    terms of this License. \n\n    c) If the modified program normally reads commands interactively when \n    run, you must cause it, when started running for such interactive use in \n    the most ordinary way, to print or display an announcement including an \n    appropriate copyright notice and a notice that there is no warranty (or \n    else, saying that you provide a warranty) and that users may \n    redistribute the program under these conditions, and telling the user \n    how to view a copy of this License. (Exception: if the Program itself is \n    interactive but does not normally print such an announcement, your work \n    based on the Program is not required to print an announcement.) \n\nThese requirements apply to the modified work as a whole. If \nidentifiable sections of that work are not derived from the Program, and \ncan be reasonably considered independent and separate works in \nthemselves, then this License, and its terms, do not apply to those \nsections when you distribute them as separate works. But when you \ndistribute the same sections as part of a whole which is a work based on \nthe Program, the distribution of the whole must be on the terms of this \nLicense, whose permissions for other licensees extend to the entire \nwhole, and thus to each and every part regardless of who wrote it. 
\n\nThus, it is not the intent of this section to claim rights or contest \nyour rights to work written entirely by you; rather, the intent is to \nexercise the right to control the distribution of derivative or \ncollective works based on the Program. \n\nIn addition, mere aggregation of another work not based on the Program \nwith the Program (or with a work based on the Program) on a volume of a \nstorage or distribution medium does not bring the other work under the \nscope of this License. \n\n3. You may copy and distribute the Program (or a work based on it, under \nSection 2) in object code or executable form under the terms of Sections \n1 and 2 above provided that you also do one of the following: \n\n    a) Accompany it with the complete corresponding machine-readable source \n    code, which must be distributed under the terms of Sections 1 and 2 \n    above on a medium customarily used for software interchange; or, \n\n    b) Accompany it with a written offer, valid for at least three years, to \n    give any third party, for a charge no more than your cost of physically \n    performing source distribution, a complete machine-readable copy of the \n    corresponding source code, to be distributed under the terms of Sections \n    1 and 2 above on a medium customarily used for software interchange; or, \n\n    c) Accompany it with the information you received as to the offer to \n    distribute corresponding source code. (This alternative is allowed only \n    for noncommercial distribution and only if you received the program in \n    object code or executable form with such an offer, in accord with \n    Subsection b above.) \n\nThe source code for a work means the preferred form of the work for \nmaking modifications to it. 
For an executable work, complete source code \nmeans all the source code for all modules it contains, plus any \nassociated interface definition files, plus the scripts used to control \ncompilation and installation of the executable. However, as a special \nexception, the source code distributed need not include anything that is \nnormally distributed (in either source or binary form) with the major \ncomponents (compiler, kernel, and so on) of the operating system on \nwhich the executable runs, unless that component itself accompanies the \nexecutable. \n\nIf distribution of executable or object code is made by offering access \nto copy from a designated place, then offering equivalent access to copy \nthe source code from the same place counts as distribution of the source \ncode, even though third parties are not compelled to copy the source \nalong with the object code. \n\n4. You may not copy, modify, sublicense, or distribute the Program \nexcept as expressly provided under this License. Any attempt otherwise \nto copy, modify, sublicense or distribute the Program is void, and will \nautomatically terminate your rights under this License. However, parties \nwho have received copies, or rights, from you under this License will \nnot have their licenses terminated so long as such parties remain in \nfull compliance. \n\n5. You are not required to accept this License, since you have not \nsigned it. However, nothing else grants you permission to modify or \ndistribute the Program or its derivative works. These actions are \nprohibited by law if you do not accept this License. Therefore, by \nmodifying or distributing the Program (or any work based on the \nProgram), you indicate your acceptance of this License to do so, and all \nits terms and conditions for copying, distributing or modifying the \nProgram or works based on it. \n\n6. 
Each time you redistribute the Program (or any work based on the \nProgram), the recipient automatically receives a license from the \noriginal licensor to copy, distribute or modify the Program subject to \nthese terms and conditions. You may not impose any further restrictions \non the recipients' exercise of the rights granted herein. You are not \nresponsible for enforcing compliance by third parties to this License. \n\n7. If, as a consequence of a court judgment or allegation of patent \ninfringement or for any other reason (not limited to patent issues), \nconditions are imposed on you (whether by court order, agreement or \notherwise) that contradict the conditions of this License, they do not \nexcuse you from the conditions of this License. If you cannot distribute \nso as to satisfy simultaneously your obligations under this License and \nany other pertinent obligations, then as a consequence you may not \ndistribute the Program at all. For example, if a patent license would \nnot permit royalty-free redistribution of the Program by all those who \nreceive copies directly or indirectly through you, then the only way you \ncould satisfy both it and this License would be to refrain entirely from \ndistribution of the Program. \n\nIf any portion of this section is held invalid or unenforceable under \nany particular circumstance, the balance of the section is intended to \napply and the section as a whole is intended to apply in other \ncircumstances. \n\nIt is not the purpose of this section to induce you to infringe any \npatents or other property right claims or to contest validity of any \nsuch claims; this section has the sole purpose of protecting the \nintegrity of the free software distribution system, which is implemented \nby public license practices. 
Many people have made generous \ncontributions to the wide range of software distributed through that \nsystem in reliance on consistent application of that system; it is up to \nthe author/donor to decide if he or she is willing to distribute \nsoftware through any other system and a licensee cannot impose that \nchoice. \n\nThis section is intended to make thoroughly clear what is believed to be \na consequence of the rest of this License. \n\n8. If the distribution and/or use of the Program is restricted in \ncertain countries either by patents or by copyrighted interfaces, the \noriginal copyright holder who places the Program under this License may \nadd an explicit geographical distribution limitation excluding those \ncountries, so that distribution is permitted only in or among countries \nnot thus excluded. In such case, this License incorporates the \nlimitation as if written in the body of this License. \n\n9. The Free Software Foundation may publish revised and/or new versions \nof the General Public License from time to time. Such new versions will \nbe similar in spirit to the present version, but may differ in detail to \naddress new problems or concerns. \n\nEach version is given a distinguishing version number. If the Program \nspecifies a version number of this License which applies to it and \"any \nlater version\", you have the option of following the terms and \nconditions either of that version or of any later version published by \nthe Free Software Foundation. If the Program does not specify a version \nnumber of this License, you may choose any version ever published by the \nFree Software Foundation. \n\n10. If you wish to incorporate parts of the Program into other free \nprograms whose distribution conditions are different, write to the \nauthor to ask for permission. For software which is copyrighted by the \nFree Software Foundation, write to the Free Software Foundation; we \nsometimes make exceptions for this. 
Our decision will be guided by the \ntwo goals of preserving the free status of all derivatives of our free \nsoftware and of promoting the sharing and reuse of software generally. \n\nNO WARRANTY \n\n11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY \nFOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN \nOTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES \nPROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER \nEXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE \nENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH \nYOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL \nNECESSARY SERVICING, REPAIR OR CORRECTION. \n\n12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN \nWRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY \nAND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR \nDAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL \nDAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM \n(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED \nINACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF \nTHE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR \nOTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. \n\nEND OF TERMS AND CONDITIONS \n\nHow to Apply These Terms to Your New Programs \n\nIf you develop a new program, and you want it to be of the greatest \npossible use to the public, the best way to achieve this is to make it \nfree software which everyone can redistribute and change under these \nterms. \n\nTo do so, attach the following notices to the program. 
It is safest to \nattach them to the start of each source file to most effectively convey \nthe exclusion of warranty; and each file should have at least the \n\"copyright\" line and a pointer to where the full notice is found. \n\n    One line to give the program's name and a brief idea of what it does. \n    Copyright (C) <year> <name of author> \n\n    This program is free software; you can redistribute it and/or modify it \n    under the terms of the GNU General Public License as published by the \n    Free Software Foundation; either version 2 of the License, or (at your \n    option) any later version. \n\n    This program is distributed in the hope that it will be useful, but \n    WITHOUT ANY WARRANTY; without even the implied warranty of \n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General \n    Public License for more details. \n\n    You should have received a copy of the GNU General Public License along \n    with this program; if not, write to the Free Software Foundation, Inc., \n    59 Temple Place, Suite 330, Boston, MA 02111-1307 USA \n\nAlso add information on how to contact you by electronic and paper mail. \n\nIf the program is interactive, make it output a short notice like this \nwhen it starts in an interactive mode: \n\n    Gnomovision version 69, Copyright (C) year name of author Gnomovision \n    comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is \n    free software, and you are welcome to redistribute it under certain \n    conditions; type `show c' for details. \n\nThe hypothetical commands `show w' and `show c' should show the \nappropriate parts of the General Public License. Of course, the commands \nyou use may be called something other than `show w' and `show c'; they \ncould even be mouse-clicks or menu items--whatever suits your program. 
\n\nYou should also get your employer (if you work as a programmer) or your \nschool, if any, to sign a \"copyright disclaimer\" for the program, if \nnecessary. Here is a sample; alter the names: \n\n    Yoyodyne, Inc., hereby disclaims all copyright interest in the program \n    `Gnomovision' (which makes passes at compilers) written by James Hacker. \n\n    signature of Ty Coon, 1 April 1989\n    Ty Coon, President of Vice \n\nThis General Public License does not permit incorporating your program \ninto proprietary programs. If your program is a subroutine library, you \nmay consider it more useful to permit linking proprietary applications \nwith the library. If this is what you want to do, use the GNU Library \nGeneral Public License instead of this License.\n\n# \n\n\"CLASSPATH\" EXCEPTION TO THE GPL VERSION 2 \n\nCertain source files distributed by Oracle are subject to the following \nclarification and special exception to the GPL Version 2, but only where \nOracle has expressly included in the particular source file's header the \nwords \"Oracle designates this particular file as subject to the \n\"Classpath\" exception as provided by Oracle in the License file that \naccompanied this code.\" \n\nLinking this library statically or dynamically with other modules is \nmaking a combined work based on this library. Thus, the terms and \nconditions of the GNU General Public License Version 2 cover the whole \ncombination. \n\nAs a special exception, the copyright holders of this library give you \npermission to link this library with independent modules to produce an \nexecutable, regardless of the license terms of these independent \nmodules, and to copy and distribute the resulting executable under terms \nof your choice, provided that you also meet, for each linked independent \nmodule, the terms and conditions of the license of that module. An \nindependent module is a module which is not derived from or based on \nthis library. 
If you modify this library, you may extend this exception \nto your version of the library, but you are not obligated to do so. If \nyou do not wish to do so, delete this exception statement from your \nversion. \n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  junit # junit # 4.12 (http://junit.org/junit4/)\n  \n  which are available under the CPL v1.0 license (https://eclipse.org/legal/cpl-v10.html)\n\nCommon Public License - v 1.0\n\nTHE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC\nLICENSE (\"AGREEMENT\"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM\nCONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.\n\n1. DEFINITIONS\n\n\"Contribution\" means:\n\n      a) in the case of the initial Contributor, the initial code and\n         documentation distributed under this Agreement, and\n      b) in the case of each subsequent Contributor:\n\n      i) changes to the Program, and\n\n      ii) additions to the Program;\n\n      where such changes and/or additions to the Program originate from and are\ndistributed by that particular Contributor. A Contribution 'originates' from a\nContributor if it was added to the Program by such Contributor itself or anyone\nacting on such Contributor's behalf. Contributions do not include additions to\nthe Program which: (i) are separate modules of software distributed in\nconjunction with the Program under their own license agreement, and (ii) are\nnot derivative works of the Program. 
\n\n\"Contributor\" means any person or entity that distributes the Program.\n\n\"Licensed Patents \" mean patent claims licensable by a Contributor which are\nnecessarily infringed by the use or sale of its Contribution alone or when\ncombined with the Program.\n\n\"Program\" means the Contributions distributed in accordance with this Agreement.\n\n\"Recipient\" means anyone who receives the Program under this Agreement,\nincluding all Contributors.\n\n2. GRANT OF RIGHTS\n\n      a) Subject to the terms of this Agreement, each Contributor hereby grants\nRecipient a non-exclusive, worldwide, royalty-free copyright license to\nreproduce, prepare derivative works of, publicly display, publicly perform,\ndistribute and sublicense the Contribution of such Contributor, if any, and\nsuch derivative works, in source code and object code form.\n\n      b) Subject to the terms of this Agreement, each Contributor hereby grants\nRecipient a non-exclusive, worldwide, royalty-free patent license under\nLicensed Patents to make, use, sell, offer to sell, import and otherwise\ntransfer the Contribution of such Contributor, if any, in source code and\nobject code form. This patent license shall apply to the combination of the\nContribution and the Program if, at the time the Contribution is added by the\nContributor, such addition of the Contribution causes such combination to be\ncovered by the Licensed Patents. The patent license shall not apply to any\nother combinations which include the Contribution. No hardware per se is\nlicensed hereunder. \n\n      c) Recipient understands that although each Contributor grants the\nlicenses to its Contributions set forth herein, no assurances are provided by\nany Contributor that the Program does not infringe the patent or other\nintellectual property rights of any other entity. Each Contributor disclaims\nany liability to Recipient for claims brought by any other entity based on\ninfringement of intellectual property rights or otherwise. 
As a condition to\nexercising the rights and licenses granted hereunder, each Recipient hereby\nassumes sole responsibility to secure any other intellectual property rights\nneeded, if any. For example, if a third party patent license is required to\nallow Recipient to distribute the Program, it is Recipient's responsibility to\nacquire that license before distributing the Program.\n\n      d) Each Contributor represents that to its knowledge it has sufficient\ncopyright rights in its Contribution, if any, to grant the copyright license\nset forth in this Agreement. \n\n3. REQUIREMENTS\n\nA Contributor may choose to distribute the Program in object code form under\nits own license agreement, provided that:\n\n      a) it complies with the terms and conditions of this Agreement; and\n\n      b) its license agreement:\n\n      i) effectively disclaims on behalf of all Contributors all warranties and\nconditions, express and implied, including warranties or conditions of title\nand non-infringement, and implied warranties or conditions of merchantability\nand fitness for a particular purpose; \n\n      ii) effectively excludes on behalf of all Contributors all liability for\ndamages, including direct, indirect, special, incidental and consequential\ndamages, such as lost profits; \n\n      iii) states that any provisions which differ from this Agreement are\noffered by that Contributor alone and not by any other party; and\n\n      iv) states that source code for the Program is available from such\nContributor, and informs licensees how to obtain it in a reasonable manner on\nor through a medium customarily used for software exchange. \n\nWhen the Program is made available in source code form:\n\n      a) it must be made available under this Agreement; and \n\n      b) a copy of this Agreement must be included with each copy of the\nProgram. 
\n\nContributors may not remove or alter any copyright notices contained within the\nProgram.\n\nEach Contributor must identify itself as the originator of its Contribution, if\nany, in a manner that reasonably allows subsequent Recipients to identify the\noriginator of the Contribution.\n\n4. COMMERCIAL DISTRIBUTION\n\nCommercial distributors of software may accept certain responsibilities with\nrespect to end users, business partners and the like. While this license is\nintended to facilitate the commercial use of the Program, the Contributor who\nincludes the Program in a commercial product offering should do so in a manner\nwhich does not create potential liability for other Contributors. Therefore, if\na Contributor includes the Program in a commercial product offering, such\nContributor (\"Commercial Contributor\") hereby agrees to defend and indemnify\nevery other Contributor (\"Indemnified Contributor\") against any losses, damages\nand costs (collectively \"Losses\") arising from claims, lawsuits and other legal\nactions brought by a third party against the Indemnified Contributor to the\nextent caused by the acts or omissions of such Commercial Contributor in\nconnection with its distribution of the Program in a commercial product\noffering. The obligations in this section do not apply to any claims or Losses\nrelating to any actual or alleged intellectual property infringement. In order\nto qualify, an Indemnified Contributor must: a) promptly notify the Commercial\nContributor in writing of such claim, and b) allow the Commercial Contributor\nto control, and cooperate with the Commercial Contributor in, the defense and\nany related settlement negotiations. The Indemnified Contributor may\nparticipate in any such claim at its own expense.\n\nFor example, a Contributor might include the Program in a commercial product\noffering, Product X. That Contributor is then a Commercial Contributor. 
If that\nCommercial Contributor then makes performance claims, or offers warranties\nrelated to Product X, those performance claims and warranties are such\nCommercial Contributor's responsibility alone. Under this section, the\nCommercial Contributor would have to defend claims against the other\nContributors related to those performance claims and warranties, and if a court\nrequires any other Contributor to pay any damages as a result, the Commercial\nContributor must pay those damages.\n\n5. NO WARRANTY\n\nEXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR\nIMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,\nNON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each\nRecipient is solely responsible for determining the appropriateness of using\nand distributing the Program and assumes all risks associated with its exercise\nof rights under this Agreement, including but not limited to the risks and\ncosts of program errors, compliance with applicable laws, damage to or loss of\ndata, programs or equipment, and unavailability or interruption of operations.\n\n6. DISCLAIMER OF LIABILITY\n\nEXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY\nCONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST\nPROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,\nSTRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY\nWAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS\nGRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\n\n7. 
GENERAL\n\nIf any provision of this Agreement is invalid or unenforceable under applicable\nlaw, it shall not affect the validity or enforceability of the remainder of the\nterms of this Agreement, and without further action by the parties hereto, such\nprovision shall be reformed to the minimum extent necessary to make such\nprovision valid and enforceable.\n\nIf Recipient institutes patent litigation against a Contributor with respect to\na patent applicable to software (including a cross-claim or counterclaim in a\nlawsuit), then any patent licenses granted by that Contributor to such\nRecipient under this Agreement shall terminate as of the date such litigation\nis filed. In addition, if Recipient institutes patent litigation against any\nentity (including a cross-claim or counterclaim in a lawsuit) alleging that the\nProgram itself (excluding combinations of the Program with other software or\nhardware) infringes such Recipient's patent(s), then such Recipient's rights\ngranted under Section 2(b) shall terminate as of the date such litigation is\nfiled.\n\nAll Recipient's rights under this Agreement shall terminate if it fails to\ncomply with any of the material terms or conditions of this Agreement and does\nnot cure such failure in a reasonable period of time after becoming aware of\nsuch noncompliance. If all Recipient's rights under this Agreement terminate,\nRecipient agrees to cease use and distribution of the Program as soon as\nreasonably practicable. However, Recipient's obligations under this Agreement\nand any licenses granted by Recipient relating to the Program shall continue\nand survive.\n\nEveryone is permitted to copy and distribute copies of this Agreement, but in\norder to avoid inconsistency the Agreement is copyrighted and may only be\nmodified in the following manner. 
The Agreement Steward reserves the right to\npublish new versions (including revisions) of this Agreement from time to time.\nNo one other than the Agreement Steward has the right to modify this Agreement.\nIBM is the initial Agreement Steward. IBM may assign the responsibility to\nserve as the Agreement Steward to a suitable separate entity. Each new version\nof the Agreement will be given a distinguishing version number. The Program\n(including Contributions) may always be distributed subject to the version of\nthe Agreement under which it was received. In addition, after a new version of\nthe Agreement is published, Contributor may elect to distribute the Program\n(including its Contributions) under the new version. Except as expressly stated\nin Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to\nthe intellectual property of any Contributor under this Agreement, whether\nexpressly, by implication, estoppel or otherwise. All rights in the Program not\nexpressly granted under this Agreement are reserved.\n\nThis Agreement is governed by the laws of the State of New York and the\nintellectual property laws of the United States of America. No party to this\nAgreement will bring a legal action under this Agreement more than one year\nafter the cause of action arose. Each party waives its rights to a jury trial\nin any resulting litigation. 
\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  org.jamon # jamon-runtime # 2.4.1 (http://www.jamon.org/)\n\n  which are available under the MPL v1.1 license (http://www.mozilla.org/MPL/MPL-1.1.txt)\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  org.slf4j # slf4j-api # 1.7.25 (https://www.slf4j.org/)\n  org.slf4j # slf4j-api # 1.7.10 (https://www.slf4j.org/)\n  org.slf4j # slf4j-api # 1.7.2 (https://www.slf4j.org/)\n  org.slf4j # slf4j-log4j12 # 1.7.18 (https://www.slf4j.org/)\n  org.slf4j # slf4j-log4j12 # 1.7.10 (https://www.slf4j.org/)\n  org.jruby.jcodings # jcodings # 1.0.8 (https://github.com/jruby/jcodings/)\n  org.jruby.joni # joni # 2.1.2 (https://github.com/jruby/joni/)\n  \n  \n  which are available under the MIT license (http://opensource.org/licenses/mit-license.php)\n  \n  Copyright (c) 2004-2008 QOS.ch\n  All rights reserved.\n\n  Permission is hereby granted, free of charge, to any person obtaining\n  a copy of this software and associated documentation files (the\n  \"Software\"), to deal in the Software without restriction, including\n  without limitation the rights to use, copy, modify, merge, publish,\n  distribute, sublicense, and/or sell copies of the Software, and to\n  permit persons to whom the Software is furnished to do so, subject to\n  the following conditions:\n\n  The above copyright notice and this permission notice shall be\n  included in all copies or substantial portions of the Software.\n\n  THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n  NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\n  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\n  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\n  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  \n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  com.github.zafarkhaja # java-semver # 0.9.0 (https://github.com/zafarkhaja/jsemver)\n  \n  which are available under the MIT license (http://opensource.org/licenses/mit-license.php)\n  \nThe MIT License\n\nCopyright 2012-2014 Zafar Khaja <zafarkhaja@gmail.com>.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  com.github.scopt # scopt_2.11 # 3.5.0 (https://github.com/scopt/scopt)\n\n  which are available under the MIT license (http://opensource.org/licenses/mit-license.php)\n  \nCopyright (c) scopt contributors\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  com.esotericsoftware # kryo # 3.0.3 (https://github.com/EsotericSoftware/kryo)\n  com.esotericsoftware # minlog # 1.3.0 (https://github.com/EsotericSoftware/minlog)\n  com.esotericsoftware # reflectasm # 1.10.1 (https://github.com/EsotericSoftware/reflectasm)\n  com.esotericsoftware.kryo # kryo # 2.21 (https://github.com/EsotericSoftware/kryo)\n  com.esotericsoftware.minlog # minlog # 1.2 (https://github.com/EsotericSoftware/minlog)\n  com.esotericsoftware.reflectasm # reflectasm # 1.07 (https://github.com/EsotericSoftware/reflectasm)\n  \n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n  \nCopyright (c) 2008, Nathan Sweet\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n\n    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\n    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\n    * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  com.google.protobuf # protobuf-java # 2.5.0 (https://github.com/google/protobuf)\n  com.google.protobuf # protobuf-java # 2.6.1 (https://github.com/google/protobuf)\n  \n  Copyright 2008, Google Inc.\n  All rights reserved.\n\n  Redistribution and use in source and binary forms, with or without\n  modification, are permitted provided that the following conditions are\n  met:\n\n      * Redistributions of source code must retain the above copyright\n  notice, this list of conditions and the following disclaimer.\n      * Redistributions in binary form must reproduce the above\n  copyright notice, this list of conditions and the following disclaimer\n  in the documentation and/or other materials provided with the\n  distribution.\n      * Neither the name of Google Inc. nor the names of its\n  contributors may be used to endorse or promote products derived from\n  this software without specific prior written permission.\n\n  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n  \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n  A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n  Code generated by the Protocol Buffer compiler is owned by the owner\n  of the input file used when generating it.  This code is not\n  standalone and requires a support library to be linked with it.  This\n  support library is itself covered by the above license.\n\n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  xmlenc # xmlenc # 0.52 (http://xmlenc.sourceforge.net/)\n\n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n\nCopyright 2003-2005, Ernst de Haan <wfe.dehaan@gmail.com>\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n3. 
Neither the name of the copyright holder nor the names of its contributors\n   may be used to endorse or promote products derived from this software\n   without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  com.thoughtworks.paranamer # paranamer # 2.3 (https://github.com/paul-hammant/paranamer)\n  com.thoughtworks.paranamer # paranamer # 2.6 (https://github.com/paul-hammant/paranamer)\n  \n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n  \n Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc\n All rights reserved.\n\n Redistribution and use in source and binary forms, with or without\n modification, are permitted provided that the following conditions\n are met:\n 1. Redistributions of source code must retain the above copyright\n    notice, this list of conditions and the following disclaimer.\n 2. Redistributions in binary form must reproduce the above copyright\n    notice, this list of conditions and the following disclaimer in the\n    documentation and/or other materials provided with the distribution.\n 3. 
Neither the name of the copyright holders nor the names of its\n    contributors may be used to endorse or promote products derived from\n    this software without specific prior written permission.\n\n THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\n LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\n THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  org.hamcrest # hamcrest-core # 1.3 (http://hamcrest.org/JavaHamcrest/)\n  \n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n  \nCopyright (c) 2000-2015 www.hamcrest.org\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\nRedistributions of source code must retain the above copyright notice, this list of\nconditions and the following disclaimer. 
Redistributions in binary form must reproduce\nthe above copyright notice, this list of conditions and the following disclaimer in\nthe documentation and/or other materials provided with the distribution.\n\nNeither the name of Hamcrest nor the names of its contributors may be used to endorse\nor promote products derived from this software without specific prior written\npermission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY\nEXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\nOF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT\nSHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,\nINCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\nTO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\nBUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\nCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY\nWAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH\nDAMAGE. \n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  asm # asm # 3.1 (http://asm.ow2.org/)\n  org.ow2.asm # asm # 5.0.3 (http://asm.ow2.org/)\n  \n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n\nCopyright (c) 2000-2011 INRIA, France Telecom\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions\nare met:\n\n1. Redistributions of source code must retain the above copyright\n   notice, this list of conditions and the following disclaimer.\n\n2. 
Redistributions in binary form must reproduce the above copyright\n   notice, this list of conditions and the following disclaimer in the\n   documentation and/or other materials provided with the distribution.\n\n3. Neither the name of the copyright holders nor the names of its\n   contributors may be used to endorse or promote products derived from\n   this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\nARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\nLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\nSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\nINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\nCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\nARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\nTHE POSSIBILITY OF SUCH DAMAGE.  \n--------------------------------------------------------------------------------\nBinary distribution bundles\n  \n  org.clapper # grizzled-slf4j_2.11 # 1.0.2 (http://software.clapper.org/grizzled-slf4j/)\n  \n  which is available under the BSD license (http://www.opensource.org/licenses/bsd-license.php)\n  \nCopyright (c) 2010-2016, Brian M. Clapper.\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n2. 
Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n3. Neither the name of the copyright holder nor the names of its contributors\n   may be used to endorse or promote products derived from this software without\n   specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH\nDAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  com.jcraft # jsch # 0.1.54 (http://www.jcraft.com/jsch/)\n\n  which is available under the BSD license (http://www.jcraft.com/jsch/LICENSE.txt)\n  \nCopyright (c) 2002-2015 Atsuhiko Yamanaka, JCraft,Inc. \nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n  1. Redistributions of source code must retain the above copyright notice,\n     this list of conditions and the following disclaimer.\n\n  2. 
Redistributions in binary form must reproduce the above copyright \n     notice, this list of conditions and the following disclaimer in \n     the documentation and/or other materials provided with the distribution.\n\n  3. The names of the authors may not be used to endorse or promote products\n     derived from this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,\nINCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\nFITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,\nINC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,\nINCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,\nOR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\nLIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\nEVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  org.scala-lang # scala-library # 2.11.12 (http://scala-lang.org/)\n  org.scala-lang # scala-compiler # 2.11.12 (http://scala-lang.org/)\n  org.scala-lang # scala-reflect # 2.11.12 (http://scala-lang.org/)\n  org.scala-lang # scalap # 2.11.12 (http://scala-lang.org/)\n  org.scala-lang.modules # scala-java8-compat_2.11 # 0.7.0 (http://scala-lang.org/)\n  org.scala-lang.modules # scala-parser-combinators_2.11 # 1.0.6 (http://scala-lang.org/)\n  org.scala-lang.modules # scala-parser-combinators_2.11 # 1.1.0 (http://scala-lang.org/)\n  org.scala-lang.modules # scala-xml_2.11 # 1.0.5 (http://scala-lang.org/)\n  \n  which is available under the BSD license (http://www.scala-lang.org/downloads/license.html)\n\nCopyright (c) 2002-2017 EPFL\nCopyright (c) 2011-2017 
Lightbend, Inc.\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n  * Redistributions of source code must retain the above copyright notice,\n    this list of conditions and the following disclaimer.\n  * Redistributions in binary form must reproduce the above copyright notice,\n    this list of conditions and the following disclaimer in the documentation\n    and/or other materials provided with the distribution.\n  * Neither the name of the EPFL nor the names of its contributors\n    may be used to endorse or promote products derived from this software\n    without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\nCONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\nEXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\nPROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\nPROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\nLIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\nNEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nBinary distribution bundles\n\n  org.fusesource.leveldbjni # leveldbjni-all # 1.8 (https://github.com/fusesource/leveldbjni)\n\n  which is available under the BSD license (http://www.opensource.org/licenses/BSD-3-Clause)\n\nCopyright (c) 2011 FuseSource Corp. 
All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n\n   * Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n   * Redistributions in binary form must reproduce the above\ncopyright notice, this list of conditions and the following disclaimer\nin the documentation and/or other materials provided with the\ndistribution.\n   * Neither the name of FuseSource Corp. nor the names of its\ncontributors may be used to endorse or promote products derived from\nthis software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n--------------------------------------------------------------------------------\nThe following libraries are in the public domain.\n\n  org.tukaani # xz # 1.0 (http://tukaani.org/xz/java.html)\n  org.reactivestreams # reactive-streams # 1.0.2 (http://www.reactive-streams.org/)\n"
  },
  {
    "path": "NOTICE.txt",
    "content": "Apache PredictionIO\nCopyright 2016 The Apache Software Foundation\n\nThis product includes software developed at\nThe Apache Software Foundation (http://www.apache.org/).\n\nThis product depends on third party software that falls under a variety of licenses.\nAll dependencies with licenses other than Apache are specified in the LICENSE file.\nPlease see LICENSE for additional copyright and licensing information.\n"
  },
  {
    "path": "PMC.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# Project Management Committee Documentation\n\nThis outlines the steps for a PMC member to create a new release. More details\nand policy guidelines can be found here: http://www.apache.org/dev/release-distribution\n\n## Release Procedure\n\n1. Generate code signing key if you do not already have one for Apache. Refer to\nhttp://apache.org/dev/openpgp.html#generate-key on how to generate a strong code\nsigning key.\n2. Add your public key to the `KEYS` file at the root of the source code tree.\n3. Create a new release branch, with version bumped to the next release version.\n    * `git checkout -b release/0.15.0`\n    * Replace all `0.15.0-SNAPSHOT` in the code tree to `0.15.0`\n    * `git commit -am \"Prepare 0.15.0-rc1\"`\n    * `git tag -am \"Apache PredictionIO 0.15.0-rc1\" v0.15.0-rc1`\n4. Push the release branch and tag to the apache git repo.\n5. Wait for Travis to pass build on the release branch.\n6. Package a clean tarball for staging a release candidate.\n    * `git archive --format tar v0.15.0-rc1 >\n  ../apache-predictionio-0.15.0-rc1.tar`\n    * `cd ..; gzip apache-predictionio-0.15.0-rc1.tar`\n7. 
Generate detached signature for the release candidate.\n(http://apache.org/dev/release-signing.html#openpgp-ascii-detach-sig)\n    * `gpg --armor --output apache-predictionio-0.15.0-rc1.tar.gz.asc\n  --detach-sig apache-predictionio-0.15.0-rc1.tar.gz`\n8. Generate SHA512 checksums for the release candidate.\n    * `gpg --print-md SHA512 apache-predictionio-0.15.0-rc1.tar.gz >\n  apache-predictionio-0.15.0-rc1.tar.gz.sha512`\n9. Run `./make-distribution.sh` and repeat steps 6 to 8 to create binary distribution release.\n    * `mv PredictionIO-0.15.0.tar.gz apache-predictionio-0.15.0-bin.tar.gz`\n    * `gpg --armor --output apache-predictionio-0.15.0-bin.tar.gz.asc\n  --detach-sig apache-predictionio-0.15.0-bin.tar.gz`\n    * `gpg --print-md SHA512 apache-predictionio-0.15.0-bin.tar.gz >\n  apache-predictionio-0.15.0-bin.tar.gz.sha512`\n10. If you have not done so, use SVN to checkout\nhttps://dist.apache.org/repos/dist/dev/predictionio. This is the area\nfor staging release candidates for voting.\n    * `svn co https://dist.apache.org/repos/dist/dev/predictionio`\n11. Create a subdirectory at the SVN staging area. The area should have a `KEYS` file.\n    * `mkdir apache-predictionio-0.15.0-rc1`\n    * `cp apache-predictionio-0.15.0-* apache-predictionio-0.15.0-rc1`\n12. If you have updated the `KEYS` file, also copy that to the staging area.\n13. `svn commit -m \"Apache PredictionIO 0.15.0-rc1\"`\n14. Set up credentials with Apache Nexus using the SBT Sonatype plugin. Put this\nin `~/.sbt/1.0/sonatype.sbt`.\n\n  ```\n  publishTo := {\n      val nexus = \"https://repository.apache.org/\"\n      if (isSnapshot.value)\n        Some(\"snapshots\" at nexus + \"content/repositories/snapshots\")\n      else\n        Some(\"releases\" at nexus + \"service/local/staging/deploy/maven2\")\n  }\n\n  credentials += Credentials(\"Sonatype Nexus Repository Manager\", \"repository.apache.org\", \"<YOUR APACHE LDAP USERNAME>\", \"<YOUR APACHE LDAP PASSWORD>\")\n  ```\n15. 
Run `sbt/sbt +publishLocal` first and then run `sbt/sbt +publishSigned +storage/publishSigned`.\nClose the staged repository on Apache Nexus.\n16. Send out an email for voting on the PredictionIO dev mailing list.\n\n  ```\n  Subject: [VOTE] Apache PredictionIO 0.15.0 Release (RC1)\n\n  This is the vote for 0.15.0 of Apache PredictionIO.\n\n  The vote will run for at least 72 hours and will close on Apr 7th, 2017.\n\n  The release candidate artifacts can be downloaded here: https://dist.apache.org/repos/dist/dev/predictionio/apache-predictionio-0.15.0-rc1/\n\n  Test results of RC1 can be found here: https://travis-ci.org/apache/predictionio/builds/xxx\n\n  Maven artifacts are built from the release candidate artifacts above, and are provided as a convenience for testing with engine templates. The Maven artifacts are provided at the Maven staging repo here: https://repository.apache.org/content/repositories/orgapachepredictionio-nnnn/\n\n  All JIRAs completed for this release are tagged with 'FixVersion = 0.15.0'. You can view them here:\n  https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12320420&version=12337844\n\n  The artifacts have been signed with Key : YOUR_KEY_ID\n\n  Please vote accordingly:\n\n  [ ] +1, accept RC as the official 0.15.0 release\n  [ ] -1, do not accept RC as the official 0.15.0 release because...\n  ```\n17. After the vote has been accepted, update `RELEASE.md`.\n18. Create a release tag.\n19. Repeat steps 6 to 8 to create the official release, and step 15 to publish it.\n20. Use SVN to check out\nhttps://dist.apache.org/repos/dist/release/predictionio/. This is the area\nfor staging actual releases.\n21. Create a subdirectory at the SVN staging area. The area should have a `KEYS` file.\n    * `mkdir 0.15.0`\n    * Copy the binary distribution from the dev/ tree to the release/ tree\n    * Copy the official release to the release/ tree\n22. If you have updated the `KEYS` file, also copy that to the staging area.\n23. 
Remove old releases from the ASF distribution mirrors.\n(https://www.apache.org/dev/mirrors.html#location)\n    * `svn delete 0.14.0`\n24. `svn commit -m \"Apache PredictionIO 0.15.0\"`\n25. Document breaking changes in https://predictionio.apache.org/resources/upgrade/.\n26. Mark the version as released on JIRA.\n(https://issues.apache.org/jira/projects/PIO?selectedItem=com.atlassian.jira.jira-projects-plugin%3Arelease-page&status=no-filter)\n27. Send out an email to the following mailing lists: announce, user, dev.\n\n  ```\n  Subject: [ANNOUNCE] Apache PredictionIO 0.15.0 Release\n\n  The Apache PredictionIO team would like to announce the release of Apache PredictionIO 0.15.0.\n\n  Release notes are here:\n  https://github.com/apache/predictionio/blob/v0.15.0/RELEASE.md\n\n  Apache PredictionIO is an open source Machine Learning Server built on top of a state-of-the-art open source stack that enables developers to manage and deploy production-ready predictive services for various kinds of machine learning tasks.\n\n  More details regarding Apache PredictionIO can be found here:\n  https://predictionio.apache.org/\n\n  The release artifacts can be downloaded here:\n  https://www.apache.org/dyn/closer.lua/predictionio/0.15.0/apache-predictionio-0.15.0-bin.tar.gz\n\n  All JIRAs completed for this release are tagged with 'FixVersion = 0.15.0'; the JIRA release notes can be found here:\n  https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12320420&version=12337844\n\n  Thanks!\n  The Apache PredictionIO Team\n  ```\n"
  },
  {
    "path": "README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# [Apache PredictionIO](http://predictionio.apache.org)\n\n[![Build\nStatus](https://api.travis-ci.org/apache/predictionio.svg?branch=develop)](https://travis-ci.org/apache/predictionio)\n\nApache PredictionIO is an open source machine learning framework\nfor developers, data scientists, and end users. 
It supports event collection,\ndeployment of algorithms, evaluation, and querying predictive results via REST APIs.\nIt is based on scalable open source services like Hadoop, HBase (and other DBs),\nElasticsearch, Spark and implements what is called a Lambda Architecture.\n\nTo get started, check out http://predictionio.apache.org!\n\n\n## Table of contents\n- [Installation](#installation)\n- [Quick Start](#quick-start)\n- [Bugs and Feature Requests](#bugs-and-feature-requests)\n- [Documentation](#documentation)\n- [Contributing](#contributing)\n- [Community](#community)\n\n\n## Installation\n\nA few installation options are available.\n\n*   [Installing Apache PredictionIO from\n    Binary/Source](http://predictionio.apache.org/install/install-sourcecode/)\n*   [Installing Apache PredictionIO with\n    Docker](http://predictionio.apache.org/install/install-docker/)\n\n## Quick Start\n\n*   [Recommendation Engine Template Quick\n    Start](http://predictionio.apache.org/templates/recommendation/quickstart/)\n    Guide\n*   [Similar Product Engine Template Quick\n    Start](http://predictionio.apache.org/templates/similarproduct/quickstart/)\n    Guide\n*   [Classification Engine Template Quick\n    Start](http://predictionio.apache.org/templates/classification/quickstart/)\n    Guide\n\n\n## Bugs and Feature Requests\n\nUse [Apache JIRA](https://issues.apache.org/jira/browse/PIO) to report bugs or request new features.\n\n## Documentation\n\nDocumentation, included in this repo in the `docs/manual` directory, is built\nwith [Middleman](http://middlemanapp.com/) and publicly hosted at\n[predictionio.apache.org](http://predictionio.apache.org/).\n\nInterested in helping with our documentation? 
Read [Contributing\nDocumentation](http://predictionio.apache.org/community/contribute-documentation/).\n\n\n## Community\n\nKeep track of development and community news.\n\n*   Subscribe to the user mailing list <mailto:user-subscribe@predictionio.apache.org>\n    and the dev mailing list <mailto:dev-subscribe@predictionio.apache.org>\n*   Follow [@predictionio](https://twitter.com/predictionio) on Twitter.\n\n\n## Contributing\n\nRead the [Contribute Code](http://predictionio.apache.org/community/contribute-code/) page.\n\nYou can also list your projects on the [Community Project\npage](http://predictionio.apache.org/community/projects/).\n\n\n## License\n\nApache PredictionIO is under [Apache 2\nlicense](http://www.apache.org/licenses/LICENSE-2.0.html).\n"
  },
  {
    "path": "RELEASE.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# Release Notes and News\n\n**Note:** For upgrade instructions please refer to [this page](https://predictionio.apache.org/resources/upgrade/).\n\n## Version History\n\n### 0.14.0\n\nMar 11, 2019\n\n#### Breaking changes\n\n- [PIO-168](https://issues.apache.org/jira/browse/PIO-168): Elasticsearch 6.x support (see the [pull request](https://github.com/apache/predictionio/pull/466))\n\n#### New Features\n\n- [PIO-183](https://issues.apache.org/jira/browse/PIO-183): Add Jupyter Docker image\n- [PIO-199](https://issues.apache.org/jira/browse/PIO-199): Spark 2.4 (Scala 2.11) support\n\n#### Behavior Changes\n\n- [PIO-31](https://issues.apache.org/jira/browse/PIO-31): Move from spray to akka-http in servers\n- [PIO-171](https://issues.apache.org/jira/browse/PIO-171): Drop Scala 2.10 and Spark 1.6 support\n- [PIO-175](https://issues.apache.org/jira/browse/PIO-175): Deprecation of Elasticsearch 1.x support\n- [PIO-179](https://issues.apache.org/jira/browse/PIO-179): bump up hbase client version and make it configurable\n- [PIO-192](https://issues.apache.org/jira/browse/PIO-192): Enhance PySpark support\n- [PIO-196](https://issues.apache.org/jira/browse/PIO-196): Use external PySpark 
environment variables in Jupyter Docker image\n\n#### Other Changes\n\n- [PIO-153](https://issues.apache.org/jira/browse/PIO-153): Allow use of GNU tar on non-GNU systems\n- [PIO-170](https://issues.apache.org/jira/browse/PIO-170): Upgrade sbt to 1.x\n- [PIO-176](https://issues.apache.org/jira/browse/PIO-176): Clean up unmanaged sources in the data module\n- [PIO-182](https://issues.apache.org/jira/browse/PIO-182): Add asynchronous (non-blocking) methods to LEventStore\n- [PIO-188](https://issues.apache.org/jira/browse/PIO-188): Update the build matrix to the latest supported versions\n- [PIO-189](https://issues.apache.org/jira/browse/PIO-189): ES6 integration test fails\n- [PIO-194](https://issues.apache.org/jira/browse/PIO-194): S3 Model Data Storage should allow more flexible ways for specifying AWS credentials\n- [PIO-203](https://issues.apache.org/jira/browse/PIO-203): pio status warnings\n- [PIO-205](https://issues.apache.org/jira/browse/PIO-205): Update Dockerfile to reflect new Spark version\n- [PIO-206](https://issues.apache.org/jira/browse/PIO-206): Spark 2.3.2 to 2.3.3\n\n#### Documentation\n\n- [PIO-172](https://issues.apache.org/jira/browse/PIO-172): Migration guide for ES 6.x changes\n- [PIO-180](https://issues.apache.org/jira/browse/PIO-180): Trivial LiveDoc Link Change in Readme\n- [PIO-185](https://issues.apache.org/jira/browse/PIO-185): Non-tracked Link in Apache Project page\n- [PIO-195](https://issues.apache.org/jira/browse/PIO-195): Improve readability and grammar of documentation\n\n#### Credits\n\nThe following contributors have spent a great deal of effort to bring to you\nthis release:\n\nAlexander Merritt, Chris Wewerka, Donald Szeto, Naoki Takezoe, Saurabh Gulati,\nShinsuke Sugaya, Takako Shimamoto, Wei Chen, Yavor Stoychev\n\n### 0.13.0\n\nSep 20, 2018\n\n#### New Features\n\n- [PIO-161](https://issues.apache.org/jira/browse/PIO-161): Spark 2.3 support.\n\n#### Behavior Changes\n\n- 
[PIO-158](https://issues.apache.org/jira/browse/PIO-158): More officially deprecate support for Scala 2.10 and Spark 1.x.\n\n#### Other Changes\n\n- [PIO-152](https://issues.apache.org/jira/browse/PIO-152): DOAP syntax error.\n- [PIO-155](https://issues.apache.org/jira/browse/PIO-155): Fix 'Topic Labelling with Wikipedia' Template Link.\n- [PIO-156](https://issues.apache.org/jira/browse/PIO-156): Stale release on download page.\n- [PIO-160](https://issues.apache.org/jira/browse/PIO-160): Array out of bound exception in JDBCUtils when --env is not supplied to CreateWorkflow.\n\n#### Credits\n\nThe following contributors have spent a great deal of effort to bring to you\nthis release:\n\nDonald Szeto, Takako Shimamoto\n\n### 0.12.1\n\nMar 11, 2018\n\n#### New Features\n\n- [PIO-125](https://issues.apache.org/jira/browse/PIO-125): Add support for Spark 2.2.\n- [PIO-137](https://issues.apache.org/jira/browse/PIO-137): Add CleanupFunctions for Python.\n\n#### Behavior Changes\n\n- [PIO-126](https://issues.apache.org/jira/browse/PIO-126): Update install.sh to use binary release.\n- [PIO-137](https://issues.apache.org/jira/browse/PIO-137): Create a connection object at a worker to delete events.\n\n#### Other Changes\n\n- [PIO-101](https://issues.apache.org/jira/browse/PIO-101): Document usage of plug-in of event server and engine server.\n- [PIO-127](https://issues.apache.org/jira/browse/PIO-127): Update PMC documentation for release process.\n- [PIO-129](https://issues.apache.org/jira/browse/PIO-129): Move CLI document in side menu.\n- [PIO-131](https://issues.apache.org/jira/browse/PIO-131): Fix Apache licensing issues for doc site.\n- [PIO-133](https://issues.apache.org/jira/browse/PIO-133): Make sure project web site meets all requirements in Apache Project Website Branding Policy.\n- [PIO-135](https://issues.apache.org/jira/browse/PIO-135): Remove all incubating status.\n- [PIO-139](https://issues.apache.org/jira/browse/PIO-139): Update release process doc to 
include closing all resolved stories within the new release.\n- [PIO-146](https://issues.apache.org/jira/browse/PIO-146): Change TM to (R) on text marks.\n- [PIO-147](https://issues.apache.org/jira/browse/PIO-147): Fix broken Scala API documentation.\n- [PIO-150](https://issues.apache.org/jira/browse/PIO-150): Update Ruby gem dependency versions for security improvement.\n- [PIO-151](https://issues.apache.org/jira/browse/PIO-151): Add S3 storage provider docs.\n\n#### Credits\n\nThe following contributors have spent a great deal of effort to bring to you\nthis release:\n\nChan Lee, Donald Szeto, Helene Brashear, James Ward, Jeffrey Cafferata,\nMars Hall, Naoki Takezoe, Shinsuke Sugaya, Steven Yan, Takahiro Hagino,\nTakako Shimamoto\n\n### 0.12.0\n\nSep 27, 2017\n\n#### New Features\n\n- [PIO-61](https://issues.apache.org/jira/browse/PIO-61): S3 support for model data\n- [PIO-69](https://issues.apache.org/jira/browse/PIO-69), [PIO-91](https://issues.apache.org/jira/browse/PIO-91): Binary distribution of PredictionIO\n- [PIO-105](https://issues.apache.org/jira/browse/PIO-105), [PIO-110](https://issues.apache.org/jira/browse/PIO-110), [PIO-111](https://issues.apache.org/jira/browse/PIO-111): Batch predictions\n- [PIO-95](https://issues.apache.org/jira/browse/PIO-95): Raise request timeout for REST API to 35-seconds\n- [PIO-114](https://issues.apache.org/jira/browse/PIO-114): Basic HTTP authentication for Elasticsearch 5.x StorageClient\n- [PIO-116](https://issues.apache.org/jira/browse/PIO-116): PySpark support\n\n#### Breaking changes\n\n- [PIO-106](https://issues.apache.org/jira/browse/PIO-106): Elasticsearch 5.x StorageClient should reuse RestClient (see the [pull request](https://github.com/apache/predictionio/pull/421))\n\n#### Behavior Changes\n\n- [PIO-59](https://issues.apache.org/jira/browse/PIO-59): `pio app new` uses /dev/urandom/ to generate entropy.\n- [PIO-72](https://issues.apache.org/jira/browse/PIO-72): `pio-shell` properly loads storage 
dependencies.\n- [PIO-83](https://issues.apache.org/jira/browse/PIO-83), [PIO-119](https://issues.apache.org/jira/browse/PIO-119): Default environment changed to Spark 2.1.1, Scala 2.11.8,\n  and Elasticsearch 5.5.2.\n- [PIO-99](https://issues.apache.org/jira/browse/PIO-99): `pio-build` checks for compilation errors before proceeding\n  to build engine.\n- [PIO-100](https://issues.apache.org/jira/browse/PIO-100): `pio` commands no longer display SLF4J warning messages.\n\n#### Other Changes\n\n- [PIO-56](https://issues.apache.org/jira/browse/PIO-56): Core unit tests no longer require meta data setup.\n- [PIO-60](https://issues.apache.org/jira/browse/PIO-60), [PIO-62](https://issues.apache.org/jira/browse/PIO-62): Minor fixes in authorship information and license checking.\n- [PIO-63](https://issues.apache.org/jira/browse/PIO-63): Apache incubator logo and disclaimer is displayed on the website.\n- [PIO-65](https://issues.apache.org/jira/browse/PIO-65): Integration tests on Travis caches downloaded jars.\n- [PIO-66](https://issues.apache.org/jira/browse/PIO-66): More detailed documentation regarding release process and adding\n  JIRA tickets.\n- [PIO-90](https://issues.apache.org/jira/browse/PIO-90): Improved performance for /batch/events.json API call.\n- [PIO-94](https://issues.apache.org/jira/browse/PIO-94): More detailed stack trace for REST API errors.\n- [PIO-97](https://issues.apache.org/jira/browse/PIO-97): Update examples in official templates.\n- [PIO-102](https://issues.apache.org/jira/browse/PIO-102), [PIO-117](https://issues.apache.org/jira/browse/PIO-117), [PIO-118](https://issues.apache.org/jira/browse/PIO-118), [PIO-120](https://issues.apache.org/jira/browse/PIO-120): Bug fixes, refactoring, and\n  improved performance on Elasticsearch behavior.\n- [PIO-104](https://issues.apache.org/jira/browse/PIO-104): Bug fix regarding plugins.\n- [PIO-107](https://issues.apache.org/jira/browse/PIO-107): Obsolete experimental examples are removed.\n\n#### 
Credits\n\nThe following contributors have spent a great deal of effort to bring to you\nthis release:\n\nAayush Kumar, Chan Lee, Donald Szeto, Hugo Duksis, Juha Syrjälä, Lucas Bonatto,\nMarius Rabenarivo, Mars Hall, Naoki Takezoe, Nilmax Moura, Shinsuke Sugaya,\nTomasz Stęczniewski, Vaghawan Ojha\n\n### 0.11.0\n\nApr 25, 2017\n\n#### New Features\n\n- [PIO-30](https://issues.apache.org/jira/browse/PIO-30): Scala 2.11 support\n- [PIO-30](https://issues.apache.org/jira/browse/PIO-30): Spark 2 support\n- [PIO-49](https://issues.apache.org/jira/browse/PIO-49): Elasticsearch 5 support\n- [PIO-30](https://issues.apache.org/jira/browse/PIO-30), [PIO-49](https://issues.apache.org/jira/browse/PIO-49): Flexible build system\n- [PIO-47](https://issues.apache.org/jira/browse/PIO-47), [PIO-51](https://issues.apache.org/jira/browse/PIO-51): Removal of engine manifests\n- [PIO-49](https://issues.apache.org/jira/browse/PIO-49): Modularized storage backend modules\n- [PIO-45](https://issues.apache.org/jira/browse/PIO-45): Self cleaning data source\n\n#### Behavior Changes\n\n- [PIO-25](https://issues.apache.org/jira/browse/PIO-25): `pio-start-all` will no longer start PostgreSQL if it is not being\n  used.\n- [PIO-47](https://issues.apache.org/jira/browse/PIO-47), [PIO-51](https://issues.apache.org/jira/browse/PIO-51): `pio build` no longer requires access to the metadata\n  repository. `pio` commands will now accept an optional `--engine-dir`\n  parameter if you want to run `pio build`, `pio train`, or `pio deploy` outside\n  of an engine directory. This is an interim solution before an engine registry\n  feature becomes available in the future.\n- [PIO-49](https://issues.apache.org/jira/browse/PIO-49): PostgreSQL JDBC driver is no longer bundled with the core assembly. 
If\n  you are using PostgreSQL, you must download the JDBC driver and update your\n  configuration to point to the correct JDBC driver file.\n- [PIO-54](https://issues.apache.org/jira/browse/PIO-54): New generated access keys will no longer start with a `-` character.\n\n#### Other Changes\n\n- [PIO-28](https://issues.apache.org/jira/browse/PIO-28): Code refactoring of the command line interface. It is now possible to\n  develop new interfaces that perform the same functionalities provided by the\n  CLI.\n- [PIO-53](https://issues.apache.org/jira/browse/PIO-53): Integration tests can now be tied to every single Git commit, without\n  the need to update the official test Docker image.\n- The meta data and model data access object methods are now public and marked\n  as Core Developer API.\n\n#### Credits\n\nThe following contributors have spent a great deal of effort to bring to you\nthis release:\n\nAhmet DAL, Alexander Merritt, Amy Lin, Bansari Shah, Chan Lee, Chris Woodford,\nDaniel Gabrieli, Dennis Jung, Donald Szeto, Emily Rose, Hari Charan Ayada,\ninfoquestsolutions, Jonny Daenen, Kenneth Chan, Laertis Pappas, Marcin\nZiemiński, Naoki Takezoe, Rajdeep Dua, Shinsuke Sugaya, Pat Ferrel, scorpiovn,\nSuneel Marthi, Steven Yan, Takahiro Hagino, Takako Shimamoto\n\n### 0.10.0\n\nOct 7, 2016\n\n - Make SSL optional\n - Merge ActionML fork\n - First Apache PredictionIO release\n\n### 0.9.7-aml (ActionML fork)\n\nAug 5, 2016\n\n - changed version id so artifacts don't conflict with naming in the Salesforce sponsored project.\n - bug fix in memory use during moving window event trim and compaction  EventStore data.\n - update [install.sh](https://github.com/actionml/PredictionIO/blob/master/bin/install.sh) script for single line installs with options that support various combinations required by some templates.\n\n### 0.9.6\n\nApril 11, 2015\n\nFor a detailed list of commits check [this page](https://github.com/apache/predictionio/commits/v0.9.6)\n\n- Upgrade components 
for install/runtime to HBase 1, Spark 1.5.2. PIO still runs on older HBase and Spark back to 1.3.1, upgrading install of Elasticsearch to 1.5.2 since pio runs well on it but also runs on older versions.\n- Support for maintaining a moving window of events by discarding old events from the EventStore\n- Support for doing a deploy without creating a Spark Context\n\n### 0.9.6 (ActionML fork)\n\nMarch 26, 2016\n\n- Upgrade components for install/runtime to HBase 1.X, Spark 1.5.2. PIO still runs on older HBase and Spark back to 1.3.1, upgrading install of Elasticsearch to 1.5.2 since pio runs well on it but also runs on older versions.\n- Support for maintaining a moving window of events by discarding old events from the EventStore\n- Support for doing a deploy without creating a Spark Context\n\n### 0.9.5\n\nOctober 14th, 2015\n\n[Release Notes](https://github.com/apache/predictionio/blob/master/RELEASE.md) have been moved to GitHub and you are reading them. For a detailed list of commits check [this page](https://github.com/apache/predictionio/commits/v0.9.5)\n\n- Support batches of events sent to the EventServer as json arrays\n- Support creating an Elasticsearch StorageClient for an Elasticsearch cluster from variables in pio-env.sh\n- Fixed some errors installing PredictionIO through install.sh when SBT was not correctly downloaded\n\n### 0.9.4\n\nJuly 16th, 2015\n\nRelease Notes\n\n- Support event permissions with different access keys at the Event Server interface\n- Support detection of 3rd party Apache Spark distributions\n- Support running `pio eval` without `engine.json`\n- Fix an issue where `--verbose` is not handled properly by `pio train`\n\n### 0.9.3\n\nMay 20th, 2015\n\n\nRelease Notes\n\n- Add support of developing prediction engines in Java\n- Add support of PostgreSQL and MySQL\n- Spark 1.3.1 compatibility fix\n- Creation of specific app access keys\n- Prevent a case where `pio build` accidentally removes PredictionIO core library\n\n### 
0.9.2\n\nApril 14th, 2015\n\nRelease Notes\n\n- Channels in the Event Server\n- Spark 1.3+ support (upgrade to Spark 1.3+ required)\n- [Webhook Connector](http://predictionio.apache.org/community/contribute-webhook/) support\n- Engine and Event Servers now by default bind to 0.0.0.0\n- Many documentation improvements\n\n### 0.9.1\n\nMarch 17th, 2015\n\nRelease Notes\n\n- Improved `pio-start-all`\n- Fixed a bug where `pio build` failed to set PredictionIO dependency version for engine templates\n\n### 0.9.0\n\nMarch 4th, 2015\n\nRelease Notes\n\n- [E-Commerce Recommendation Template](http://predictionio.apache.org/gallery/template-gallery#recommender-systems) which includes 1) out-of-stock items support 2) new user recommendation 3) unseen items only\n- [Complementary Purchase Template](http://predictionio.apache.org/gallery/template-gallery#unsupervised-learning) for shopping cart recommendation\n- [Lead Scoring Template](http://predictionio.apache.org/gallery/template-gallery#classification) predicts the probability that a user will convert in the current session\n- `pio-start-all`, `pio-stop-all` commands to start and stop all PredictionIO related services\n\n### 0.8.6\n\nFeb 10th, 2015\n\nRelease Notes\n\n- New engine template - [Product Ranking](/templates/productranking/quickstart/) for personalized product listing\n- [CloudFormation deployment](/system/deploy-cloudformation/) available\n"
  },
  {
    "path": "assembly/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport NativePackagerHelper._\nimport RpmConstants._\nimport com.typesafe.sbt.packager.linux.LinuxSymlink\n\nenablePlugins(RpmPlugin, DebianPlugin)\n\nname := \"predictionio\"\n\nmaintainer in Linux := \"Apache Software Foundation\"\npackageSummary in Linux := \"Apache PredictionIO\"\npackageDescription := \"Apache PredictionIO is an open source Machine Learning Server \" +\n  \"built on top of state-of-the-art open source stack for developers \" +\n  \"and data scientists create predictive engines for any machine learning task.\"\n\nversion in Rpm := version.value.replace(\"-\", \"_\")\nrpmRelease := \"1\"\nrpmVendor := \"apache\"\nrpmGroup := Some(\"Applications/System\")\nrpmUrl := Some(\"http://predictionio.apache.org/\")\nrpmLicense := Some(\"Apache License Version 2.0\")\n\nmaintainerScripts in Rpm := maintainerScriptsAppendFromFile((maintainerScripts in Rpm).value)(\n   Pre -> (sourceDirectory.value / \"rpm\" / \"scriptlets\" / \"preinst\"),\n   Postun -> (sourceDirectory.value / \"rpm\" / \"scriptlets\" / \"postun\")\n)\n\nmappings in Universal ++= {\n  val releaseFile = baseDirectory.value / \"..\" / \"RELEASE.md\"\n  val 
buildPropFile = baseDirectory.value / \"..\" / \"project\" / \"build.properties\"\n  val sbtFile = baseDirectory.value / \"..\" / \"sbt\" / \"sbt\"\n  Seq(releaseFile -> \"RELEASE\",\n      buildPropFile -> \"project/build.properties\",\n      sbtFile -> \"sbt/sbt\")\n}\n\nmappings in Universal ++= {\n  val files = IO.listFiles(baseDirectory.value / \"..\" / \"conf\")\n  files filterNot { f => f.getName.endsWith(\".travis\") } map {\n    case f if f.getName equals \"pio-env.sh.template\" => f -> \"conf/pio-env.sh\"\n    case f => f -> s\"conf/${f.getName}\"\n  } toSeq\n}\n\nmappings in Universal ++= {\n  val files = IO.listFiles(baseDirectory.value / \"..\" / \"bin\")\n  files map { f => f -> s\"bin/${f.getName}\" } toSeq\n}\n\nlinuxPackageMappings := {\n    val mappings = linuxPackageMappings.value\n    mappings map {  linuxPackage =>\n        val linuxFileMappings = linuxPackage.mappings map {\n            case (f, n) if f.getName equals \"conf\" => f -> s\"/etc/${name.value}\"\n            case (f, n) if f.getName equals \"pio-env.sh.template\" => f -> s\"/etc/${name.value}/pio-env.sh\"\n            case (f, n) if f.getParent endsWith \"conf\" => f -> s\"/etc/${name.value}/${f.getName}\"\n            case (f, n) if f.getName equals \"log\" => f -> s\"/var/log/${name.value}\"\n            case (f, n) if f.getName equals \"pio.log\" => f -> s\"/var/log/${name.value}/pio.log\"\n            case (f, n) => f -> n\n        }\n\n        val fileData = linuxPackage.fileData.copy(\n            user = s\"${name.value}\",\n            group = s\"${name.value}\"\n        )\n\n        linuxPackage.copy(\n            mappings = linuxFileMappings,\n            fileData = fileData\n        )\n    }\n}\n\nlinuxPackageSymlinks := {\n  Seq(LinuxSymlink(\"/usr/bin/pio\", s\"/usr/share/${name.value}/bin/pio\"),\n      LinuxSymlink(\"/usr/bin/pio-daemon\", s\"/usr/share/${name.value}/bin/pio-daemon\"))\n}\n"
  },
  {
    "path": "assembly/src/debian/DEBIAN/postrm",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nREMOVE_USER_AND_GROUP=false\n\ncase \"$1\" in\n    remove)\n    ;;\n    purge)\n        REMOVE_USER_AND_GROUP=true\n    ;;\n    failed-upgrade|abort-install|abort-upgrade|disappear|upgrade|disappear)\n    ;;\n    *)\n        echo \"post remove script called with unknown argument \\`$1'\" >&2\n        exit 1\n    ;;\nesac\n\nif [ \"$REMOVE_USER_AND_GROUP\" = \"true\" ]; then\n    if id \"predictionio\" > /dev/null 2>&1 ; then\n        userdel \"predictionio\"\n    fi\n\n    if getent group \"predictionio\" > /dev/null 2>&1 ; then\n        groupdel \"predictionio\"\n    fi\nfi\n\n"
  },
  {
    "path": "assembly/src/debian/DEBIAN/preinst",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nif ! getent group \"predictionio\" > /dev/null 2>&1 ; then\n  echo -n \"Creating predictionio group...\"\n  addgroup --quiet --system \"predictionio\"\n  echo \" OK\"\nfi\n\nif ! id predictionio > /dev/null 2>&1 ; then\n  echo -n \"Creating predictionio user...\"\n  adduser --quiet \\\n    --system \\\n    --no-create-home \\\n    --ingroup \"predictionio\" \\\n    --disabled-password \\\n    --shell /bin/false \\\n    --home \"/usr/share/predictionio\"  \\\n    \"predictionio\"\n  echo \" OK\"\nfi\n\n"
  },
  {
    "path": "assembly/src/rpm/scriptlets/postun",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nREMOVE_USER_AND_GROUP=false\n\ncase \"$1\" in\n    0)\n        REMOVE_USER_AND_GROUP=true\n    ;;\n    1)\n    ;;\n    *)\n        echo \"post remove script called with unknown argument \\`$1'\" >&2\n        exit 1\n    ;;\nesac\n\nif [ \"$REMOVE_USER_AND_GROUP\" = \"true\" ]; then\n    if id \"predictionio\" > /dev/null 2>&1 ; then\n        userdel \"predictionio\"\n    fi\n\n    if getent group \"predictionio\" > /dev/null 2>&1 ; then\n        groupdel \"predictionio\"\n    fi\nfi\n\n"
  },
  {
    "path": "assembly/src/rpm/scriptlets/preinst",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nif ! getent group \"predictionio\" > /dev/null 2>&1 ; then\n  echo -n \"Creating predictionio group...\"\n  groupadd -r \"predictionio\"\n  echo \" OK\"\nfi\n\nif ! id predictionio > /dev/null 2>&1 ; then\n  echo -n \"Creating predictionio user...\"\n  useradd --system \\\n    -M \\\n    --gid \"predictionio\" \\\n    --shell /sbin/nologin \\\n    --comment \"fess user\" \\\n    -d \"/usr/share/predictionio\"  \\\n    \"predictionio\"\n  echo \" OK\"\nfi\n\n"
  },
  {
    "path": "bin/cjson",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\ncurl -H \"Content-Type: application/json\" -d \"$1\" $2\n"
  },
  {
    "path": "bin/compute-classpath.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Figure out where PredictionIO is installed\nFWDIR=\"$(cd `dirname $0`/..; pwd)\"\n\n. ${FWDIR}/bin/load-pio-env.sh\n\nif [ -n \"$JAVA_HOME\" ]; then\n  JAR_CMD=\"$JAVA_HOME/bin/jar\"\nelse\n  JAR_CMD=\"jar\"\nfi\n\n# Use pio-assembly JAR from either RELEASE or assembly directory\nif [ -f \"${FWDIR}/RELEASE\" ]; then\n  assembly_folder=\"${FWDIR}\"/lib\nelse\n  assembly_folder=\"${FWDIR}\"/assembly/src/universal/lib\nfi\n\nMAIN_JAR=$(ls \"${assembly_folder}\"/pio-assembly*.jar 2>/dev/null)\nDATA_JARS=$(ls \"${assembly_folder}\"/spark/pio-data-*assembly*.jar 2>/dev/null)\n# Comma-separated list of assembly jars for submitting to spark-shell\nASSEMBLY_JARS=$(printf \"${MAIN_JAR}\\n${DATA_JARS}\" | paste -sd \",\" -)\n\n# Build up classpath\nCLASSPATH=\"${PIO_CONF_DIR}\"\n\n# stable classpath for plugin JARs\nif [ -d \"${FWDIR}/plugins\" ]; then\n  lib_plugin_jars=`ls \"${FWDIR}\"/plugins/*`\n  lib_plugin_classpath=''\n  for J in $lib_plugin_jars; do\n    lib_plugin_classpath=\"${lib_plugin_classpath}:${J}\"\n  done\n  CLASSPATH=\"$CLASSPATH${lib_plugin_classpath}\"\nfi\n\n# stable classpath for Spark JARs\nlib_spark_jars=`ls 
\"${assembly_folder}\"/spark/*.jar`\nlib_spark_classpath=''\nfor J in $lib_spark_jars; do\n  lib_spark_classpath=\"${lib_spark_classpath}:${J}\"\ndone\nCLASSPATH=\"$CLASSPATH${lib_spark_classpath}\"\n\nCLASSPATH=\"$CLASSPATH:${MAIN_JAR}\"\n\n# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail ! Note, this\n# assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts\n# the configurtion files.\nif [ -n \"$HADOOP_CONF_DIR\" ]; then\n  CLASSPATH=\"$CLASSPATH:$HADOOP_CONF_DIR\"\nfi\nif [ -n \"$YARN_CONF_DIR\" ]; then\n  CLASSPATH=\"$CLASSPATH:$YARN_CONF_DIR\"\nfi\nif [ -n \"$HBASE_CONF_DIR\" ]; then\n  CLASSPATH=\"$CLASSPATH:$HBASE_CONF_DIR\"\nfi\nif [ -n \"$ES_CONF_DIR\" ]; then\n  CLASSPATH=\"$CLASSPATH:$ES_CONF_DIR\"\nfi\nif [ -n \"$POSTGRES_JDBC_DRIVER\" ]; then\n  CLASSPATH=\"$CLASSPATH:$POSTGRES_JDBC_DRIVER\"\n  ASSEMBLY_JARS=\"$ASSEMBLY_JARS,$POSTGRES_JDBC_DRIVER\"\nfi\nif [ -n \"$MYSQL_JDBC_DRIVER\" ]; then\n  CLASSPATH=\"$CLASSPATH:$MYSQL_JDBC_DRIVER\"\n  ASSEMBLY_JARS=\"$ASSEMBLY_JARS,$MYSQL_JDBC_DRIVER\"\nfi\n\necho \"$CLASSPATH\"\n"
  },
  {
    "path": "bin/install.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nOS=`uname`\nSPARK_VERSION=2.1.1\n# Looks like support for Elasticsearch 2.0 will require 2.0 so deferring\nELASTICSEARCH_VERSION=5.6.9\nHBASE_VERSION=1.2.6\nPOSTGRES_VERSION=42.0.0\nMYSQL_VERSION=5.1.41\nPIO_DIR=$HOME/PredictionIO\nUSER_PROFILE=$HOME/.profile\nPIO_FILE=PredictionIO-*.tar.gz\nTEMP_DIR=/tmp\n\nDISTRO_DEBIAN=\"Debian/Ubuntu\"\nDISTRO_OTHER=\"Other\"\n\nPGSQL=\"PostgreSQL\"\nMYSQL=\"MySQL\"\nES_PGSQL=\"Elasticsearch + PostgreSQL\"\nES_HB=\"Elasticsearch + HBase\"\n\n# Ask a yes/no question, with a default of \"yes\".\nconfirm () {\n  echo -ne $@ \"[Y/n] \"\n  read -r response\n\n  case ${response} in\n    [yY][eE][sS]|[yY]|\"\")\n      true\n      ;;\n    [nN][oO]|[nN])\n      false\n      ;;\n    *)\n      confirm $@\n      ;;\n  esac\n}\n\necho -e \"\\033[1;32mWelcome to PredictionIO !\\033[0m\"\n\n# Detect OS\nif [[ \"$OS\" = \"Darwin\" ]]; then\n  echo \"Mac OS detected!\"\n  SED_CMD=\"sed -i ''\"\nelif [[ \"$OS\" = \"Linux\" ]]; then\n  echo \"Linux OS detected!\"\n  SED_CMD=\"sed -i\"\nelse\n  echo -e \"\\033[1;31mYour OS $OS is not yet supported for automatic install :(\\033[0m\"\n  echo -e \"\\033[1;31mPlease do a 
manual install!\\033[0m\"\n  exit 1\nfi\n\nif [[ $USER ]]; then\n  echo \"Using user: $USER\"\nelse\n  echo \"No user found - this is OK!\"\nfi\n\nif [[ \"$OS\" = \"Linux\" && $(cat /proc/1/cgroup) == *cpu:/docker/* ]]; then\n  # Docker\n  # REQUIRED: No user input for Docker!\n  echo -e \"\\033[1;33mDocker detected!\\033[0m\"\n  echo -e \"\\033[1;33mForcing Docker defaults!\\033[0m\"\n  pio_dir=${PIO_DIR}\n  vendors_dir=${pio_dir}/vendors\n\n  spark_dir=${vendors_dir}/spark-${SPARK_VERSION}\n  elasticsearch_dir=${vendors_dir}/elasticsearch-${ELASTICSEARCH_VERSION}\n  hbase_dir=${vendors_dir}/hbase-${HBASE_VERSION}\n  zookeeper_dir=${vendors_dir}/zookeeper\n\n  echo \"--------------------------------------------------------------------------------\"\n  echo -e \"\\033[1;32mOK, looks good!\\033[0m\"\n  echo \"You are going to install PredictionIO to: $pio_dir\"\n  echo -e \"Vendor applications will go in: $vendors_dir\\n\"\n  echo \"Spark: $spark_dir\"\n  echo \"Elasticsearch: $elasticsearch_dir\"\n  echo \"HBase: $hbase_dir\"\n  echo \"ZooKeeper: $zookeeper_dir\"\n  echo \"--------------------------------------------------------------------------------\"\n\n  # Java Install\n  echo -e \"\\033[1;36mStarting Java install...\\033[0m\"\n\n  sudo add-apt-repository ppa:openjdk-r/ppa\n  sudo apt-get update\n  sudo apt-get install openjdk-8-jdk libgfortran3 -y\n\n  echo -e \"\\033[1;32mJava install done!\\033[0m\"\n\n  JAVA_HOME=$(readlink -f /usr/bin/javac | sed \"s:/bin/javac::\")\nelif [[ \"$1\" == \"-y\" ]]; then\n  # Non-interactive\n  echo -e \"\\033[1;33mNon-interactive installation requested!\\033[0m\"\n  echo -e \"\\033[1;33mForcing defaults!\\033[0m\"\n  pio_dir=${PIO_DIR}\n  vendors_dir=${pio_dir}/vendors\n  source_setup=${ES_HB}\n\n  spark_dir=${vendors_dir}/spark-${SPARK_VERSION}\n  elasticsearch_dir=${vendors_dir}/elasticsearch-${ELASTICSEARCH_VERSION}\n  hbase_dir=${vendors_dir}/hbase-${HBASE_VERSION}\n  zookeeper_dir=${vendors_dir}/zookeeper\n\n  echo 
\"--------------------------------------------------------------------------------\"\n  echo -e \"\\033[1;32mOK, looks good!\\033[0m\"\n  echo \"You are going to install PredictionIO to: $pio_dir\"\n  echo -e \"Vendor applications will go in: $vendors_dir\\n\"\n  echo \"Spark: $spark_dir\"\n  echo \"Elasticsearch: $elasticsearch_dir\"\n  echo \"HBase: $hbase_dir\"\n  echo \"ZooKeeper: $zookeeper_dir\"\n  echo \"--------------------------------------------------------------------------------\"\n\n  # Java Install\n  echo -e \"\\033[1;36mStarting Java install...\\033[0m\"\n\n  # todo: make java installation platform independent\n  sudo add-apt-repository ppa:openjdk-r/ppa\n  sudo apt-get update\n  sudo apt-get install openjdk-8-jdk libgfortran3 python-pip -y\n  sudo pip install predictionio\n\n  echo -e \"\\033[1;32mJava install done!\\033[0m\"\n\n  JAVA_HOME=$(readlink -f /usr/bin/javac | sed \"s:/bin/javac::\")\nelse\n  # Interactive\n  while true; do\n    echo -e \"\\033[1mWhere would you like to install PredictionIO?\\033[0m\"\n    read -e -p \"Installation path ($PIO_DIR): \" pio_dir\n    pio_dir=${pio_dir:-$PIO_DIR}\n\n    read -e -p \"Vendor path ($pio_dir/vendors): \" vendors_dir\n    vendors_dir=${vendors_dir:-$pio_dir/vendors}\n\n    echo -e \"\\033[1mPlease choose between the following sources (1, 2, 3 or 4):\\033[0m\"\n    select source_setup in \"$PGSQL\" \"$MYSQL\" \"$ES_PGSQL\" \"$ES_HB\"; do\n      case ${source_setup} in\n        \"$PGSQL\")\n          break\n          ;;\n        \"$MYSQL\")\n          break\n          ;;\n        \"$ES_PGSQL\")\n          break\n          ;;\n        \"$ES_HB\")\n          break\n          ;;\n        *)\n          ;;\n      esac\n    done\n\n    spark_dir=${vendors_dir}/spark-${SPARK_VERSION}\n    elasticsearch_dir=${vendors_dir}/elasticsearch-${ELASTICSEARCH_VERSION}\n    hbase_dir=${vendors_dir}/hbase-${HBASE_VERSION}\n    zookeeper_dir=${vendors_dir}/zookeeper\n\n    echo 
\"--------------------------------------------------------------------------------\"\n    echo -e \"\\033[1;32mOK, looks good!\\033[0m\"\n    echo \"You are going to install PredictionIO to: $pio_dir\"\n    echo -e \"Vendor applications will go in: $vendors_dir\\n\"\n    echo \"Spark: $spark_dir\"\n    case $source_setup in\n      \"$PGSQL\")\n        # PostgreSQL installed by apt-get so no path is printed beforehand\n        break\n        ;;\n      \"$MYSQL\")\n        # MySQL installed by apt-get so no path is printed beforehand\n        break\n        ;;\n      \"$ES_PGSQL\")\n        # PostgreSQL installed by apt-get so no path is printed beforehand\n        echo \"Elasticsearch: $elasticsearch_dir\"\n        break\n        ;;\n      \"$ES_HB\")\n        echo \"Elasticsearch: $elasticsearch_dir\"\n        echo \"HBase: $hbase_dir\"\n        echo \"ZooKeeper: $zookeeper_dir\"\n        break\n        ;;\n    esac\n    echo \"--------------------------------------------------------------------------------\"\n    if confirm \"\\033[1mIs this correct?\\033[0m\"; then\n      break;\n    fi\n  done\n\n  echo -e \"\\033[1mSelect your linux distribution:\\033[0m\"\n  select distribution in \"$DISTRO_DEBIAN\" \"$DISTRO_OTHER\"; do\n    case $distribution in\n      \"$DISTRO_DEBIAN\")\n        break\n        ;;\n      \"$DISTRO_OTHER\")\n        break\n        ;;\n      *)\n        ;;\n    esac\n  done\n\n  # Java Install\n  if [[ ${OS} = \"Linux\" ]] && confirm \"\\033[1mWould you like to install Java?\\033[0m\"; then\n    case ${distribution} in\n      \"$DISTRO_DEBIAN\")\n        echo -e \"\\033[1;36mStarting Java install...\\033[0m\"\n\n        echo -e \"\\033[33mThis script requires superuser access!\\033[0m\"\n        echo -e \"\\033[33mYou will be prompted for your password by sudo:\\033[0m\"\n\n        sudo add-apt-repository ppa:openjdk-r/ppa\n        sudo apt-get update\n        sudo apt-get install openjdk-8-jdk libgfortran3 python-pip -y\n        sudo pip 
install predictionio\n\n        echo -e \"\\033[1;32mJava install done!\\033[0m\"\n        break\n        ;;\n      \"$DISTRO_OTHER\")\n        echo -e \"\\033[1;31mYour distribution not yet supported for automatic install :(\\033[0m\"\n        echo -e \"\\033[1;31mPlease install Java manually!\\033[0m\"\n        exit 2\n        ;;\n      *)\n        ;;\n    esac\n  fi\n\n  # Try to find JAVA_HOME\n  echo \"Locating JAVA_HOME...\"\n  if [[ \"$OS\" = \"Darwin\" ]]; then\n    JAVA_VERSION=`echo \"$(java -version 2>&1)\" | grep \"java version\" | awk '{ print substr($3, 2, length($3)-2); }'`\n    JAVA_HOME=`/usr/libexec/java_home`\n  elif [[ \"$OS\" = \"Linux\" ]]; then\n    JAVA_HOME=$(readlink -f /usr/bin/javac | sed \"s:/bin/javac::\")\n  fi\n  echo \"Found: $JAVA_HOME\"\n\n  # Check JAVA_HOME\n  while [ ! -f \"$JAVA_HOME/bin/javac\" ]; do\n    echo -e \"\\033[1;31mJAVA_HOME is incorrect!\\033[0m\"\n    echo -e \"\\033[1;33mJAVA_HOME should be a directory containing \\\"bin/javac\\\"!\\033[0m\"\n    read -e -p \"Please enter JAVA_HOME manually: \" JAVA_HOME\n  done;\nfi\n\nif [ -n \"$JAVA_VERSION\" ]; then\n  echo \"Your Java version is: $JAVA_VERSION\"\nfi\necho \"JAVA_HOME is now set to: $JAVA_HOME\"\n\n# PredictionIO\necho -e \"\\033[1;36mStarting PredictionIO setup in:\\033[0m $pio_dir\"\n\ncd ${TEMP_DIR}\n\nfiles=$(ls PredictionIO*.tar.gz 2> /dev/null | wc -l)\n\nif [[ $files == 0 ]]; then\n  echo \"Downloading PredictionIO...\"\n  curl -L https://dist.apache.org/repos/dist/release/predictionio/0.12.1/apache-predictionio-0.12.1-bin.tar.gz > predictionio-release.tar.gz\n  tar zxf predictionio-0.12.1.tar.gz\n\n  mv predictionio-0.12.1 PredictionIO\n\n  sh PredictionIO/make-distribution.sh\n  cp PredictionIO/${PIO_FILE} ${TEMP_DIR}\n  rm -r PredictionIO\nfi\n\ntar zxf ${PIO_FILE}\nrm -rf ${pio_dir}\nmv PredictionIO*/ ${pio_dir}\n\nif [[ $USER ]]; then\n  chown -R $USER ${pio_dir}\nfi\n\necho \"Updating ~/.profile to include: 
$pio_dir\"\nPATH=$PATH:${pio_dir}/bin\necho \"export PATH=\\$PATH:$pio_dir/bin\" >> ${USER_PROFILE}\n\necho -e \"\\033[1;32mPredictionIO setup done!\\033[0m\"\n\nmkdir -p ${vendors_dir}\n\n# Spark\necho -e \"\\033[1;36mStarting Spark setup in:\\033[0m $spark_dir\"\nif [[ ! -e spark-${SPARK_VERSION}-bin-hadoop2.6.tgz ]]; then\n  echo \"Downloading Spark...\"\n  curl -O http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz\nfi\ntar xf spark-${SPARK_VERSION}-bin-hadoop2.6.tgz\nrm -rf ${spark_dir}\nmv spark-${SPARK_VERSION}-bin-hadoop2.6 ${spark_dir}\n\necho \"Updating: $pio_dir/conf/pio-env.sh\"\n${SED_CMD} \"s|SPARK_HOME=.*|SPARK_HOME=$spark_dir|g\" ${pio_dir}/conf/pio-env.sh\n\necho -e \"\\033[1;32mSpark setup done!\\033[0m\"\n\ninstallPGSQL () {\n  if [[ ${distribution} = \"$DISTRO_DEBIAN\" ]]; then\n      echo -e \"\\033[1;36mInstalling PostgreSQL...\\033[0m\"\n      sudo apt-get install postgresql-9.4 -y\n      echo -e \"\\033[1;36mPlease use the default password 'pio' when prompted to enter one\\033[0m\"\n      sudo -u postgres createdb pio\n      sudo -u postgres createuser -P pio\n      echo -e \"\\033[1;36mPlease update $pio_dir/conf/pio-env.sh if you did not enter the default password\\033[0m\"\n    else\n      echo -e \"\\033[1;31mYour distribution not yet supported for automatic install :(\\033[0m\"\n      echo -e \"\\033[1;31mPlease install PostgreSQL manually!\\033[0m\"\n    fi\n    curl -O https://jdbc.postgresql.org/download/postgresql-${POSTGRES_VERSION}.jar\n    mv postgresql-${POSTGRES_VERSION}.jar ${pio_dir}/lib/\n\n    echo -e \"\\033[1;32mPGSQL setup done!\\033[0m\"\n}\n\ninstallES() {\n    echo -e \"\\033[1;36mStarting Elasticsearch setup in:\\033[0m $elasticsearch_dir\"\n    if [[ -e elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz ]]; then\n      if confirm \"Delete existing elasticsearch-$ELASTICSEARCH_VERSION.tar.gz?\"; then\n        rm elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz\n      fi\n    
fi\n    if [[ ! -e elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz ]]; then\n      echo \"Downloading Elasticsearch...\"\n      curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz\n    fi\n    tar zxf elasticsearch-${ELASTICSEARCH_VERSION}.tar.gz\n    rm -rf ${elasticsearch_dir}\n    mv elasticsearch-${ELASTICSEARCH_VERSION} ${elasticsearch_dir}\n\n    echo \"Updating: $elasticsearch_dir/config/elasticsearch.yml\"\n    echo 'network.host: 127.0.0.1' >> ${elasticsearch_dir}/config/elasticsearch.yml\n}\n\ncase $source_setup in\n  \"$PGSQL\")\n    installPGSQL\n    ;;\n  \"$ES_PGSQL\")\n    installES\n    installPGSQL\n    echo \"Updating: $pio_dir/conf/pio-env.sh\"\n    ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=ELASTICSEARCH|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE|PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=.*|PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$elasticsearch_dir|\" ${pio_dir}/conf/pio-env.sh\n    ;;\n  \"$MYSQL\")\n    if [[ ${distribution} = \"$DISTRO_DEBIAN\" ]]; then\n      echo -e \"\\033[1;36mInstalling MySQL...\\033[0m\"\n      echo -e \"\\033[1;36mPlease update $pio_dir/conf/pio-env.sh with your database configuration\\033[0m\"\n      sudo apt-get install mysql-server -y\n      sudo mysql -e \"create database pio; grant all on pio.* to pio@localhost identified by 'pio'\"\n      echo -e \"\\033[1;36mUpdating: $pio_dir/conf/pio-env.sh\\033[0m\"\n      ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=MYSQL|\" ${pio_dir}/conf/pio-env.sh\n      ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=MYSQL|\" ${pio_dir}/conf/pio-env.sh\n      ${SED_CMD} 
\"s|PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=MYSQL|\" ${pio_dir}/conf/pio-env.sh\n      ${SED_CMD} \"s|PIO_STORAGE_SOURCES_PGSQL|# PIO_STORAGE_SOURCES_PGSQL|\" ${pio_dir}/conf/pio-env.sh\n      ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_MYSQL|PIO_STORAGE_SOURCES_MYSQL|\" ${pio_dir}/conf/pio-env.sh\n    else\n      echo -e \"\\033[1;31mYour distribution not yet supported for automatic install :(\\033[0m\"\n      echo -e \"\\033[1;31mPlease install MySQL manually!\\033[0m\"\n      exit 4\n    fi\n    curl -O http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.37/mysql-connector-java-${MYSQL_VERSION}.jar\n    mv mysql-connector-java-${MYSQL_VERSION}.jar ${pio_dir}/lib/\n    ;;\n  \"$ES_HB\")\n    # Elasticsearch\n    installES\n    echo \"Updating: $pio_dir/conf/pio-env.sh\"\n    ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=ELASTICSEARCH|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=LOCALFS|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=PGSQL|PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=HBASE|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|PIO_STORAGE_SOURCES_PGSQL|# PIO_STORAGE_SOURCES_PGSQL|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_LOCALFS|PIO_STORAGE_SOURCES_LOCALFS|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE|PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=.*|PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$elasticsearch_dir|\" ${pio_dir}/conf/pio-env.sh\n    echo -e \"\\033[1;32mElasticsearch setup done!\\033[0m\"\n\n    # HBase\n    echo -e \"\\033[1;36mStarting HBase setup in:\\033[0m $hbase_dir\"\n    if [[ ! 
-e hbase-${HBASE_VERSION}-bin.tar.gz ]]; then\n      echo \"Downloading HBase...\"\n      curl -O http://archive.apache.org/dist/hbase/${HBASE_VERSION}/hbase-${HBASE_VERSION}-bin.tar.gz\n    fi\n    tar zxf hbase-${HBASE_VERSION}-bin.tar.gz\n    rm -rf ${hbase_dir}\n    mv hbase-${HBASE_VERSION} ${hbase_dir}\n\n    echo \"Creating default site in: $hbase_dir/conf/hbase-site.xml\"\n    cat <<EOT > ${hbase_dir}/conf/hbase-site.xml\n<configuration>\n  <property>\n    <name>hbase.rootdir</name>\n    <value>file://${hbase_dir}/data</value>\n  </property>\n  <property>\n    <name>hbase.zookeeper.property.dataDir</name>\n    <value>${zookeeper_dir}</value>\n  </property>\n</configuration>\nEOT\n\n    echo \"Updating: $hbase_dir/conf/hbase-env.sh to include $JAVA_HOME\"\n    ${SED_CMD} \"s|# export JAVA_HOME=/usr/java/jdk1.6.0/|export JAVA_HOME=$JAVA_HOME|\" ${hbase_dir}/conf/hbase-env.sh\n\n    echo \"Updating: $pio_dir/conf/pio-env.sh\"\n    ${SED_CMD} \"s|# PIO_STORAGE_SOURCES_HBASE|PIO_STORAGE_SOURCES_HBASE|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|PIO_STORAGE_SOURCES_HBASE_HOME=.*|PIO_STORAGE_SOURCES_HBASE_HOME=$hbase_dir|\" ${pio_dir}/conf/pio-env.sh\n    ${SED_CMD} \"s|# HBASE_CONF_DIR=.*|HBASE_CONF_DIR=$hbase_dir/conf|\" ${pio_dir}/conf/pio-env.sh\n\n    echo -e \"\\033[1;32mHBase setup done!\\033[0m\"\n\n    ;;\nesac\n\n\n\n\n\n\n\n\n\necho \"Updating permissions on: $vendors_dir\"\n\nif [[ $USER ]]; then\n  chown -R $USER ${vendors_dir}\nfi\n\necho -e \"\\033[1;32mInstallation done!\\033[0m\"\n\n\n\n\n\necho \"--------------------------------------------------------------------------------\"\necho -e \"\\033[1;32mInstallation of PredictionIO complete!\\033[0m\"\necho -e \"\\033[1;32mPlease follow documentation at http://predictionio.apache.org/start/download/ to download the engine template based on your needs\\033[0m\"\necho -e\necho -e \"\\033[1;33mCommand Line Usage Notes:\\033[0m\"\nif [[ ${source_setup} = $ES_HB ]]; then\n  echo -e \"To start 
PredictionIO and dependencies, run: '\\033[1mpio-start-all\\033[0m'\"\nelse\n  echo -e \"To start PredictionIO Event Server in the background, run: '\\033[1mpio eventserver &\\033[0m'\"\nfi\necho -e \"To check the PredictionIO status, run: '\\033[1mpio status\\033[0m'\"\necho -e \"To train/deploy engine, run: '\\033[1mpio [train|deploy|...]\\033[0m' commands\"\nif [[ ${source_setup} = $ES_HB ]]; then\n  echo -e \"To stop PredictionIO and dependencies, run: '\\033[1mpio-stop-all\\033[0m'\"\nfi\necho -e \"\"\necho -e \"Please report any problems to the user mailing list.\"\necho -e \"User mailing list instructions: \\033[1;34mhttp://predictionio.apache.org/support/\\033[0m\"\necho \"--------------------------------------------------------------------------------\"\n"
  },
  {
    "path": "bin/load-pio-env.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# This script loads pio-env.sh if it exists, and ensures it is only loaded once.\n# pio-env.sh is loaded from PIO_CONF_DIR if set, or within the current\n# directory's conf/ subdirectory.\n\nif [ -z \"$PIO_ENV_LOADED\" ]; then\n  export PIO_ENV_LOADED=1\n\n  # Returns the parent of the directory this script lives in.\n  parent_dir=\"$(cd `dirname $0`/..; pwd)\"\n\n  use_conf_dir=${PIO_CONF_DIR:-\"${parent_dir}/conf\"}\n\n  if [ -f \"${use_conf_dir}/pio-env.sh\" ]; then\n    # Promote all variable declarations to environment (exported) variables\n    set -a\n    . \"${use_conf_dir}/pio-env.sh\"\n    set +a\n  else\n    echo -e \"\\033[0;35mWarning: pio-env.sh was not found in ${use_conf_dir}. Using system environment variables instead.\\033[0m\\n\"\n  fi\nfi\n"
  },
  {
    "path": "bin/pio",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nsearch() {\n  local i=0;\n  local needle=$1;\n  shift\n  for str in $@; do\n    if [ \"${str}\" = \"$needle\" ]; then\n      echo ${i}\n      return\n    else\n      ((i++))\n    fi\n  done\n  echo ${i}\n}\n\nif [ -z $PIO_HOME ] ; then\n  PIO_FILE=$(readlink -f $0 2>/dev/null)\n  if [ $? = 0 ] ; then\n    export PIO_HOME=\"$(cd $(dirname $PIO_FILE)/..; pwd)\"\n  else\n    CURRENT_DIR=`pwd`\n    TARGET_FILE=\"$0\"\n    cd \"$(dirname \"$TARGET_FILE\")\"\n    TARGET_FILE=$(basename \"$TARGET_FILE\")\n\n    while [ -L \"$TARGET_FILE\" ]\n    do\n      TARGET_FILE=$(readlink \"$TARGET_FILE\")\n      cd \"$(dirname \"$TARGET_FILE\")\"\n      TARGET_FILE=$(basename \"$TARGET_FILE\")\n    done\n\n    export PIO_HOME=\"$(cd $(dirname \"$TARGET_FILE\")/..; pwd -P)\"\n    cd \"$CURRENT_DIR\"\n  fi\nfi\n\nif [ -z $PIO_CONF_DIR ] ; then\n  export PIO_CONF_DIR=\"${PIO_HOME}/conf\"\n  if [ ! -d $PIO_CONF_DIR ] ; then\n    export PIO_CONF_DIR=\"/etc/predictionio\"\n    if [ ! 
-d $PIO_CONF_DIR ] ; then\n      echo \"PIO_CONF_DIR is not found.\"\n      exit 1\n    fi\n  fi\nfi\n\nFIRST_SEP=$(search \"--\" $@)\n\nFIRST_HALF=\"${@:1:$FIRST_SEP}\"\n\nSECOND_HALF=\"${@:$FIRST_SEP+1}\"\n\nexec ${PIO_HOME}/bin/pio-class org.apache.predictionio.tools.console.Console ${FIRST_HALF} --pio-home ${PIO_HOME} ${SECOND_HALF}\n"
  },
  {
    "path": "bin/pio-class",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\ncygwin=false\ncase \"`uname`\" in\n    CYGWIN*) cygwin=true;;\nesac\n\n# Figure out where PredictionIO is installed\nFWDIR=\"$(cd `dirname $0`/..; pwd)\"\n\n# Export this as PIO_HOME\nexport PIO_HOME=\"${FWDIR}\"\n\n. ${FWDIR}/bin/load-pio-env.sh\n\n. ${FWDIR}/bin/semver.sh\n\nif [ -z \"$1\" ]; then\n  echo \"Usage: pio-class <class> [<args>]\" 1>&2\n  exit 1\nfi\n\n# Warn if log4j.properties is not present\nif [ ! -f \"$PIO_CONF_DIR/log4j.properties\" ]; then\n  echo -e \"\\033[0;35mWarning: log4j.properties is missing from $PIO_CONF_DIR\\033[0m\"\nfi\n\n# Make sure the Apache Spark version meets the prerequisite if it is a binary\n# distribution\nMIN_SPARK_VERSION=\"2.0.2\"\nif [ -z \"$SPARK_HOME\" ]; then\n  echo -e \"\\033[0;31mSPARK_HOME must be set in conf/pio-env.sh, or in the environment!\\033[0m\"\n  exit 1\nelif [ -r \"$SPARK_HOME/RELEASE\" ]; then\n  SPARK_VERSION=`head -n 1 $SPARK_HOME/RELEASE | awk '{print $2}'`\n  if [ -z \"$SPARK_VERSION\" ]; then\n    echo -e \"\\033[0;35m$SPARK_HOME contains an empty RELEASE file. This is a known problem with certain vendors (e.g. Cloudera). 
Please make sure you are using at least $MIN_SPARK_VERSION.\\033[0m\"\n  elif semverLT ${SPARK_VERSION} ${MIN_SPARK_VERSION}; then\n    echo -e \"\\033[0;31mYou have Apache Spark $SPARK_VERSION at $SPARK_HOME which does not meet the minimum version requirement of $MIN_SPARK_VERSION.\\033[0m\"\n    echo -e \"\\033[0;31mAborting.\\033[0m\"\n    exit 1\n  fi\nelse\n  echo -e \"\\033[0;35m$SPARK_HOME is probably an Apache Spark development tree. Please make sure you are using at least $MIN_SPARK_VERSION.\\033[0m\"\nfi\n\n# Find the java binary\nif [ -n \"${JAVA_HOME}\" ]; then\n  RUNNER=\"${JAVA_HOME}/bin/java\"\nelse\n  if [ `command -v java` ]; then\n    RUNNER=\"java\"\n  else\n    echo -e \"\\033[0;31mJAVA_HOME is not set\\033[0m\" >&2\n    exit 1\n  fi\nfi\n\n# Compute classpath using external script\nclasspath_output=$(${FWDIR}/bin/compute-classpath.sh)\nif [[ \"$?\" != \"0\" ]]; then\n  echo \"$classpath_output\"\n  exit 1\nelse\n  CLASSPATH=${classpath_output}\nfi\n\nif [ -z $PIO_LOG_DIR ] ; then\n  PIO_LOG_DIR=$PIO_HOME/log\n  touch $PIO_LOG_DIR/pio.log > /dev/null 2>&1\n  if [ $? != 0 ] ; then\n    PIO_LOG_DIR=/var/log/predictionio\n    touch $PIO_LOG_DIR/pio.log > /dev/null 2>&1\n    if [ $? != 0 ] ; then\n      PIO_LOG_DIR=$HOME\n    fi\n  fi\nfi\n\nexport CLASSPATH\nexport JAVA_OPTS=\"$JAVA_OPTS -Dpio.log.dir=$PIO_LOG_DIR\"\n\nexec \"$RUNNER\" -cp \"$CLASSPATH\" $JAVA_OPTS \"$@\"\n"
  },
  {
    "path": "bin/pio-daemon",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nsearch() {\n  local i=0;\n  local needle=$1;\n  shift\n  for str in $@; do\n    if [ \"${str}\" = \"$needle\" ]; then\n      echo ${i}\n      return\n    else\n      ((i++))\n    fi\n  done\n  echo ${i}\n}\n\nif [ -z $PIO_HOME ] ; then\n  PIO_FILE=$(readlink -f $0 2>/dev/null)\n  if [ $? = 0 ] ; then\n    export PIO_HOME=\"$(cd $(dirname $PIO_FILE)/..; pwd)\"\n  else\n    CURRENT_DIR=`pwd`\n    TARGET_FILE=\"$0\"\n    cd \"$(dirname \"$TARGET_FILE\")\"\n    TARGET_FILE=$(basename \"$TARGET_FILE\")\n\n    while [ -L \"$TARGET_FILE\" ]\n    do\n      TARGET_FILE=$(readlink \"$TARGET_FILE\")\n      cd \"$(dirname \"$TARGET_FILE\")\"\n      TARGET_FILE=$(basename \"$TARGET_FILE\")\n    done\n\n    export PIO_HOME=\"$(cd $(dirname \"$TARGET_FILE\")/..; pwd -P)\"\n    cd \"$CURRENT_DIR\"\n  fi\nfi\n\nif [ -z $PIO_CONF_DIR ] ; then\n  export PIO_CONF_DIR=\"${PIO_HOME}/conf\"\n  if [ ! -d $PIO_CONF_DIR ] ; then\n    export PIO_CONF_DIR=\"/etc/predictionio\"\n    if [ ! 
-d $PIO_CONF_DIR ] ; then\n      echo \"PIO_CONF_DIR is not found.\"\n      exit 1\n    fi\n  fi\nfi\n\nPIDFILE=$1\n\nshift\n\nFIRST_SEP=$(search \"--\" $@)\n\nFIRST_HALF=\"${@:1:$FIRST_SEP}\"\n\nSECOND_HALF=\"${@:$FIRST_SEP+1}\"\n\nexec nohup ${PIO_HOME}/bin/pio-class org.apache.predictionio.tools.console.Console ${FIRST_HALF} --pio-home ${PIO_HOME} ${SECOND_HALF} <&- > /dev/null 2>&1 &\n\necho $! > ${PIDFILE}\n"
  },
  {
    "path": "bin/pio-shell",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nif [ -z $PIO_HOME ] ; then\n  PIO_FILE=$(readlink -f $0 2>/dev/null)\n  if [ $? = 0 ] ; then\n    export PIO_HOME=\"$(cd $(dirname $PIO_FILE)/..; pwd)\"\n  else\n    CURRENT_DIR=`pwd`\n    TARGET_FILE=\"$0\"\n    cd \"$(dirname \"$TARGET_FILE\")\"\n    TARGET_FILE=$(basename \"$TARGET_FILE\")\n\n    while [ -L \"$TARGET_FILE\" ]\n    do\n      TARGET_FILE=$(readlink \"$TARGET_FILE\")\n      cd \"$(dirname \"$TARGET_FILE\")\"\n      TARGET_FILE=$(basename \"$TARGET_FILE\")\n    done\n\n    export PIO_HOME=\"$(cd $(dirname \"$TARGET_FILE\")/..; pwd -P)\"\n    cd \"$CURRENT_DIR\"\n  fi\nfi\n\nif [ -z $PIO_CONF_DIR ] ; then\n  export PIO_CONF_DIR=\"${PIO_HOME}/conf\"\n  if [ ! -d $PIO_CONF_DIR ] ; then\n    export PIO_CONF_DIR=\"/etc/predictionio\"\n    if [ ! -d $PIO_CONF_DIR ] ; then\n      echo \"PIO_CONF_DIR is not found.\"\n      exit 1\n    fi\n  fi\nfi\n\n. ${PIO_HOME}/bin/load-pio-env.sh\n\nif [[ \"$1\" == \"--with-spark\" ]]\nthen\n  echo \"Starting the PIO shell with the Apache Spark Shell.\"\n  # Get paths of assembly jars to pass to spark-shell\n  . 
${PIO_HOME}/bin/compute-classpath.sh\n  shift\n  ${SPARK_HOME}/bin/spark-shell --jars ${ASSEMBLY_JARS} $@\nelif [[ \"$1\" == \"--with-pyspark\" ]]\nthen\n  echo \"Starting the PIO shell with the Apache Spark Shell.\"\n  # Get paths of assembly jars to pass to pyspark\n  . ${PIO_HOME}/bin/compute-classpath.sh\n  shift\n  export PYTHONPATH=${PIO_HOME}/python\n  ${SPARK_HOME}/bin/pyspark --jars ${ASSEMBLY_JARS} $@\nelse\n  echo -e \"\\033[0;33mStarting the PIO shell without Apache Spark.\\033[0m\"\n  echo -e \"\\033[0;33mIf you need the Apache Spark library, run 'pio-shell --with-spark [spark-submit arguments...]'.\\033[0m\"\n  cd ${PIO_HOME}\n  ./sbt/sbt console\nfi\n\n"
  },
  {
    "path": "bin/pio-start-all",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Convenience script for starting all default dependent services in a single\n# node scenario.\n\n# Figure out where PredictionIO is installed\nexport PIO_HOME=\"$(cd `dirname $0`/..; pwd)\"\n\n. ${PIO_HOME}/bin/load-pio-env.sh\n\nSOURCE_TYPE=$PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\nSOURCE_TYPE=$SOURCE_TYPE$PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\nSOURCE_TYPE=$SOURCE_TYPE$PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE\n\n# Elasticsearch\nif [ `echo $SOURCE_TYPE | grep -i elasticsearch | wc -l` != 0 ] ; then\n  echo \"Starting Elasticsearch...\"\n  if [ -n \"$PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME\" ]; then\n    ELASTICSEARCH_HOME=$PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME\n  fi\n  if [ -n \"$ELASTICSEARCH_HOME\" ]; then\n    if [ -n \"$JAVA_HOME\" ]; then\n      JPS=`$JAVA_HOME/bin/jps`\n    else\n      JPS=`jps`\n    fi\n    if [[ ${JPS} =~ \"Elasticsearch\" ]]; then\n      echo -e \"\\033[0;31mElasticsearch is already running. 
Please use pio-stop-all to try stopping it first.\\033[0m\"\n      echo -e \"\\033[0;31mNote: If you started Elasticsearch manually, you will need to kill it manually.\\033[0m\"\n      echo -e \"\\033[0;31mAborting...\\033[0m\"\n      exit 1\n    else\n      $ELASTICSEARCH_HOME/bin/elasticsearch -d -p $PIO_HOME/es.pid\n    fi\n  else\n    echo -e \"\\033[0;31mPlease set PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME in conf/pio-env.sh, or in your environment.\\033[0m\"\n    echo -e \"\\033[0;31mCannot start Elasticsearch. Aborting...\\033[0m\"\n    exit 1\n  fi\nfi\n\n# HBase\nif [ `echo $SOURCE_TYPE | grep -i hbase | wc -l` != 0 ] ; then\n  echo \"Starting HBase...\"\n  if [ -n \"$PIO_STORAGE_SOURCES_HBASE_HOME\" ]; then\n    $PIO_STORAGE_SOURCES_HBASE_HOME/bin/start-hbase.sh\n  else\n    echo -e \"\\033[0;31mPlease set PIO_STORAGE_SOURCES_HBASE_HOME in conf/pio-env.sh, or in your environment.\\033[0m\"\n    # Kill everything for cleanliness\n    echo -e \"\\033[0;31mCannot start HBase. Aborting...\\033[0m\"\n    sleep 3\n    ${PIO_HOME}/bin/pio-stop-all\n    exit 1\n  fi\nfi\n\n#PGSQL\nif [ `echo $SOURCE_TYPE | grep -i pgsql | wc -l` != 0 ] ; then\n  pgsqlStatus=\"$(ps auxwww | grep postgres | wc -l)\"\n  if [[ \"$pgsqlStatus\" < 5 ]]; then\n    # Detect OS\n    OS=`uname`\n    if [[ \"$OS\" = \"Darwin\" ]]; then\n      pg_cmd=`which pg_ctl`\n      if [[ \"$pg_cmd\" != \"\" ]]; then\n        pg_ctl -D /usr/local/var/postgres -l /usr/local/var/postgres/server.log start\n      fi\n    elif [[ \"$OS\" = \"Linux\" ]]; then\n      sudo service postgresql start\n    else\n      echo -e \"\\033[1;31mYour OS $OS is not yet supported for automatic postgresql startup:(\\033[0m\"\n      echo -e \"\\033[1;31mPlease do a manual startup!\\033[0m\"\n      ${PIO_HOME}/bin/pio-stop-all\n      exit 1\n    fi\n  fi\nfi\n\n# PredictionIO Event Server\necho \"Waiting 10 seconds for Storage Repositories to fully initialize...\"\nsleep 10\necho \"Starting PredictionIO Event 
Server...\"\n${PIO_HOME}/bin/pio-daemon ${PIO_HOME}/eventserver.pid eventserver --ip 0.0.0.0\n"
  },
  {
    "path": "bin/pio-stop-all",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Convenience script for stopping all default dependent services in a single\n# node scenario.\n\n# Figure out where PredictionIO is installed\nexport PIO_HOME=\"$(cd `dirname $0`/..; pwd)\"\n\n. 
${PIO_HOME}/bin/load-pio-env.sh\n\nSOURCE_TYPE=$PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\nSOURCE_TYPE=$SOURCE_TYPE$PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\nSOURCE_TYPE=$SOURCE_TYPE$PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE\n\n# PredictionIO Event Server\necho \"Stopping PredictionIO Event Server...\"\nPIDFILE=${PIO_HOME}/eventserver.pid\nif [ -e ${PIDFILE} ]; then\n  cat ${PIDFILE} | xargs kill\n  rm ${PIDFILE}\nfi\n\n# HBase\nif [ `echo $SOURCE_TYPE | grep -i hbase | wc -l` != 0 ] ; then\n  echo \"Stopping HBase...\"\n  if [ -n \"$PIO_STORAGE_SOURCES_HBASE_HOME\" ]; then\n    $PIO_STORAGE_SOURCES_HBASE_HOME/bin/stop-hbase.sh\n  fi\nfi\n\n# Elasticsearch\nif [ `echo $SOURCE_TYPE | grep -i elasticsearch | wc -l` != 0 ] ; then\n  echo \"Stopping Elasticsearch...\"\n  PIDFILE=${PIO_HOME}/es.pid\n  if [ -e ${PIDFILE} ]; then\n    cat ${PIDFILE} | xargs kill\n    rm ${PIDFILE}\n  fi\nfi\n\n#PGSQL\nif [ `echo $SOURCE_TYPE | grep -i pgsql | wc -l` != 0 ] ; then\n  if [ -n \"$PIO_STORAGE_SOURCES_PGSQL_TYPE\" ]; then\n    OS=`uname`\n    if [[ \"$OS\" = \"Darwin\" ]]; then\n      pg_cmd=`which pg_ctl`\n      if [[ \"$pg_cmd\" != \"\" ]]; then\n        pg_ctl -D /usr/local/var/postgres stop -s -m fast\n      fi\n    elif [[ \"$OS\" = \"Linux\" ]]; then\n      sudo service postgresql stop\n    else\n      echo -e \"\\033[1;31mYour OS $OS is not yet supported for automatic postgresql startup:(\\033[0m\"\n      echo -e \"\\033[1;31mPlease do a manual shutdown!\\033[0m\"\n      exit 1\n    fi\n  fi\nfi\n"
  },
  {
    "path": "bin/semver.sh",
    "content": "#!/usr/bin/env sh\n#\n# Copyright (c) 2013, Ray Bejjani\n# All rights reserved.\n#\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions are met:\n#\n# 1. Redistributions of source code must retain the above copyright notice, this\n#    list of conditions and the following disclaimer.\n# 2. Redistributions in binary form must reproduce the above copyright notice,\n#    this list of conditions and the following disclaimer in the documentation\n#    and/or other materials provided with the distribution.\n#\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\n# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR\n# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND\n# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n#\n# The views and conclusions contained in the software and documentation are those\n# of the authors and should not be interpreted as representing official policies,\n# either expressed or implied, of the FreeBSD Project.\n\nfunction semverParseInto() {\n  local RE='[^0-9]*\\([0-9]*\\)[.]\\([0-9]*\\)[.]\\([0-9]*\\)\\([0-9A-Za-z-]*\\)'\n  #MAJOR\n  eval $2=`echo $1 | sed -e \"s#$RE#\\1#\"`\n  #MINOR\n  eval $3=`echo $1 | sed -e \"s#$RE#\\2#\"`\n  #PATCH\n  eval $4=`echo $1 | sed -e \"s#$RE#\\3#\"`\n  #SPECIAL\n  eval $5=`echo $1 | sed -e \"s#$RE#\\4#\"`\n}\n\nfunction semverEQ() {\n  local 
MAJOR_A=0\n  local MINOR_A=0\n  local PATCH_A=0\n  local SPECIAL_A=0\n\n  local MAJOR_B=0\n  local MINOR_B=0\n  local PATCH_B=0\n  local SPECIAL_B=0\n\n  semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A\n  semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B\n\n  if [ $MAJOR_A -ne $MAJOR_B ]; then\n    return 1\n  fi\n\n  if [ $MINOR_A -ne $MINOR_B ]; then\n    return 1\n  fi\n\n  if [ $PATCH_A -ne $PATCH_B ]; then\n    return 1\n  fi\n\n  if [[ \"_$SPECIAL_A\" != \"_$SPECIAL_B\" ]]; then\n    return 1\n  fi\n\n  return 0\n}\n\nfunction semverLT() {\n  local MAJOR_A=0\n  local MINOR_A=0\n  local PATCH_A=0\n  local SPECIAL_A=0\n\n  local MAJOR_B=0\n  local MINOR_B=0\n  local PATCH_B=0\n  local SPECIAL_B=0\n\n  semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A\n  semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B\n\n  if [ $MAJOR_A -lt $MAJOR_B ]; then\n    return 0\n  fi\n\n  if [[ $MAJOR_A -le $MAJOR_B  && $MINOR_A -lt $MINOR_B ]]; then\n    return 0\n  fi\n\n  if [[ $MAJOR_A -le $MAJOR_B  && $MINOR_A -le $MINOR_B && $PATCH_A -lt $PATCH_B ]]; then\n    return 0\n  fi\n\n  if [[ \"_$SPECIAL_A\"  == \"_\" ]] && [[ \"_$SPECIAL_B\"  == \"_\" ]] ; then\n    return 1\n  fi\n  if [[ \"_$SPECIAL_A\"  == \"_\" ]] && [[ \"_$SPECIAL_B\"  != \"_\" ]] ; then\n    return 1\n  fi\n  if [[ \"_$SPECIAL_A\"  != \"_\" ]] && [[ \"_$SPECIAL_B\"  == \"_\" ]] ; then\n    return 0\n  fi\n\n  if [[ \"_$SPECIAL_A\" < \"_$SPECIAL_B\" ]]; then\n    return 0\n  fi\n\n  return 1\n\n}\n\nfunction semverGT() {\n  semverEQ $1 $2\n  local EQ=$?\n\n  semverLT $1 $2\n  local LT=$?\n\n  if [ $EQ -ne 0 ] && [ $LT -ne 0 ]; then\n    return 0\n  else\n    return 1\n  fi\n}\n\nif [ \"___semver.sh\" == \"___`basename $0`\" ]; then\n  MAJOR=0\n  MINOR=0\n  PATCH=0\n  SPECIAL=\"\"\n\n  semverParseInto $1 MAJOR MINOR PATCH SPECIAL\n  echo \"$1 -> M: $MAJOR m:$MINOR p:$PATCH s:$SPECIAL\"\n\n  semverParseInto $2 MAJOR MINOR PATCH SPECIAL\n  echo \"$2 -> M: $MAJOR m:$MINOR p:$PATCH s:$SPECIAL\"\n\n  
semverEQ $1 $2\n  echo \"$1 == $2 -> $?.\"\n\n  semverLT $1 $2\n  echo \"$1 < $2 -> $?.\"\n\n  semverGT $1 $2\n  echo \"$1 > $2 -> $?.\"\nfi\n"
  },
  {
    "path": "bin/travis/pio-start-travis",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Convenience script for starting all default dependent services in a single\n# node scenario.\n\n# Figure out where PredictionIO is installed\nexport PIO_HOME=\"$(cd `dirname $0`/..; pwd)\"\n\n. ${PIO_HOME}/load-pio-env.sh\n\n# HBase\necho \"Starting HBase...\"\nif [ -n \"$PIO_STORAGE_SOURCES_HBASE_HOME\" ]; then\n  $PIO_STORAGE_SOURCES_HBASE_HOME/bin/start-hbase.sh\nelse\n  echo -e \"\\033[0;31mPlease set PIO_STORAGE_SOURCES_HBASE_HOME in conf/pio-env.sh, or in your environment.\\033[0m\"\n  # Kill everything for cleanliness\n  echo -e \"\\033[0;31mCannot start HBase. Aborting...\\033[0m\"\n  sleep 3\n  ${PIO_HOME}/bin/pio-stop-all\n  exit 1\nfi\n\n# PredictionIO Event Server\necho \"Waiting 10 seconds for HBase to fully initialize...\"\nsleep 10\necho \"Starting PredictionIO Event Server...\"\n${PIO_HOME}/pio-daemon ${PIO_HOME}/../eventserver.pid eventserver --ip 0.0.0.0\n"
  },
  {
    "path": "bin/travis/pio-stop-travis",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Convenience script for stopping all default dependent services in a single\n# node scenario.\n\n# Figure out where PredictionIO is installed\nexport PIO_HOME=\"$(cd `dirname $0`/..; pwd)\"\n\n. ${PIO_HOME}/load-pio-env.sh\n\n# PredictionIO Event Server\necho \"Stopping PredictionIO Event Server...\"\nPIDFILE=${PIO_HOME}/../eventserver.pid\nif [ -e ${PIDFILE} ]; then\n  cat ${PIDFILE} | xargs kill\n  rm ${PIDFILE}\nfi\n\n# HBase\necho \"Stopping HBase...\"\nif [ -n \"$PIO_STORAGE_SOURCES_HBASE_HOME\" ]; then\n  $PIO_STORAGE_SOURCES_HBASE_HOME/bin/stop-hbase.sh\nfi\n"
  },
  {
    "path": "build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport PIOBuild._\n\nlazy val scalaSparkDepsVersion = Map(\n  \"2.11\" -> Map(\n    \"2.0\" -> Map(\n      \"akka\" -> \"2.5.16\",\n      \"hadoop\" -> \"2.7.7\",\n      \"json4s\" -> \"3.2.11\"),\n    \"2.1\" -> Map(\n      \"akka\" -> \"2.5.17\",\n      \"hadoop\" -> \"2.7.7\",\n      \"json4s\" -> \"3.2.11\"),\n    \"2.2\" -> Map(\n      \"akka\" -> \"2.5.17\",\n      \"hadoop\" -> \"2.7.7\",\n      \"json4s\" -> \"3.2.11\"),\n    \"2.3\" -> Map(\n      \"akka\" -> \"2.5.17\",\n      \"hadoop\" -> \"2.7.7\",\n      \"json4s\" -> \"3.2.11\")))\n\nname := \"apache-predictionio-parent\"\n\nversion in ThisBuild := \"0.15.0-SNAPSHOT\"\n\norganization in ThisBuild := \"org.apache.predictionio\"\n\nscalaVersion in ThisBuild := sys.props.getOrElse(\"scala.version\", \"2.11.12\")\n\nscalaBinaryVersion in ThisBuild := binaryVersion(scalaVersion.value)\n\ncrossScalaVersions in ThisBuild := Seq(scalaVersion.value)\n\nscalacOptions in ThisBuild ++= Seq(\"-deprecation\", \"-unchecked\", \"-feature\")\n\nscalacOptions in (ThisBuild, Test) ++= Seq(\"-Yrangepos\")\nfork in (ThisBuild, run) := true\n\njavacOptions in (ThisBuild, compile) ++= Seq(\"-source\", 
\"1.8\", \"-target\", \"1.8\",\n  \"-Xlint:deprecation\", \"-Xlint:unchecked\")\n\n// Ignore differentiation of Spark patch levels\nsparkVersion in ThisBuild := sys.props.getOrElse(\"spark.version\", \"2.1.3\")\n\nsparkBinaryVersion in ThisBuild := binaryVersion(sparkVersion.value)\n\nhadoopVersion in ThisBuild := sys.props.getOrElse(\"hadoop.version\", \"2.7.7\")\n\nakkaVersion in ThisBuild := sys.props.getOrElse(\"akka.version\", \"2.5.17\")\n\nelasticsearchVersion in ThisBuild := sys.props.getOrElse(\"elasticsearch.version\", \"6.8.1\")\n\nhbaseVersion in ThisBuild := sys.props.getOrElse(\"hbase.version\", \"1.2.6\")\n\njson4sVersion in ThisBuild := {\n  sparkBinaryVersion.value match {\n    case \"2.0\" | \"2.1\" | \"2.2\" | \"2.3\" => \"3.2.11\"\n    case \"2.4\" => \"3.5.3\"\n  }\n}\n\nval conf = file(\"conf\")\n\nval commonSettings = Seq(\n  autoAPIMappings := true,\n  licenseConfigurations := Set(\"compile\"),\n  licenseReportTypes := Seq(Csv),\n  unmanagedClasspath in Test += conf,\n  unmanagedClasspath in Test += baseDirectory.value.getParentFile / s\"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes\")\n\nval commonTestSettings = Seq(\n  libraryDependencies ++= Seq(\n    \"org.postgresql\"   % \"postgresql\"  % \"9.4-1204-jdbc41\" % \"test\",\n    \"org.scalikejdbc\" %% \"scalikejdbc\" % \"3.1.0\" % \"test\"))\n\nval dataElasticsearch = (project in file(\"storage/elasticsearch\")).\n  settings(commonSettings: _*)\n\nval dataHbase = (project in file(\"storage/hbase\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin)\n\nval dataHdfs = (project in file(\"storage/hdfs\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin)\n\nval dataJdbc = (project in file(\"storage/jdbc\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin)\n\nval dataLocalfs = (project in file(\"storage/localfs\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin)\n\nval dataS3 = (project in 
file(\"storage/s3\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin)\n\nval common = (project in file(\"common\")).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin).\n  disablePlugins(sbtassembly.AssemblyPlugin)\n\nval data = (project in file(\"data\")).\n  dependsOn(common).\n  settings(commonSettings: _*).\n  settings(commonTestSettings: _*).\n  enablePlugins(GenJavadocPlugin).\n  disablePlugins(sbtassembly.AssemblyPlugin)\n\nval core = (project in file(\"core\")).\n  dependsOn(data).\n  settings(commonSettings: _*).\n  settings(commonTestSettings: _*).\n  enablePlugins(GenJavadocPlugin).\n  enablePlugins(BuildInfoPlugin).\n  settings(\n    buildInfoKeys := Seq[BuildInfoKey](\n      name,\n      version,\n      scalaVersion,\n      scalaBinaryVersion,\n      sbtVersion,\n      sparkVersion,\n      hadoopVersion),\n    buildInfoPackage := \"org.apache.predictionio.core\"\n  ).\n  enablePlugins(SbtTwirl).\n  disablePlugins(sbtassembly.AssemblyPlugin)\n\nval e2 = (project in file(\"e2\")).\n  dependsOn(core).\n  settings(commonSettings: _*).\n  enablePlugins(GenJavadocPlugin).\n  disablePlugins(sbtassembly.AssemblyPlugin)\n\nval tools = (project in file(\"tools\")).\n  dependsOn(e2).\n  settings(commonSettings: _*).\n  settings(commonTestSettings: _*).\n  settings(skip in publish := true).\n  enablePlugins(GenJavadocPlugin).\n  enablePlugins(SbtTwirl)\n\nval storageProjectReference = Seq(\n    dataElasticsearch,\n    dataHbase,\n    dataHdfs,\n    dataJdbc,\n    dataLocalfs,\n    dataS3) map Project.projectToRef\n\nval storage = (project in file(\"storage\"))\n  .settings(skip in publish := true)\n  .aggregate(storageProjectReference: _*)\n  .disablePlugins(sbtassembly.AssemblyPlugin)\n\nval assembly = (project in file(\"assembly\")).\n  settings(commonSettings: _*)\n\nval root = (project in file(\".\")).\n  settings(commonSettings: _*).\n  enablePlugins(ScalaUnidocPlugin).\n  settings(\n    unidocProjectFilter in 
(ScalaUnidoc, unidoc) := inAnyProject -- inProjects(storageProjectReference: _*),\n    unidocProjectFilter in (JavaUnidoc, unidoc) := inAnyProject -- inProjects(storageProjectReference: _*),\n    scalacOptions in (ScalaUnidoc, unidoc) ++= Seq(\n      \"-groups\",\n      \"-skip-packages\",\n      Seq(\n        \"akka\",\n        \"org.apache.predictionio.annotation\",\n        \"org.apache.predictionio.authentication\",\n        \"org.apache.predictionio.configuration\",\n        \"org.apache.predictionio.controller.html\",\n        \"org.apache.predictionio.controller.java\",\n        \"org.apache.predictionio.data.api\",\n        \"org.apache.predictionio.data.storage.*\",\n        \"org.apache.predictionio.data.view\",\n        \"org.apache.predictionio.data.webhooks\",\n        \"org.apache.predictionio.tools\",\n        \"org.apache.predictionio.workflow.html\",\n        \"scalikejdbc\").mkString(\":\"),\n      \"-doc-title\",\n      \"PredictionIO Scala API\",\n      \"-doc-version\",\n      version.value,\n      \"-doc-root-content\",\n      \"docs/scaladoc/rootdoc.txt\")).\n  settings(\n    javacOptions in (JavaUnidoc, unidoc) := Seq(\n      \"-subpackages\",\n      \"org.apache.predictionio\",\n      \"-exclude\",\n      Seq(\n        \"org.apache.predictionio.controller.html\",\n        \"org.apache.predictionio.data.api\",\n        \"org.apache.predictionio.data.view\",\n        \"org.apache.predictionio.data.webhooks.*\",\n        \"org.apache.predictionio.workflow\",\n        \"org.apache.predictionio.tools\",\n        \"org.apache.hadoop\").mkString(\":\"),\n      \"-windowtitle\",\n      \"PredictionIO Javadoc \" + version.value,\n      \"-group\",\n      \"Java Controllers\",\n      Seq(\n        \"org.apache.predictionio.controller.java\",\n        \"org.apache.predictionio.data.store.java\").mkString(\":\"),\n      \"-group\",\n      \"Scala Base Classes\",\n      Seq(\n        \"org.apache.predictionio.controller\",\n        
\"org.apache.predictionio.core\",\n        \"org.apache.predictionio.data.storage\",\n        \"org.apache.predictionio.data.storage.*\",\n        \"org.apache.predictionio.data.store\").mkString(\":\"),\n      \"-overview\",\n      \"docs/javadoc/javadoc-overview.html\",\n      \"-noqualifier\",\n      \"java.lang\")).\n  aggregate(common, core, data, tools, e2).\n  disablePlugins(sbtassembly.AssemblyPlugin)\n\nval pioUnidoc = taskKey[Unit](\"Builds PredictionIO ScalaDoc\")\n\npioUnidoc := {\n  (unidoc in Compile).value\n  val log = streams.value.log\n  log.info(\"Adding custom styling.\")\n  IO.append(\n    crossTarget.value / \"unidoc\" / \"lib\" / \"template.css\",\n    IO.read(baseDirectory.value / \"docs\" / \"scaladoc\" / \"api-docs.css\"))\n  IO.append(\n    crossTarget.value / \"unidoc\" / \"lib\" / \"template.js\",\n    IO.read(baseDirectory.value / \"docs\" / \"scaladoc\" / \"api-docs.js\"))\n}\n\nhomepage := Some(url(\"http://predictionio.apache.org\"))\n\npomExtra := {\n  <parent>\n    <groupId>org.apache</groupId>\n    <artifactId>apache</artifactId>\n    <version>18</version>\n  </parent>\n  <scm>\n    <connection>scm:git:github.com/apache/predictionio</connection>\n    <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/predictionio.git</developerConnection>\n    <url>github.com/apache/predictionio</url>\n  </scm>\n  <developers>\n    <developer>\n      <id>donald</id>\n      <name>Donald Szeto</name>\n      <url>http://predictionio.apache.org</url>\n      <email>donald@apache.org</email>\n    </developer>\n  </developers>\n}\n\nchildrenPomExtra in ThisBuild := {\n  <parent>\n    <groupId>{organization.value}</groupId>\n    <artifactId>{name.value}_{scalaBinaryVersion.value}</artifactId>\n    <version>{version.value}</version>\n  </parent>\n}\n\nconcurrentRestrictions in Global := Seq(\n  Tags.limit(Tags.CPU, 1),\n  Tags.limit(Tags.Network, 1),\n  Tags.limit(Tags.Test, 1),\n  Tags.limitAll( 1 )\n)\n\nparallelExecution := 
false\n\nparallelExecution in Global := false\n\ntestOptions in Test += Tests.Argument(\"-oDF\")\n\nprintBuildInfo := {\n  println(s\"PIO_SCALA_VERSION=${scalaVersion.value}\")\n  println(s\"PIO_SPARK_VERSION=${sparkVersion.value}\")\n  println(s\"PIO_HADOOP_VERSION=${hadoopVersion.value}\")\n  println(s\"PIO_ELASTICSEARCH_VERSION=${elasticsearchVersion.value}\")\n  println(s\"PIO_HBASE_VERSION=${hbaseVersion.value}\")\n}\n"
  },
  {
    "path": "common/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-common\"\n\nlibraryDependencies ++= Seq(\n  \"com.typesafe.akka\" %% \"akka-actor\"           % akkaVersion.value,\n  \"com.typesafe.akka\" %% \"akka-slf4j\"           % akkaVersion.value,\n  \"com.typesafe.akka\" %% \"akka-http\"            % \"10.1.5\",\n  \"org.json4s\"        %% \"json4s-native\"        % json4sVersion.value,\n  \"com.typesafe.akka\" %% \"akka-stream\"          % \"2.5.12\"\n)\n\npomExtra := childrenPomExtra.value\n"
  },
  {
    "path": "common/src/main/java/org/apache/predictionio/annotation/DeveloperApi.java",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.annotation;\n\nimport java.lang.annotation.*;\n\n/**\n * A lower-level, unstable API intended for developers.\n *\n * Developer API's might change or be removed in minor versions of Spark.\n *\n * NOTE: If there exists a Scaladoc comment that immediately precedes this\n * annotation, the first line of the comment must be \":: DeveloperApi ::\" with\n * no trailing blank line. This is because of the known issue that Scaladoc\n * displays only either the annotation or the comment, whichever comes first.\n */\n@Retention(RetentionPolicy.RUNTIME)\n@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD,\n        ElementType.PARAMETER, ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE,\n        ElementType.PACKAGE})\npublic @interface DeveloperApi {}\n"
  },
  {
    "path": "common/src/main/java/org/apache/predictionio/annotation/Experimental.java",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.annotation;\n\nimport java.lang.annotation.*;\n\n/**\n * An experimental user-facing API.\n *\n * Experimental API's might change or be removed, or be adopted as first-class\n * API's.\n *\n * NOTE: If there exists a Scaladoc comment that immediately precedes this\n * annotation, the first line of the comment must be \":: Experimental ::\" with\n * no trailing blank line. This is because of the known issue that Scaladoc\n * displays only either the annotation or the comment, whichever comes first.\n */\n@Retention(RetentionPolicy.RUNTIME)\n@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD,\n  ElementType.PARAMETER, ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE,\n  ElementType.PACKAGE})\npublic @interface Experimental {}\n"
  },
  {
    "path": "common/src/main/resources/application.conf",
    "content": "akka {\n  log-config-on-start = false\n  loggers = [\"akka.event.slf4j.Slf4jLogger\"]\n  loglevel = \"INFO\"\n}\n"
  },
  {
    "path": "common/src/main/scala/org/apache/predictionio/akkahttpjson4s/Json4sSupport.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.akkahttpjson4s\n\n// Referenced from https://github.com/hseeberger/akka-http-json\n// because of the difference of supported json4s version.\nimport java.lang.reflect.InvocationTargetException\n\nimport akka.http.scaladsl.marshalling.{ Marshaller, ToEntityMarshaller }\nimport akka.http.scaladsl.model.ContentTypeRange\nimport akka.http.scaladsl.model.MediaType\nimport akka.http.scaladsl.model.MediaTypes.`application/json`\nimport akka.http.scaladsl.unmarshalling.{ FromEntityUnmarshaller, Unmarshaller }\nimport akka.util.ByteString\nimport org.json4s.{ Formats, MappingException, Serialization }\nimport scala.collection.immutable.Seq\n\n/**\n  * Automatic to and from JSON marshalling/unmarshalling using an in-scope *Json4s* protocol.\n  *\n  * Pretty printing is enabled if an implicit [[Json4sSupport.ShouldWritePretty.True]] is in scope.\n  */\nobject Json4sSupport extends Json4sSupport {\n\n  sealed abstract class ShouldWritePretty\n\n  final object ShouldWritePretty {\n    final object True  extends ShouldWritePretty\n    final object False extends ShouldWritePretty\n  }\n}\n\n/**\n  * Automatic to and from JSON 
marshalling/unmarshalling using an in-scope *Json4s* protocol.\n  *\n  * Pretty printing is enabled if an implicit [[Json4sSupport.ShouldWritePretty.True]] is in scope.\n  */\ntrait Json4sSupport {\n  import Json4sSupport._\n\n  def unmarshallerContentTypes: Seq[ContentTypeRange] =\n    mediaTypes.map(ContentTypeRange.apply)\n\n  def mediaTypes: Seq[MediaType.WithFixedCharset] =\n    List(`application/json`)\n\n  private val jsonStringUnmarshaller =\n    Unmarshaller.byteStringUnmarshaller\n      .forContentTypes(unmarshallerContentTypes: _*)\n      .mapWithCharset {\n        case (ByteString.empty, _) => throw Unmarshaller.NoContentException\n        case (data, charset)       => data.decodeString(charset.nioCharset.name)\n      }\n\n  private val jsonStringMarshaller =\n    Marshaller.oneOf(mediaTypes: _*)(Marshaller.stringMarshaller)\n\n  /**\n    * HTTP entity => `A`\n    *\n    * @tparam A type to decode\n    * @return unmarshaller for `A`\n    */\n  implicit def unmarshaller[A: Manifest](implicit serialization: Serialization,\n    formats: Formats): FromEntityUnmarshaller[A] =\n    jsonStringUnmarshaller\n      .map(s => serialization.read(s))\n      .recover { _ => _ =>\n      { case MappingException(_, ite: InvocationTargetException) => throw ite.getCause }\n      }\n\n  /**\n    * `A` => HTTP entity\n    *\n    * @tparam A type to encode, must be upper bounded by `AnyRef`\n    * @return marshaller for any `A` value\n    */\n  implicit def marshaller[A <: AnyRef](implicit serialization: Serialization,\n    formats: Formats,\n    shouldWritePretty: ShouldWritePretty =\n    ShouldWritePretty.False): ToEntityMarshaller[A] =\n    shouldWritePretty match {\n      case ShouldWritePretty.False =>\n        jsonStringMarshaller.compose(serialization.write[A])\n      case ShouldWritePretty.True =>\n        jsonStringMarshaller.compose(serialization.writePretty[A])\n    }\n}\n"
  },
  {
    "path": "common/src/main/scala/org/apache/predictionio/authentication/KeyAuthentication.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.authentication\n\n/**\n  * This is a (very) simple authentication for the dashboard and engine servers\n  * It is highly recommended to implement a stonger authentication mechanism\n  */\n\nimport akka.http.scaladsl.model.HttpRequest\nimport akka.http.scaladsl.model.headers.HttpChallenge\nimport akka.http.scaladsl.server.{AuthenticationFailedRejection, Rejection, RequestContext}\nimport com.typesafe.config.ConfigFactory\nimport scala.concurrent.ExecutionContext.Implicits.global\nimport scala.concurrent.Future\n\ntrait KeyAuthentication {\n\n  object ServerKey {\n    private val config = ConfigFactory.load(\"server.conf\")\n\n    val authEnforced = config.getBoolean(\"org.apache.predictionio.server.key-auth-enforced\")\n    val get = config.getString(\"org.apache.predictionio.server.accessKey\")\n\n    val param = \"accessKey\"\n  }\n\n  def withAccessKeyFromFile: RequestContext => Future[Either[Rejection, HttpRequest]] = {\n    ctx: RequestContext =>\n      val accessKeyParamOpt = ctx.request.uri.query().get(ServerKey.param)\n      Future {\n        val passedKey = accessKeyParamOpt.getOrElse {\n          
Left(AuthenticationFailedRejection(\n            AuthenticationFailedRejection.CredentialsRejected, HttpChallenge(\"\", None)))\n        }\n\n        if (!ServerKey.authEnforced || passedKey.equals(ServerKey.get)) Right(ctx.request)\n        else Left(AuthenticationFailedRejection(\n          AuthenticationFailedRejection.CredentialsRejected, HttpChallenge(\"\", None)))\n\n      }\n  }\n}\n"
  },
  {
    "path": "common/src/main/scala/org/apache/predictionio/configuration/SSLConfiguration.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.configuration\n\nimport java.io.FileInputStream\nimport java.security.KeyStore\n\nimport com.typesafe.config.ConfigFactory\nimport javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}\n\ntrait SSLConfiguration {\n\n  private val serverConfig = ConfigFactory.load(\"server.conf\")\n\n  private val keyStoreResource =\n    serverConfig.getString(\"org.apache.predictionio.server.ssl-keystore-resource\")\n  private val password = serverConfig.getString(\"org.apache.predictionio.server.ssl-keystore-pass\")\n  private val keyAlias = serverConfig.getString(\"org.apache.predictionio.server.ssl-key-alias\")\n\n  private val keyStore = {\n    // Loading keystore from specified file\n    val clientStore = KeyStore.getInstance(\"JKS\")\n    val inputStream = new FileInputStream(\n      getClass().getClassLoader().getResource(keyStoreResource).getFile())\n    clientStore.load(inputStream, password.toCharArray)\n    inputStream.close()\n    clientStore\n  }\n\n  // Creating SSL context\n  def sslContext: SSLContext = {\n    val context = SSLContext.getInstance(\"TLS\")\n    val tmf = 
TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)\n    val kmf = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm)\n    kmf.init(keyStore, password.toCharArray)\n    tmf.init(keyStore)\n    context.init(kmf.getKeyManagers, tmf.getTrustManagers, null)\n    context\n  }\n\n}\n"
  },
  {
    "path": "conf/log4j.properties",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nlog4j.rootLogger=INFO, console, file\n\n# console appender\nlog4j.appender.console=org.apache.log4j.ConsoleAppender\nlog4j.appender.console.follow=true\nlog4j.appender.console.layout=org.apache.log4j.EnhancedPatternLayout\nlog4j.appender.console.layout.ConversionPattern=[%p] [%c{1}] %m%n%throwable{0}\n\n# file appender\nlog4j.appender.file=org.apache.log4j.FileAppender\nlog4j.appender.file.File=${pio.log.dir}/pio.log\nlog4j.appender.file.layout=org.apache.log4j.EnhancedPatternLayout\nlog4j.appender.file.layout.ConversionPattern=%d %-5p %c [%t] - %m%n\n\n# quiet some packages that are too verbose\nlog4j.logger.org.elasticsearch=WARN\nlog4j.logger.org.apache.hadoop=WARN\nlog4j.logger.org.apache.hadoop.hbase.zookeeper=ERROR\nlog4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR\nlog4j.logger.org.apache.spark=WARN\nlog4j.logger.org.apache.zookeeper=ERROR\nlog4j.logger.org.eclipse.jetty=WARN\nlog4j.logger.org.spark-project.jetty=WARN\nlog4j.logger.akka=WARN\n"
  },
  {
    "path": "conf/pio-env.sh.template",
    "content": "#!/usr/bin/env bash\n#\n# Copy this file as pio-env.sh and edit it for your site's configuration.\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# PredictionIO Main Configuration\n#\n# This section controls core behavior of PredictionIO. 
It is very likely that\n# you need to change these to fit your site.\n\n# SPARK_HOME: Apache Spark is a hard dependency and must be configured.\nSPARK_HOME=$PIO_HOME/vendors/spark-2.1.1-bin-hadoop2.6\n\nPOSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.0.0.jar\nMYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar\n\n# ES_CONF_DIR: You must configure this if you have advanced configuration for\n#              your Elasticsearch setup.\n# ES_CONF_DIR=/opt/elasticsearch\n\n# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO\n#                  with Hadoop 2.\n# HADOOP_CONF_DIR=/opt/hadoop\n\n# HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO\n#                 with HBase on a remote cluster.\n# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.2.6/conf\n\n# Filesystem paths where PredictionIO uses as block storage.\nPIO_FS_BASEDIR=$HOME/.pio_store\nPIO_FS_ENGINESDIR=$PIO_FS_BASEDIR/engines\nPIO_FS_TMPDIR=$PIO_FS_BASEDIR/tmp\n\n# PredictionIO Storage Configuration\n#\n# This section controls programs that make use of PredictionIO's built-in\n# storage facilities. 
Default values are shown below.\n#\n# For more information on storage configuration please refer to\n# http://predictionio.apache.org/system/anotherdatastore/\n\n# Storage Repositories\n\n# Default is to use PostgreSQL\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=pio_meta\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL\n\nPIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event\nPIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=PGSQL\n\nPIO_STORAGE_REPOSITORIES_MODELDATA_NAME=pio_model\nPIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=PGSQL\n\n# Storage Data Sources\n\n# PostgreSQL Default Settings\n# Please change \"pio\" to your database name in PIO_STORAGE_SOURCES_PGSQL_URL\n# Please change PIO_STORAGE_SOURCES_PGSQL_USERNAME and\n# PIO_STORAGE_SOURCES_PGSQL_PASSWORD accordingly\nPIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc\nPIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql://localhost/pio\nPIO_STORAGE_SOURCES_PGSQL_USERNAME=pio\nPIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio\n\n# MySQL Example\n# PIO_STORAGE_SOURCES_MYSQL_TYPE=jdbc\n# PIO_STORAGE_SOURCES_MYSQL_URL=jdbc:mysql://localhost/pio\n# PIO_STORAGE_SOURCES_MYSQL_USERNAME=pio\n# PIO_STORAGE_SOURCES_MYSQL_PASSWORD=pio\n\n# Elasticsearch Example\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.1\n# Optional basic HTTP auth\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_USERNAME=my-name\n# PIO_STORAGE_SOURCES_ELASTICSEARCH_PASSWORD=my-secret\n\n# Local File System Example\n# PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs\n# PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/models\n\n# HBase Example\n# PIO_STORAGE_SOURCES_HBASE_TYPE=hbase\n# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.2.6\n\n# AWS S3 Example\n# PIO_STORAGE_SOURCES_S3_TYPE=s3\n# PIO_STORAGE_SOURCES_S3_BUCKET_NAME=pio_bucket\n# 
PIO_STORAGE_SOURCES_S3_BASE_PATH=pio_model\n"
  },
  {
    "path": "conf/pio-env.sh.travis",
    "content": "#!/usr/bin/env bash\n#\n# Copy this file as pio-env.sh and edit it for your site's configuration.\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# PredictionIO Main Configuration\n#\n# This section controls core behavior of PredictionIO. It is very likely that\n# you need to change these to fit your site.\n\n# SPARK_HOME: Apache Spark is a hard dependency and must be configured.\n# it is set up in script.travis.sh\nSPARK_HOME=$SPARK_HOME\n\n# Filesystem paths where PredictionIO uses as block storage.\nPIO_FS_BASEDIR=$HOME/.pio_store\nPIO_FS_ENGINESDIR=$PIO_FS_BASEDIR/engines\nPIO_FS_TMPDIR=$PIO_FS_BASEDIR/tmp\n\n# PredictionIO Storage Configuration\n#\n# This section controls programs that make use of PredictionIO's built-in\n# storage facilities. 
Default values are shown below.\n\n# Storage Data Sources\nPIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs\nPIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/models\n\nPIO_STORAGE_SOURCES_HBASE_TYPE=hbase\nPIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME\n\n# Storage Data Sources (pgsql)\nPIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc\nPIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql:predictionio\n\nPIO_STORAGE_SOURCES_PGSQL_USERNAME=postgres\nPIO_STORAGE_SOURCES_PGSQL_PASSWORD=\n\n# Storage Repositories\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=predictionio_metadata\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL\n\nPIO_STORAGE_REPOSITORIES_MODELDATA_NAME=predictionio_modeldata\nPIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=PGSQL\n\nPIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=predictionio_eventdata\nPIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=PGSQL\n"
  },
  {
    "path": "conf/pio-vendors.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# IMPORTANT: PIO_*_VERSION for dependencies must be set before envoking this script.\n# `source conf/set_build_profile.sh $BUILD_PROFILE` to get the proper versions\n\nif [ -z \"$PIO_SCALA_VERSION\" ]; then\n    PIO_SCALA_VERSION=\"2.11.12\"\nfi\n\nif [ -z \"$PIO_SPARK_VERSION\" ]; then\n    PIO_SPARK_VERSION=\"2.1.3\"\nfi\n\nif [ -z \"$PIO_HADOOP_VERSION\" ]; then\n    PIO_HADOOP_VERSION=\"2.7.7\"\nfi\n\nif [ -z \"$PIO_ELASTICSEARCH_VERSION\" ]; then\n    PIO_ELASTICSEARCH_VERSION=\"6.8.1\"\nfi\n\nif [ -z \"$PIO_HBASE_VERSION\" ]; then\n    PIO_HBASE_VERSION=\"1.2.6\"\nfi\n\nexport ES_IMAGE=\"docker.elastic.co/elasticsearch/elasticsearch\"\nexport ES_TAG=\"$PIO_ELASTICSEARCH_VERSION\"\n\nHBASE_MAJOR=`echo $PIO_HBASE_VERSION | awk -F. '{print $1 \".\" $2}'`\nexport HBASE_TAG=\"$HBASE_MAJOR\"\n\nPGSQL_JAR=postgresql-9.4-1204.jdbc41.jar\nPGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR}\n\nHADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. 
'{print $1 \".\" $2}'`\nSPARK_DIR=spark-${PIO_SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR}\nSPARK_ARCHIVE=${SPARK_DIR}.tgz\nSPARK_DOWNLOAD_MIRROR=https://www.apache.org/dyn/closer.lua\\?action=download\\&filename=spark/spark-${PIO_SPARK_VERSION}/${SPARK_ARCHIVE}\nSPARK_DOWNLOAD_ARCHIVE=https://archive.apache.org/dist/spark/spark-${PIO_SPARK_VERSION}/${SPARK_ARCHIVE}\n"
  },
  {
    "path": "conf/server.conf",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Engine and dashboard Server related configurations\norg.apache.predictionio.server {\n\n  # This access key is used by org.apache.predictionio.authentication.KeyAuthentication\n  # to authenticate Evalutaion Dashboard, and Engine Server /stop and /reload enpoints\n  # Should be passed as a query string param\n  key-auth-enforced = \"false\"\n  accessKey = \"\"\n\n  # configs used by org.apache.predictionio.configuration.SSLConfiguration\n\n  ssl-enforced = \"false\"\n  ssl-keystore-resource = \"keystore.jks\"\n  ssl-keystore-pass = \"pioserver\"\n  ssl-key-alias = \"selfsigned\"\n}\n"
  },
  {
    "path": "core/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-core\"\n\nlibraryDependencies ++= Seq(\n  \"com.github.scopt\"       %% \"scopt\"            % \"3.5.0\",\n  \"com.google.code.gson\"    % \"gson\"             % \"2.5\",\n  \"com.twitter\"            %% \"chill-bijection\"  % \"0.7.2\",\n  \"de.javakaffee\"           % \"kryo-serializers\" % \"0.37\",\n  \"net.jodah\"               % \"typetools\"        % \"0.3.1\",\n  \"org.apache.spark\"       %% \"spark-core\"       % sparkVersion.value % \"provided\",\n  \"org.json4s\"             %% \"json4s-ext\"       % json4sVersion.value,\n  \"org.scalaj\"             %% \"scalaj-http\"      % \"1.1.6\",\n  \"org.slf4j\"               % \"slf4j-log4j12\"    % \"1.7.18\",\n  \"org.scalatest\"          %% \"scalatest\"        % \"2.1.7\" % \"test\",\n  \"org.specs2\"             %% \"specs2\"           % \"2.3.13\" % \"test\",\n  \"org.scalamock\"          %% \"scalamock-scalatest-support\" % \"3.5.0\" % \"test\",\n  \"com.h2database\"           % \"h2\"             % \"1.4.196\" % \"test\"\n)\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/CustomQuerySerializer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseQuerySerializer\n\n/** If your query class cannot be automatically serialized/deserialized to/from\n  * JSON, implement a trait by extending this trait, and overriding the\n  * `querySerializer` member with your\n  * [[https://github.com/json4s/json4s#serializing-non-supported-types custom JSON4S serializer]].\n  * Algorithm and serving classes using your query class would only need to mix\n  * in the trait to enable the custom serializer.\n  *\n  * @group Helper\n  */\ntrait CustomQuerySerializer extends BaseQuerySerializer\n\n/** DEPRECATED. Use [[CustomQuerySerializer]] instead.\n  *\n  * @group Helper\n  */\n@deprecated(\"Use CustomQuerySerializer instead.\", \"0.9.2\")\ntrait WithQuerySerializer extends CustomQuerySerializer\n\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Deployment.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseEngine\n\nimport scala.language.implicitConversions\n\n/** Defines a deployment that contains an [[Engine]]\n  *\n  * @group Engine\n  */\ntrait Deployment extends EngineFactory {\n  protected[this] var _engine: BaseEngine[_, _, _, _] = _\n  protected[this] var engineSet: Boolean = false\n\n  /** Returns the [[Engine]] of this [[Deployment]] */\n  def apply(): BaseEngine[_, _, _, _] = {\n    assert(engineSet, \"Engine not set\")\n    _engine\n  }\n\n  /** Returns the [[Engine]] contained in this [[Deployment]]. 
*/\n  private[predictionio]\n  def engine: BaseEngine[_, _, _, _] = {\n    assert(engineSet, \"Engine not set\")\n    _engine\n  }\n\n  /** Sets the [[Engine]] for this [[Deployment]]\n    *\n    * @param engine An implementation of [[Engine]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    */\n  def engine_=[EI, Q, P, A](engine: BaseEngine[EI, Q, P, A]) {\n    assert(!engineSet, \"Engine can be set at most once\")\n    _engine = engine\n    engineSet = true\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.core.BaseAlgorithm\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.predictionio.core.BasePreparator\nimport org.apache.predictionio.core.BaseServing\nimport org.apache.predictionio.core.Doer\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.data.storage.StorageClientException\nimport org.apache.predictionio.workflow.CreateWorkflow\nimport org.apache.predictionio.workflow.EngineLanguage\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.apache.predictionio.workflow.NameParamsSerializer\nimport org.apache.predictionio.workflow.PersistentModelManifest\nimport org.apache.predictionio.workflow.SparkWorkflowUtils\nimport org.apache.predictionio.workflow.StopAfterPrepareInterruption\nimport org.apache.predictionio.workflow.StopAfterReadInterruption\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.predictionio.workflow.WorkflowUtils\nimport 
org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.read\n\nimport scala.collection.JavaConversions\nimport scala.language.implicitConversions\n\n/** This class chains up the entire data process. PredictionIO uses this\n  * information to create workflows and deployments. In Scala, you should\n  * implement an object that extends the [[EngineFactory]] trait similar to the\n  * following example.\n  *\n  * {{{\n  * object ItemRankEngine extends EngineFactory {\n  *   def apply() = {\n  *     new Engine(\n  *       classOf[ItemRankDataSource],\n  *       classOf[ItemRankPreparator],\n  *       Map(\n  *         \"knn\" -> classOf[KNNAlgorithm],\n  *         \"rand\" -> classOf[RandomAlgorithm],\n  *         \"mahoutItemBased\" -> classOf[MahoutItemBasedAlgorithm]),\n  *       classOf[ItemRankServing])\n  *   }\n  * }\n  * }}}\n  *\n  * @see [[EngineFactory]]\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation info class.\n  * @tparam PD Prepared data class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @tparam A Actual value class.\n  * @param dataSourceClassMap Map of data source names to class.\n  * @param preparatorClassMap Map of preparator names to class.\n  * @param algorithmClassMap Map of algorithm names to classes.\n  * @param servingClassMap Map of serving names to class.\n  * @group Engine\n  */\nclass Engine[TD, EI, PD, Q, P, A](\n    val dataSourceClassMap: Map[String,\n      Class[_ <: BaseDataSource[TD, EI, Q, A]]],\n    val preparatorClassMap: Map[String, Class[_ <: BasePreparator[TD, PD]]],\n    val algorithmClassMap: Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]],\n    val servingClassMap: Map[String, Class[_ <: BaseServing[Q, P]]])\n  extends BaseEngine[EI, Q, P, A] {\n\n  private[predictionio]\n  implicit lazy val formats = Utils.json4sDefaultFormats +\n    new NameParamsSerializer\n\n  
@transient lazy protected val logger = Logger[this.type]\n\n  /** This auxiliary constructor is provided for backward compatibility.\n    *\n    * @param dataSourceClass Data source class.\n    * @param preparatorClass Preparator class.\n    * @param algorithmClassMap Map of algorithm names to classes.\n    * @param servingClass Serving class.\n    */\n  def this(\n    dataSourceClass: Class[_ <: BaseDataSource[TD, EI, Q, A]],\n    preparatorClass: Class[_ <: BasePreparator[TD, PD]],\n    algorithmClassMap: Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]],\n    servingClass: Class[_ <: BaseServing[Q, P]]) = this(\n      Map(\"\" -> dataSourceClass),\n      Map(\"\" -> preparatorClass),\n      algorithmClassMap,\n      Map(\"\" -> servingClass)\n    )\n\n  /** Java-friendly constructor\n    *\n    * @param dataSourceClass Data source class.\n    * @param preparatorClass Preparator class.\n    * @param algorithmClassMap Map of algorithm names to classes.\n    * @param servingClass Serving class.\n    */\n  def this(dataSourceClass: Class[_ <: BaseDataSource[TD, EI, Q, A]],\n    preparatorClass: Class[_ <: BasePreparator[TD, PD]],\n    algorithmClassMap: _root_.java.util.Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]],\n    servingClass: Class[_ <: BaseServing[Q, P]]) = this(\n    Map(\"\" -> dataSourceClass),\n    Map(\"\" -> preparatorClass),\n    JavaConversions.mapAsScalaMap(algorithmClassMap).toMap,\n    Map(\"\" -> servingClass)\n  )\n\n  /** Returns a new Engine instance, mimicking case class's copy method behavior.\n    */\n  def copy(\n    dataSourceClassMap: Map[String, Class[_ <: BaseDataSource[TD, EI, Q, A]]]\n      = dataSourceClassMap,\n    preparatorClassMap: Map[String, Class[_ <: BasePreparator[TD, PD]]]\n      = preparatorClassMap,\n    algorithmClassMap: Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]]\n      = algorithmClassMap,\n    servingClassMap: Map[String, Class[_ <: BaseServing[Q, P]]]\n      = servingClassMap): Engine[TD, EI, PD, 
Q, P, A] = {\n    new Engine(\n      dataSourceClassMap,\n      preparatorClassMap,\n      algorithmClassMap,\n      servingClassMap)\n  }\n\n  /** Training this engine would return a list of models.\n    *\n    * @param sc An instance of SparkContext.\n    * @param engineParams An instance of [[EngineParams]] for running a single training.\n    * @param params An instance of [[WorkflowParams]] that controls the workflow.\n    * @return A list of models.\n    */\n  def train(\n      sc: SparkContext,\n      engineParams: EngineParams,\n      engineInstanceId: String,\n      params: WorkflowParams): Seq[Any] = {\n    val (dataSourceName, dataSourceParams) = engineParams.dataSourceParams\n    val dataSource = Doer(dataSourceClassMap(dataSourceName), dataSourceParams)\n\n    val (preparatorName, preparatorParams) = engineParams.preparatorParams\n    val preparator = Doer(preparatorClassMap(preparatorName), preparatorParams)\n\n    val algoParamsList = engineParams.algorithmParamsList\n    require(\n      algoParamsList.size > 0,\n      \"EngineParams.algorithmParamsList must have at least 1 element.\")\n\n    val algorithms = algoParamsList.map { case (algoName, algoParams) =>\n      Doer(algorithmClassMap(algoName), algoParams)\n    }\n\n    val models = Engine.train(\n      sc, dataSource, preparator, algorithms, params)\n\n    val algoCount = algorithms.size\n    val algoTuples: Seq[(String, Params, BaseAlgorithm[_, _, _, _], Any)] =\n    (0 until algoCount).map { ax => {\n      // val (name, params) = algoParamsList(ax)\n      val (name, params) = algoParamsList(ax)\n      (name, params, algorithms(ax), models(ax))\n    }}\n\n    makeSerializableModels(\n      sc,\n      engineInstanceId = engineInstanceId,\n      algoTuples = algoTuples)\n  }\n\n  /** Algorithm models can be persisted before deploy. However, it is also\n    * possible that models are not persisted. 
This method retrains non-persisted\n    * models and return a list of models that can be used directly in deploy.\n    */\n  private[predictionio]\n  def prepareDeploy(\n    sc: SparkContext,\n    engineParams: EngineParams,\n    engineInstanceId: String,\n    persistedModels: Seq[Any],\n    params: WorkflowParams): Seq[Any] = {\n\n    val algoParamsList = engineParams.algorithmParamsList\n    val algorithms = algoParamsList.map { case (algoName, algoParams) =>\n      Doer(algorithmClassMap(algoName), algoParams)\n    }\n\n    val models = if (persistedModels.exists(m => m.isInstanceOf[Unit])) {\n      // If any of persistedModels is Unit, we need to re-train the model.\n      logger.info(\"Some persisted models are Unit, need to re-train.\")\n      val (dataSourceName, dataSourceParams) = engineParams.dataSourceParams\n      val dataSource = Doer(dataSourceClassMap(dataSourceName), dataSourceParams)\n\n      val (preparatorName, preparatorParams) = engineParams.preparatorParams\n      val preparator = Doer(preparatorClassMap(preparatorName), preparatorParams)\n\n      val td = dataSource.readTrainingBase(sc)\n      val pd = preparator.prepareBase(sc, td)\n\n      val models = algorithms.zip(persistedModels).map { case (algo, m) =>\n        m match {\n          case () => algo.trainBase(sc, pd)\n          case _ => m\n        }\n      }\n      models\n    } else {\n      logger.info(\"Using persisted model\")\n      persistedModels\n    }\n\n    models\n    .zip(algorithms)\n    .zip(algoParamsList)\n    .zipWithIndex\n    .map {\n      case (((model, algo), (algoName, algoParams)), ax) => {\n        model match {\n          case modelManifest: PersistentModelManifest => {\n            logger.info(\"Custom-persisted model detected for algorithm \" +\n              algo.getClass.getName)\n            SparkWorkflowUtils.getPersistentModel(\n              modelManifest,\n              Seq(engineInstanceId, ax, algoName).mkString(\"-\"),\n              algoParams,\n    
          Some(sc),\n              getClass.getClassLoader)\n          }\n          case m => {\n            try {\n              logger.info(\n                s\"Loaded model ${m.getClass.getName} for algorithm \" +\n                s\"${algo.getClass.getName}\")\n              sc.stop\n            } catch {\n              case e: NullPointerException =>\n                logger.warn(\n                  s\"Null model detected for algorithm ${algo.getClass.getName}\")\n            }\n            m\n          }\n        }  // model match\n      }\n    }\n  }\n\n  /** Extract model for persistent layer.\n    *\n    * PredictionIO persists models for future use. It allows custom\n    * implementation for persisting models. You need to implement the\n    * [[org.apache.predictionio.controller.PersistentModel]] interface. This method\n    * traverses all models in the workflow. If the model is a\n    * [[org.apache.predictionio.controller.PersistentModel]], it calls the save method\n    * for custom persistence logic.\n    *\n    * For model doesn't support custom logic, PredictionIO serializes the whole\n    * model if the corresponding algorithm is local. On the other hand, if the\n    * model is parallel (i.e. 
model associated with a number of huge RDDs), this\n    * method returns Unit, in which case PredictionIO will retrain the whole\n    * model from scratch next time it is used.\n    */\n  private def makeSerializableModels(\n    sc: SparkContext,\n    engineInstanceId: String,\n    // AlgoName, AlgoParams, Algo, Model\n    algoTuples: Seq[(String, Params, BaseAlgorithm[_, _, _, _], Any)]\n  ): Seq[Any] = {\n\n    logger.info(s\"engineInstanceId=$engineInstanceId\")\n\n    algoTuples\n    .zipWithIndex\n    .map { case ((name, params, algo, model), ax) =>\n      algo.makePersistentModel(\n        sc = sc,\n        modelId = Seq(engineInstanceId, ax, name).mkString(\"-\"),\n        algoParams = params,\n        bm = model)\n    }\n  }\n\n  /** This is implemented such that [[org.apache.predictionio.controller.Evaluation]] can\n    * use this method to generate inputs for [[org.apache.predictionio.controller.Metric]].\n    *\n    * @param sc An instance of SparkContext.\n    * @param engineParams An instance of [[EngineParams]] for running a single evaluation.\n    * @param params An instance of [[WorkflowParams]] that controls the workflow.\n    * @return A list of evaluation information and RDD of query, predicted\n    *         result, and actual result tuples.\n    */\n  def eval(\n    sc: SparkContext,\n    engineParams: EngineParams,\n    params: WorkflowParams)\n  : Seq[(EI, RDD[(Q, P, A)])] = {\n    val (dataSourceName, dataSourceParams) = engineParams.dataSourceParams\n    val dataSource = Doer(dataSourceClassMap(dataSourceName), dataSourceParams)\n\n    val (preparatorName, preparatorParams) = engineParams.preparatorParams\n    val preparator = Doer(preparatorClassMap(preparatorName), preparatorParams)\n\n    val algoParamsList = engineParams.algorithmParamsList\n    require(\n      algoParamsList.size > 0,\n      \"EngineParams.algorithmParamsList must have at least 1 element.\")\n\n    val algorithms = algoParamsList.map { case (algoName, algoParams) => {\n      
try {\n        Doer(algorithmClassMap(algoName), algoParams)\n      } catch {\n        case e: NoSuchElementException => {\n          if (algoName == \"\") {\n            logger.error(\"Empty algorithm name supplied but it could not \" +\n              \"match with any algorithm in the engine's definition. \" +\n              \"Existing algorithm name(s) are: \" +\n              s\"${algorithmClassMap.keys.mkString(\", \")}. Aborting.\")\n          } else {\n            logger.error(s\"$algoName cannot be found in the engine's \" +\n              \"definition. Existing algorithm name(s) are: \" +\n              s\"${algorithmClassMap.keys.mkString(\", \")}. Aborting.\")\n          }\n          sys.exit(1)\n        }\n      }\n    }}\n\n    val (servingName, servingParams) = engineParams.servingParams\n    val serving = Doer(servingClassMap(servingName), servingParams)\n\n    Engine.eval(sc, dataSource, preparator, algorithms, serving)\n  }\n\n  override def jValueToEngineParams(\n    variantJson: JValue,\n    jsonExtractor: JsonExtractorOption): EngineParams = {\n\n    val engineLanguage = EngineLanguage.Scala\n    // Extract EngineParams\n    logger.info(s\"Extracting datasource params...\")\n    val dataSourceParams: (String, Params) =\n      WorkflowUtils.getParamsFromJsonByFieldAndClass(\n        variantJson,\n        \"datasource\",\n        dataSourceClassMap,\n        engineLanguage,\n        jsonExtractor)\n    logger.info(s\"Datasource params: $dataSourceParams\")\n\n    logger.info(s\"Extracting preparator params...\")\n    val preparatorParams: (String, Params) =\n      WorkflowUtils.getParamsFromJsonByFieldAndClass(\n        variantJson,\n        \"preparator\",\n        preparatorClassMap,\n        engineLanguage,\n        jsonExtractor)\n    logger.info(s\"Preparator params: $preparatorParams\")\n\n    val algorithmsParams: Seq[(String, Params)] =\n      variantJson findField {\n        case JField(\"algorithms\", _) => true\n        case _ => false\n 
     } map { jv =>\n        val algorithmsParamsJson = jv._2\n        algorithmsParamsJson match {\n          case JArray(s) => s.map { algorithmParamsJValue =>\n            val eap = algorithmParamsJValue.extract[CreateWorkflow.AlgorithmParams]\n            (\n              eap.name,\n              WorkflowUtils.extractParams(\n                engineLanguage,\n                compact(render(eap.params)),\n                algorithmClassMap(eap.name),\n                jsonExtractor)\n            )\n          }\n          case _ => Nil\n        }\n      } getOrElse Seq((\"\", EmptyParams()))\n\n    logger.info(s\"Extracting serving params...\")\n    val servingParams: (String, Params) =\n      WorkflowUtils.getParamsFromJsonByFieldAndClass(\n        variantJson,\n        \"serving\",\n        servingClassMap,\n        engineLanguage,\n        jsonExtractor)\n    logger.info(s\"Serving params: $servingParams\")\n\n    new EngineParams(\n      dataSourceParams = dataSourceParams,\n      preparatorParams = preparatorParams,\n      algorithmParamsList = algorithmsParams,\n      servingParams = servingParams)\n  }\n\n  private[predictionio] def engineInstanceToEngineParams(\n    engineInstance: EngineInstance,\n    jsonExtractor: JsonExtractorOption): EngineParams = {\n\n    implicit val formats = DefaultFormats\n    val engineLanguage = EngineLanguage.Scala\n\n    val dataSourceParamsWithName: (String, Params) = {\n      val (name, params) =\n        read[(String, JValue)](engineInstance.dataSourceParams)\n      if (!dataSourceClassMap.contains(name)) {\n        logger.error(s\"Unable to find datasource class with name '$name'\" +\n          \" defined in Engine.\")\n        sys.exit(1)\n      }\n      val extractedParams = WorkflowUtils.extractParams(\n        engineLanguage,\n        compact(render(params)),\n        dataSourceClassMap(name),\n        jsonExtractor)\n      (name, extractedParams)\n    }\n\n    val preparatorParamsWithName: (String, Params) = {\n      
val (name, params) =\n        read[(String, JValue)](engineInstance.preparatorParams)\n      if (!preparatorClassMap.contains(name)) {\n        logger.error(s\"Unable to find preparator class with name '$name'\" +\n          \" defined in Engine.\")\n        sys.exit(1)\n      }\n      val extractedParams = WorkflowUtils.extractParams(\n        engineLanguage,\n        compact(render(params)),\n        preparatorClassMap(name),\n        jsonExtractor)\n      (name, extractedParams)\n    }\n\n    val algorithmsParamsWithNames =\n      read[Seq[(String, JValue)]](engineInstance.algorithmsParams).map {\n        case (algoName, params) =>\n          val extractedParams = WorkflowUtils.extractParams(\n            engineLanguage,\n            compact(render(params)),\n            algorithmClassMap(algoName),\n            jsonExtractor)\n          (algoName, extractedParams)\n      }\n\n    val servingParamsWithName: (String, Params) = {\n      val (name, params) = read[(String, JValue)](engineInstance.servingParams)\n      if (!servingClassMap.contains(name)) {\n        logger.error(s\"Unable to find serving class with name '$name'\" +\n          \" defined in Engine.\")\n        sys.exit(1)\n      }\n      val extractedParams = WorkflowUtils.extractParams(\n        engineLanguage,\n        compact(render(params)),\n        servingClassMap(name),\n        jsonExtractor)\n      (name, extractedParams)\n    }\n\n    new EngineParams(\n      dataSourceParams = dataSourceParamsWithName,\n      preparatorParams = preparatorParamsWithName,\n      algorithmParamsList = algorithmsParamsWithNames,\n      servingParams = servingParamsWithName)\n  }\n}\n\n/** This object contains concrete implementation for some methods of the\n  * [[Engine]] class.\n  *\n  * @group Engine\n  */\nobject Engine {\n  private type EX = Int\n  private type AX = Int\n  private type QX = Long\n\n  @transient lazy private val logger = Logger[this.type]\n\n  /** Helper class to accept either a single data 
source, or a map of data\n    * sources, with a companion object providing implicit conversions, so\n    * using this class directly is not necessary.\n    *\n    * @tparam TD Training data class\n    * @tparam EI Evaluation information class\n    * @tparam Q Input query class\n    * @tparam A Actual result class\n    */\n  class DataSourceMap[TD, EI, Q, A](\n    val m: Map[String, Class[_ <: BaseDataSource[TD, EI, Q, A]]]) {\n    def this(c: Class[_ <: BaseDataSource[TD, EI, Q, A]]) = this(Map(\"\" -> c))\n  }\n\n  /** Companion object providing implicit conversions, so using this directly\n    * is not necessary.\n    */\n  object DataSourceMap {\n    implicit def cToMap[TD, EI, Q, A](\n      c: Class[_ <: BaseDataSource[TD, EI, Q, A]]):\n      DataSourceMap[TD, EI, Q, A] = new DataSourceMap(c)\n    implicit def mToMap[TD, EI, Q, A](\n      m: Map[String, Class[_ <: BaseDataSource[TD, EI, Q, A]]]):\n      DataSourceMap[TD, EI, Q, A] = new DataSourceMap(m)\n  }\n\n  /** Helper class to accept either a single preparator, or a map of\n    * preparators, with a companion object providing implicit conversions, so\n    * using this class directly is not necessary.\n    *\n    * @tparam TD Training data class\n    * @tparam PD Prepared data class\n    */\n  class PreparatorMap[TD, PD](\n    val m: Map[String, Class[_ <: BasePreparator[TD, PD]]]) {\n    def this(c: Class[_ <: BasePreparator[TD, PD]]) = this(Map(\"\" -> c))\n  }\n\n  /** Companion object providing implicit conversions, so using this directly\n    * is not necessary.\n    */\n  object PreparatorMap {\n    implicit def cToMap[TD, PD](\n      c: Class[_ <: BasePreparator[TD, PD]]):\n      PreparatorMap[TD, PD] = new PreparatorMap(c)\n    implicit def mToMap[TD, PD](\n      m: Map[String, Class[_ <: BasePreparator[TD, PD]]]):\n      PreparatorMap[TD, PD] = new PreparatorMap(m)\n  }\n\n  /** Helper class to accept either a single serving, or a map of serving, with\n    * a companion object providing implicit 
conversions, so using this class\n    * directly is not necessary.\n    *\n    * @tparam Q Input query class\n    * @tparam P Predicted result class\n    */\n  class ServingMap[Q, P](\n    val m: Map[String, Class[_ <: BaseServing[Q, P]]]) {\n    def this(c: Class[_ <: BaseServing[Q, P]]) = this(Map(\"\" -> c))\n  }\n\n  /** Companion object providing implicit conversions, so using this directly\n    * is not necessary.\n    */\n  object ServingMap {\n    implicit def cToMap[Q, P](\n      c: Class[_ <: BaseServing[Q, P]]): ServingMap[Q, P] =\n        new ServingMap(c)\n    implicit def mToMap[Q, P](\n      m: Map[String, Class[_ <: BaseServing[Q, P]]]): ServingMap[Q, P] =\n        new ServingMap(m)\n  }\n\n  /** Convenient method for returning an instance of [[Engine]].\n    *\n    * @param dataSourceMap Accepts either an instance of Class of the data\n    *                      source, or a Map of data source classes (implicitly\n    *                      converted to [[DataSourceMap]]).\n    * @param preparatorMap Accepts either an instance of Class of the\n    *                      preparator, or a Map of preparator classes\n    *                      (implicitly converted to [[PreparatorMap]]).\n    * @param algorithmClassMap Accepts a Map of algorithm classes.\n    * @param servingMap Accepts either an instance of Class of the serving, or\n    *                   a Map of serving classes (implicitly converted to\n    *                   [[ServingMap]]).\n    * @tparam TD Training data class\n    * @tparam EI Evaluation information class\n    * @tparam PD Prepared data class\n    * @tparam Q Input query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    * @return An instance of [[Engine]]\n    */\n  def apply[TD, EI, PD, Q, P, A](\n    dataSourceMap: DataSourceMap[TD, EI, Q, A],\n    preparatorMap: PreparatorMap[TD, PD],\n    algorithmClassMap: Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]],\n    servingMap: 
ServingMap[Q, P]): Engine[TD, EI, PD, Q, P, A] = new Engine(\n      dataSourceMap.m,\n      preparatorMap.m,\n      algorithmClassMap,\n      servingMap.m\n    )\n\n  /** Provides concrete implementation of training for [[Engine]].\n    *\n    * @param sc An instance of SparkContext\n    * @param dataSource An instance of data source\n    * @param preparator An instance of preparator\n    * @param algorithmList A list of algorithm instances\n    * @param params An instance of [[WorkflowParams]] that controls the training\n    *               process.\n    * @tparam TD Training data class\n    * @tparam PD Prepared data class\n    * @tparam Q Input query class\n    * @return A list of trained models\n    */\n  def train[TD, PD, Q](\n      sc: SparkContext,\n      dataSource: BaseDataSource[TD, _, Q, _],\n      preparator: BasePreparator[TD, PD],\n      algorithmList: Seq[BaseAlgorithm[PD, _, Q, _]],\n      params: WorkflowParams\n    ): Seq[Any] = {\n    logger.info(\"EngineWorkflow.train\")\n    logger.info(s\"DataSource: $dataSource\")\n    logger.info(s\"Preparator: $preparator\")\n    logger.info(s\"AlgorithmList: $algorithmList\")\n\n    if (params.skipSanityCheck) {\n      logger.info(\"Data sanity check is off.\")\n    } else {\n      logger.info(\"Data sanity check is on.\")\n    }\n\n    val td = try {\n      dataSource.readTrainingBase(sc)\n    } catch {\n      case e: StorageClientException =>\n        logger.error(s\"Error occurred reading from data source. (Reason: \" +\n          e.getMessage + \") Please see the log for debugging details.\", e)\n        sys.exit(1)\n    }\n\n    if (!params.skipSanityCheck) {\n      td match {\n        case sanityCheckable: SanityCheck => {\n          logger.info(s\"${td.getClass.getName} supports data sanity\" +\n            \" check. 
Performing check.\")\n          sanityCheckable.sanityCheck()\n        }\n        case _ => {\n          logger.info(s\"${td.getClass.getName} does not support\" +\n            \" data sanity check. Skipping check.\")\n        }\n      }\n    }\n\n    if (params.stopAfterRead) {\n      logger.info(\"Stopping here because --stop-after-read is set.\")\n      throw StopAfterReadInterruption()\n    }\n\n    val pd = preparator.prepareBase(sc, td)\n\n    if (!params.skipSanityCheck) {\n      pd match {\n        case sanityCheckable: SanityCheck => {\n          logger.info(s\"${pd.getClass.getName} supports data sanity\" +\n            \" check. Performing check.\")\n          sanityCheckable.sanityCheck()\n        }\n        case _ => {\n          logger.info(s\"${pd.getClass.getName} does not support\" +\n            \" data sanity check. Skipping check.\")\n        }\n      }\n    }\n\n    if (params.stopAfterPrepare) {\n      logger.info(\"Stopping here because --stop-after-prepare is set.\")\n      throw StopAfterPrepareInterruption()\n    }\n\n    val models: Seq[Any] = algorithmList.map(_.trainBase(sc, pd))\n\n    if (!params.skipSanityCheck) {\n      models.foreach { model =>\n        model match {\n          case sanityCheckable: SanityCheck => {\n            logger.info(s\"${model.getClass.getName} supports data sanity\" +\n              \" check. Performing check.\")\n            sanityCheckable.sanityCheck()\n          }\n          case _ => {\n            logger.info(s\"${model.getClass.getName} does not support\" +\n              \" data sanity check. 
Skipping check.\")\n          }\n        }\n      }\n    }\n\n    logger.info(\"EngineWorkflow.train completed\")\n    models\n  }\n\n  /** Provides concrete implementation of evaluation for [[Engine]].\n    *\n    * @param sc An instance of SparkContext\n    * @param dataSource An instance of data source\n    * @param preparator An instance of preparator\n    * @param algorithmList A list of algorithm instances\n    * @param serving An instance of serving\n    * @tparam TD Training data class\n    * @tparam PD Prepared data class\n    * @tparam Q Input query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    * @tparam EI Evaluation information class\n    * @return A list of evaluation information, RDD of query, predicted result,\n    *         and actual result tuples.\n    */\n  def eval[TD, PD, Q, P, A, EI](\n      sc: SparkContext,\n      dataSource: BaseDataSource[TD, EI, Q, A],\n      preparator: BasePreparator[TD, PD],\n      algorithmList: Seq[BaseAlgorithm[PD, _, Q, P]],\n      serving: BaseServing[Q, P]): Seq[(EI, RDD[(Q, P, A)])] = {\n    logger.info(s\"DataSource: $dataSource\")\n    logger.info(s\"Preparator: $preparator\")\n    logger.info(s\"AlgorithmList: $algorithmList\")\n    logger.info(s\"Serving: $serving\")\n\n    val algoMap: Map[AX, BaseAlgorithm[PD, _, Q, P]] = algorithmList\n      .zipWithIndex\n      .map(_.swap)\n      .toMap\n    val algoCount = algoMap.size\n\n    val evalTupleMap: Map[EX, (TD, EI, RDD[(Q, A)])] = dataSource\n      .readEvalBase(sc)\n      .zipWithIndex\n      .map(_.swap)\n      .toMap\n\n    val evalCount = evalTupleMap.size\n\n    val evalTrainMap: Map[EX, TD] = evalTupleMap.mapValues(_._1)\n    val evalInfoMap: Map[EX, EI] = evalTupleMap.mapValues(_._2)\n    val evalQAsMap: Map[EX, RDD[(QX, (Q, A))]] = evalTupleMap\n      .mapValues(_._3)\n      .mapValues{ _.zipWithUniqueId().map(_.swap) }\n\n    val preparedMap: Map[EX, PD] = evalTrainMap.mapValues { td =>\n      
preparator.prepareBase(sc, td)\n    }\n\n    val algoModelsMap: Map[EX, Map[AX, Any]] = preparedMap.mapValues { pd =>\n      algoMap.mapValues(_.trainBase(sc,pd))\n    }\n\n    val suppQAsMap: Map[EX, RDD[(QX, (Q, A))]] = evalQAsMap.mapValues { qas =>\n      qas.map { case (qx, (q, a)) => (qx, (serving.supplementBase(q), a)) }\n    }\n\n    val algoPredictsMap: Map[EX, RDD[(QX, Seq[P])]] = (0 until evalCount)\n    .map { ex =>\n      val modelMap: Map[AX, Any] = algoModelsMap(ex)\n\n      val qs: RDD[(QX, Q)] = suppQAsMap(ex).mapValues(_._1)\n\n      val algoPredicts: Seq[RDD[(QX, (AX, P))]] = (0 until algoCount)\n      .map { ax =>\n        val algo = algoMap(ax)\n        val model = modelMap(ax)\n        val rawPredicts: RDD[(QX, P)] = algo.batchPredictBase(sc, model, qs)\n        val predicts: RDD[(QX, (AX, P))] = rawPredicts.map { case (qx, p) =>\n          (qx, (ax, p))\n        }\n        predicts\n      }\n\n      val unionAlgoPredicts: RDD[(QX, Seq[P])] = sc.union(algoPredicts)\n      .groupByKey()\n      .mapValues { ps =>\n        assert (ps.size == algoCount, \"Must have same length as algoCount\")\n        // TODO. 
Check size == algoCount\n        ps.toSeq.sortBy(_._1).map(_._2)\n      }\n\n      (ex, unionAlgoPredicts)\n    }\n    .toMap\n\n    val servingQPAMap: Map[EX, RDD[(Q, P, A)]] = algoPredictsMap\n    .map { case (ex, psMap) =>\n      // The query passed to serving.serve is the original one, not\n      // supplemented.\n      val qasMap: RDD[(QX, (Q, A))] = evalQAsMap(ex)\n      val qpsaMap: RDD[(QX, Q, Seq[P], A)] = psMap.join(qasMap)\n      .map { case (qx, t) => (qx, t._2._1, t._1, t._2._2) }\n\n      val qpaMap: RDD[(Q, P, A)] = qpsaMap.map {\n        case (qx, q, ps, a) => (q, serving.serveBase(q, ps), a)\n      }\n      (ex, qpaMap)\n    }\n\n    (0 until evalCount).map { ex =>\n      (evalInfoMap(ex), servingQPAMap(ex))\n    }\n  }\n}\n\n/** Mix in this trait for queries that contain prId (PredictedResultId).\n  * This is useful when your engine expects queries to also be associated with\n  * prId keys when feedback loop is enabled.\n  *\n  * @group Helper\n  */\n@deprecated(\"To be removed in future releases.\", \"0.9.2\")\ntrait WithPrId {\n  val prId: String = \"\"\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/EngineFactory.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseEngine\n\nimport scala.language.implicitConversions\n\n/** If you intend to let PredictionIO create workflow and deploy serving\n  * automatically, you will need to implement an object that extends this class\n  * and return an [[Engine]].\n  *\n  * @group Engine\n  */\nabstract class EngineFactory {\n  /** Creates an instance of an [[Engine]]. */\n  def apply(): BaseEngine[_, _, _, _]\n\n  /** Override this method to programmatically return engine parameters. */\n  def engineParams(key: String): EngineParams = EngineParams()\n}\n\n/** DEPRECATED. Use [[EngineFactory]] instead.\n  *\n  * @group Engine\n  */\n@deprecated(\"Use EngineFactory instead.\", \"0.9.2\")\ntrait IEngineFactory extends EngineFactory\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/EngineParams.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.predictionio.core.BaseAlgorithm\n\nimport scala.collection.JavaConversions\nimport scala.language.implicitConversions\n\n/** This class serves as a logical grouping of all required engine's parameters.\n  *\n  * @param dataSourceParams Data Source name-parameters tuple.\n  * @param preparatorParams Preparator name-parameters tuple.\n  * @param algorithmParamsList List of algorithm name-parameter pairs.\n  * @param servingParams Serving name-parameters tuple.\n  * @group Engine\n  */\nclass EngineParams(\n    val dataSourceParams: (String, Params) = (\"\", EmptyParams()),\n    val preparatorParams: (String, Params) = (\"\", EmptyParams()),\n    val algorithmParamsList: Seq[(String, Params)] = Nil,\n    val servingParams: (String, Params) = (\"\", EmptyParams()))\n  extends Serializable {\n\n  /** Java-friendly constructor\n    *\n    * @param dataSourceName Data Source name\n    * @param dataSourceParams Data Source parameters\n    * @param preparatorName Preparator name\n    * @param preparatorParams Preparator parameters\n    
* @param algorithmParamsList Map of algorithm name-parameters\n    * @param servingName Serving name\n    * @param servingParams Serving parameters\n    */\n  def this(\n    dataSourceName: String,\n    dataSourceParams: Params,\n    preparatorName: String,\n    preparatorParams: Params,\n    algorithmParamsList: _root_.java.util.Map[String, _ <: Params],\n    servingName: String,\n    servingParams: Params) = {\n\n    // To work around a json4s weird limitation, the parameter names can not be changed\n    this(\n      (dataSourceName, dataSourceParams),\n      (preparatorName, preparatorParams),\n      JavaConversions.mapAsScalaMap(algorithmParamsList).toSeq,\n      (servingName, servingParams)\n    )\n  }\n\n  // A case class style copy method.\n  def copy(\n    dataSourceParams: (String, Params) = dataSourceParams,\n    preparatorParams: (String, Params) = preparatorParams,\n    algorithmParamsList: Seq[(String, Params)] = algorithmParamsList,\n    servingParams: (String, Params) = servingParams): EngineParams = {\n\n    new EngineParams(\n      dataSourceParams,\n      preparatorParams,\n      algorithmParamsList,\n      servingParams)\n  }\n}\n\n/** Companion object for creating [[EngineParams]] instances.\n  *\n  * @group Engine\n  */\nobject EngineParams {\n  /** Create EngineParams.\n    *\n    * @param dataSourceName Data Source name\n    * @param dataSourceParams Data Source parameters\n    * @param preparatorName Preparator name\n    * @param preparatorParams Preparator parameters\n    * @param algorithmParamsList List of algorithm name-parameter pairs.\n    * @param servingName Serving name\n    * @param servingParams Serving parameters\n    */\n  def apply(\n    dataSourceName: String = \"\",\n    dataSourceParams: Params = EmptyParams(),\n    preparatorName: String = \"\",\n    preparatorParams: Params = EmptyParams(),\n    algorithmParamsList: Seq[(String, Params)] = Nil,\n    servingName: String = \"\",\n    servingParams: Params = EmptyParams()): 
EngineParams = {\n      new EngineParams(\n        dataSourceParams = (dataSourceName, dataSourceParams),\n        preparatorParams = (preparatorName, preparatorParams),\n        algorithmParamsList = algorithmParamsList,\n        servingParams = (servingName, servingParams)\n      )\n    }\n}\n\n/** SimpleEngine has only one algorithm, and uses default preparator and serving\n  * layer. Current default preparator is `IdentityPreparator` and serving is\n  * `LFirstServing`.\n  *\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation info class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @tparam A Actual value class.\n  * @param dataSourceClass Data source class.\n  * @param algorithmClass Algorithm class.\n  * @group Engine\n  */\nclass SimpleEngine[TD, EI, Q, P, A](\n    dataSourceClass: Class[_ <: BaseDataSource[TD, EI, Q, A]],\n    algorithmClass: Class[_ <: BaseAlgorithm[TD, _, Q, P]])\n  extends Engine(\n    dataSourceClass,\n    IdentityPreparator(dataSourceClass),\n    Map(\"\" -> algorithmClass),\n    LFirstServing(algorithmClass))\n\n/** This shorthand class serves the `SimpleEngine` class.\n  *\n  * @param dataSourceParams Data source parameters.\n  * @param algorithmParams Parameters of the single algorithm.\n  * @group Engine\n  */\nclass SimpleEngineParams(\n    dataSourceParams: Params = EmptyParams(),\n    algorithmParams: Params = EmptyParams())\n  extends EngineParams(\n    dataSourceParams = (\"\", dataSourceParams),\n    algorithmParamsList = Seq((\"\", algorithmParams)))\n\n\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/EngineParamsGenerator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport scala.language.implicitConversions\n\n/** Defines an engine parameters generator.\n  *\n  * Implementations of this trait can be supplied to \"pio eval\" as the second\n  * command line argument.\n  *\n  * @group Evaluation\n  */\ntrait EngineParamsGenerator {\n  protected[this] var epList: Seq[EngineParams] = _\n  protected[this] var epListSet: Boolean = false\n\n  /** Returns the list of [[EngineParams]] of this [[EngineParamsGenerator]]. */\n  def engineParamsList: Seq[EngineParams] = {\n    assert(epListSet, \"EngineParamsList not set\")\n    epList\n  }\n\n  /** Sets the list of [[EngineParams]] of this [[EngineParamsGenerator]]. */\n  def engineParamsList_=(l: Seq[EngineParams]) {\n    assert(!epListSet, \"EngineParamsList can bet set at most once\")\n    epList = Seq(l:_*)\n    epListSet = true\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\n\nimport scala.language.implicitConversions\n\n/** Defines an evaluation that contains an engine and a metric.\n  *\n  * Implementations of this trait can be supplied to \"pio eval\" as the first\n  * argument.\n  *\n  * @group Evaluation\n  */\ntrait Evaluation extends Deployment {\n  protected [this] var _evaluatorSet: Boolean = false\n  protected [this] var _evaluator: BaseEvaluator[_, _, _, _, _ <: BaseEvaluatorResult] = _\n\n  private[predictionio]\n  def evaluator: BaseEvaluator[_, _, _, _, _ <: BaseEvaluatorResult] = {\n    assert(_evaluatorSet, \"Evaluator not set\")\n    _evaluator\n  }\n\n  /** Gets the tuple of the [[Engine]] and the implementation of\n    * [[org.apache.predictionio.core.BaseEvaluator]]\n    */\n  def engineEvaluator\n  : (BaseEngine[_, _, _, _], BaseEvaluator[_, _, _, _, _]) = {\n    assert(_evaluatorSet, \"Evaluator not set\")\n    (engine, _evaluator)\n  }\n\n  /** Sets both an [[Engine]] and an implementation 
of\n    * [[org.apache.predictionio.core.BaseEvaluator]] for this [[Evaluation]]\n    *\n    * @param engineEvaluator A tuple an [[Engine]] and an implementation of\n    *                        [[org.apache.predictionio.core.BaseEvaluator]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    * @tparam R Metric result class\n    */\n  def engineEvaluator_=[EI, Q, P, A, R <: BaseEvaluatorResult](\n    engineEvaluator: (\n      BaseEngine[EI, Q, P, A],\n      BaseEvaluator[EI, Q, P, A, R])) {\n    assert(!_evaluatorSet, \"Evaluator can be set at most once\")\n    engine = engineEvaluator._1\n    _evaluator = engineEvaluator._2\n    _evaluatorSet = true\n  }\n\n  /** Returns both the [[Engine]] and the implementation of [[Metric]] for this\n    * [[Evaluation]]\n    */\n  def engineMetric: (BaseEngine[_, _, _, _], Metric[_, _, _, _, _]) = {\n    throw new NotImplementedError(\"This method is to keep the compiler happy\")\n  }\n\n  /** Sets both an [[Engine]] and an implementation of [[Metric]] for this\n    * [[Evaluation]]\n    *\n    * @param engineMetric A tuple of [[Engine]] and an implementation of\n    *                     [[Metric]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    */\n  def engineMetric_=[EI, Q, P, A](\n    engineMetric: (BaseEngine[EI, Q, P, A], Metric[EI, Q, P, A, _])) {\n    engineEvaluator = (\n      engineMetric._1,\n      MetricEvaluator(\n        metric = engineMetric._2,\n        otherMetrics = Seq[Metric[EI, Q, P, A, _]](),\n        outputPath = \"best.json\"))\n  }\n\n  private[predictionio]\n  def engineMetrics: (BaseEngine[_, _, _, _], Metric[_, _, _, _, _]) = {\n    throw new NotImplementedError(\"This method is to keep the compiler happy\")\n  }\n\n  /** Sets an [[Engine]], an implementation of [[Metric]], and sequence 
of\n    * implementations of [[Metric]] for this [[Evaluation]]\n    *\n    * @param engineMetrics A tuple of [[Engine]], an implementation of\n    *                      [[Metric]], and a sequence of implementations of [[Metric]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    */\n  def engineMetrics_=[EI, Q, P, A](\n    engineMetrics: (\n      BaseEngine[EI, Q, P, A],\n      Metric[EI, Q, P, A, _],\n      Seq[Metric[EI, Q, P, A, _]])) {\n    engineEvaluator = (\n      engineMetrics._1,\n      MetricEvaluator(engineMetrics._2, engineMetrics._3))\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/FastEvalEngine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.predictionio.core.BasePreparator\nimport org.apache.predictionio.core.BaseAlgorithm\nimport org.apache.predictionio.core.BaseServing\nimport org.apache.predictionio.core.Doer\nimport org.apache.predictionio.annotation.Experimental\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport scala.language.implicitConversions\n\nimport _root_.java.util.NoSuchElementException\n\nimport scala.collection.mutable.{ HashMap => MutableHashMap }\n\n/** :: Experimental ::\n  * Workflow based on [[FastEvalEngine]]\n  *\n  * @group Evaluation\n  */\n@Experimental\nobject FastEvalEngineWorkflow  {\n  @transient lazy val logger = Logger[this.type]\n\n  type EX = Int\n  type AX = Int\n  type QX = Long\n\n  case class DataSourcePrefix(dataSourceParams: (String, Params)) {\n    def this(pp: PreparatorPrefix) = this(pp.dataSourceParams)\n    def this(ap: AlgorithmsPrefix) = 
this(ap.dataSourceParams)\n    def this(sp: ServingPrefix) = this(sp.dataSourceParams)\n  }\n\n  case class PreparatorPrefix(\n    dataSourceParams: (String, Params),\n    preparatorParams: (String, Params)) {\n    def this(ap: AlgorithmsPrefix) = {\n      this(ap.dataSourceParams, ap.preparatorParams)\n    }\n  }\n\n  case class AlgorithmsPrefix(\n    dataSourceParams: (String, Params),\n    preparatorParams: (String, Params),\n    algorithmParamsList: Seq[(String, Params)]) {\n    def this(sp: ServingPrefix) = {\n      this(sp.dataSourceParams, sp.preparatorParams, sp.algorithmParamsList)\n    }\n  }\n\n  case class ServingPrefix(\n    dataSourceParams: (String, Params),\n    preparatorParams: (String, Params),\n    algorithmParamsList: Seq[(String, Params)],\n    servingParams: (String, Params)) {\n    def this(ep: EngineParams) = this(\n      ep.dataSourceParams,\n      ep.preparatorParams,\n      ep.algorithmParamsList,\n      ep.servingParams)\n  }\n\n  def getDataSourceResult[TD, EI, PD, Q, P, A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    prefix: DataSourcePrefix)\n  : Map[EX, (TD, EI, RDD[(QX, (Q, A))])] = {\n    val cache = workflow.dataSourceCache\n\n    if (!cache.contains(prefix)) {\n      val dataSource = Doer(\n        workflow.engine.dataSourceClassMap(prefix.dataSourceParams._1),\n        prefix.dataSourceParams._2)\n\n      val result = dataSource\n      .readEvalBase(workflow.sc)\n      .map { case (td, ei, qaRDD) => {\n        (td, ei, qaRDD.zipWithUniqueId().map(_.swap))\n      }}\n      .zipWithIndex\n      .map(_.swap)\n      .toMap\n\n      cache += Tuple2(prefix, result)\n    }\n    cache(prefix)\n  }\n\n  def getPreparatorResult[TD, EI, PD, Q, P, A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    prefix: PreparatorPrefix): Map[EX, PD] = {\n    val cache = workflow.preparatorCache\n\n    if (!cache.contains(prefix)) {\n      val preparator = Doer(\n        
workflow.engine.preparatorClassMap(prefix.preparatorParams._1),\n        prefix.preparatorParams._2)\n\n      val result = getDataSourceResult(\n        workflow = workflow,\n        prefix = new DataSourcePrefix(prefix))\n      .mapValues { case (td, _, _) => preparator.prepareBase(workflow.sc, td) }\n\n      cache += Tuple2(prefix, result)\n    }\n    cache(prefix)\n  }\n\n  def computeAlgorithmsResult[TD, EI, PD, Q, P, A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    prefix: AlgorithmsPrefix): Map[EX, RDD[(QX, Seq[P])]] = {\n\n    val algoMap: Map[AX, BaseAlgorithm[PD, _, Q, P]] = prefix.algorithmParamsList\n      .map { case (algoName, algoParams) => {\n        try {\n          Doer(workflow.engine.algorithmClassMap(algoName), algoParams)\n        } catch {\n          case e: NoSuchElementException => {\n            val algorithmClassMap = workflow.engine.algorithmClassMap\n            if (algoName == \"\") {\n              logger.error(\"Empty algorithm name supplied but it could not \" +\n                \"match with any algorithm in the engine's definition. \" +\n                \"Existing algorithm name(s) are: \" +\n                s\"${algorithmClassMap.keys.mkString(\", \")}. Aborting.\")\n            } else {\n              logger.error(s\"${algoName} cannot be found in the engine's \" +\n                \"definition. Existing algorithm name(s) are: \" +\n                s\"${algorithmClassMap.keys.mkString(\", \")}. 
Aborting.\")\n            }\n            sys.exit(1)\n          }\n        }\n      }}\n      .zipWithIndex\n      .map(_.swap)\n      .toMap\n\n    val algoCount = algoMap.size\n\n    // Model Train\n    val algoModelsMap: Map[EX, Map[AX, Any]] = getPreparatorResult(\n      workflow,\n      new PreparatorPrefix(prefix))\n    .mapValues {\n      pd => algoMap.mapValues(_.trainBase(workflow.sc,pd))\n    }\n\n    // Predict\n    val dataSourceResult =\n      FastEvalEngineWorkflow.getDataSourceResult(\n        workflow = workflow,\n        prefix = new DataSourcePrefix(prefix))\n\n    val algoResult: Map[EX, RDD[(QX, Seq[P])]] = dataSourceResult\n    .par\n    .map { case (ex, (td, ei, iqaRDD)) => {\n      val modelsMap: Map[AX, Any] = algoModelsMap(ex)\n      val qs: RDD[(QX, Q)] = iqaRDD.mapValues(_._1)\n\n      val algoPredicts: Seq[RDD[(QX, (AX, P))]] = (0 until algoCount)\n      .map { ax => {\n        val algo = algoMap(ax)\n        val model = modelsMap(ax)\n        val rawPredicts: RDD[(QX, P)] = algo.batchPredictBase(\n          workflow.sc,\n          model,\n          qs)\n\n        val predicts: RDD[(QX, (AX, P))] = rawPredicts.map {\n          case (qx, p) => (qx, (ax, p))\n        }\n        predicts\n      }}\n\n      val unionAlgoPredicts: RDD[(QX, Seq[P])] = workflow.sc\n      .union(algoPredicts)\n      .groupByKey\n      .mapValues { ps => {\n        assert (ps.size == algoCount, \"Must have same length as algoCount\")\n        // TODO. 
Check size == algoCount\n        ps.toSeq.sortBy(_._1).map(_._2)\n      }}\n      (ex, unionAlgoPredicts)\n    }}\n    .seq\n    .toMap\n\n    algoResult\n  }\n\n  def getAlgorithmsResult[TD, EI, PD, Q, P, A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    prefix: AlgorithmsPrefix): Map[EX, RDD[(QX, Seq[P])]] = {\n    val cache = workflow.algorithmsCache\n    if (!cache.contains(prefix)) {\n      val result = computeAlgorithmsResult(workflow, prefix)\n      cache += Tuple2(prefix, result)\n    }\n    cache(prefix)\n  }\n\n  def getServingResult[TD, EI, PD, Q, P, A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    prefix: ServingPrefix)\n  : Seq[(EI, RDD[(Q, P, A)])] = {\n    val cache = workflow.servingCache\n    if (!cache.contains(prefix)) {\n      val serving = Doer(\n        workflow.engine.servingClassMap(prefix.servingParams._1),\n        prefix.servingParams._2)\n\n      val algoPredictsMap = getAlgorithmsResult(\n        workflow = workflow,\n        prefix = new AlgorithmsPrefix(prefix))\n\n      val dataSourceResult = getDataSourceResult(\n        workflow = workflow,\n        prefix = new DataSourcePrefix(prefix))\n\n      val evalQAsMap = dataSourceResult.mapValues(_._3)\n      val evalInfoMap = dataSourceResult.mapValues(_._2)\n\n      val servingQPAMap: Map[EX, RDD[(Q, P, A)]] = algoPredictsMap\n      .map { case (ex, psMap) => {\n        val qasMap: RDD[(QX, (Q, A))] = evalQAsMap(ex)\n        val qpsaMap: RDD[(QX, Q, Seq[P], A)] = psMap.join(qasMap)\n        .map { case (qx, t) => (qx, t._2._1, t._1, t._2._2) }\n\n        val qpaMap: RDD[(Q, P, A)] = qpsaMap.map {\n          case (qx, q, ps, a) => (q, serving.serveBase(q, ps), a)\n        }\n        (ex, qpaMap)\n      }}\n\n      val servingResult = (0 until evalQAsMap.size).map { ex => {\n        (evalInfoMap(ex), servingQPAMap(ex))\n      }}\n      .toSeq\n\n      cache += Tuple2(prefix, servingResult)\n    }\n    cache(prefix)\n  }\n\n  def get[TD, EI, PD, Q, P, 
A](\n    workflow: FastEvalEngineWorkflow[TD, EI, PD, Q, P, A],\n    engineParamsList: Seq[EngineParams])\n  : Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])] = {\n    engineParamsList.map { engineParams =>\n      (engineParams,\n        getServingResult(workflow, new ServingPrefix(engineParams)))\n    }\n  }\n}\n\n/** :: Experimental ::\n  * Workflow based on [[FastEvalEngine]]\n  *\n  * @group Evaluation\n  */\n@Experimental\nclass FastEvalEngineWorkflow[TD, EI, PD, Q, P, A](\n  val engine: FastEvalEngine[TD, EI, PD, Q, P, A],\n  val sc: SparkContext,\n  val workflowParams: WorkflowParams) extends Serializable {\n\n  import org.apache.predictionio.controller.FastEvalEngineWorkflow._\n\n  type DataSourceResult = Map[EX, (TD, EI, RDD[(QX, (Q, A))])]\n  type PreparatorResult = Map[EX, PD]\n  type AlgorithmsResult = Map[EX, RDD[(QX, Seq[P])]]\n  type ServingResult = Seq[(EI, RDD[(Q, P, A)])]\n\n  val dataSourceCache = MutableHashMap[DataSourcePrefix, DataSourceResult]()\n  val preparatorCache = MutableHashMap[PreparatorPrefix, PreparatorResult]()\n  val algorithmsCache = MutableHashMap[AlgorithmsPrefix, AlgorithmsResult]()\n  val servingCache = MutableHashMap[ServingPrefix, ServingResult]()\n}\n\n\n\n/** :: Experimental ::\n  * FastEvalEngine is a subclass of [[Engine]] that exploits the immutability of\n  * controllers to optimize the evaluation process\n  *\n  * @group Evaluation\n  */\n@Experimental\nclass FastEvalEngine[TD, EI, PD, Q, P, A](\n    dataSourceClassMap: Map[String, Class[_ <: BaseDataSource[TD, EI, Q, A]]],\n    preparatorClassMap: Map[String, Class[_ <: BasePreparator[TD, PD]]],\n    algorithmClassMap: Map[String, Class[_ <: BaseAlgorithm[PD, _, Q, P]]],\n    servingClassMap: Map[String, Class[_ <: BaseServing[Q, P]]])\n  extends Engine[TD, EI, PD, Q, P, A](\n    dataSourceClassMap,\n    preparatorClassMap,\n    algorithmClassMap,\n    servingClassMap) {\n  @transient override lazy val logger = Logger[this.type]\n\n  override def eval(\n    sc: 
SparkContext,\n    engineParams: EngineParams,\n    params: WorkflowParams): Seq[(EI, RDD[(Q, P, A)])] = {\n    logger.info(\"FastEvalEngine.eval\")\n    batchEval(sc, Seq(engineParams), params).head._2\n  }\n\n  override def batchEval(\n    sc: SparkContext,\n    engineParamsList: Seq[EngineParams],\n    params: WorkflowParams)\n  : Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])] = {\n\n    val fastEngineWorkflow = new FastEvalEngineWorkflow(\n      this, sc, params)\n\n    FastEvalEngineWorkflow.get(\n      fastEngineWorkflow,\n      engineParamsList)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/IdentityPreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.predictionio.core.BasePreparator\nimport org.apache.spark.SparkContext\n\n/** A helper concrete implementation of [[org.apache.predictionio.core.BasePreparator]]\n  * that passes training data through without any special preparation. 
This can\n  * be used in place for both [[PPreparator]] and [[LPreparator]].\n  *\n  * @tparam TD Training data class.\n  * @group Preparator\n  */\nclass IdentityPreparator[TD] extends BasePreparator[TD, TD] {\n  override def prepareBase(sc: SparkContext, td: TD): TD = td\n}\n\n/** Companion object of [[IdentityPreparator]] that conveniently returns an\n  * instance of the class of [[IdentityPreparator]] for use with\n  * [[EngineFactory]].\n  *\n  * @group Preparator\n  */\nobject IdentityPreparator {\n  /** Produces an instance of the class of [[IdentityPreparator]].\n    *\n    * @param ds Instance of the class of the data source for this preparator.\n    */\n  def apply[TD](ds: Class[_ <: BaseDataSource[TD, _, _, _]]): Class[IdentityPreparator[TD]] =\n    classOf[IdentityPreparator[TD]]\n}\n\n/** DEPRECATED. Use [[IdentityPreparator]] instead.\n  *\n  * @tparam TD Training data class.\n  * @group Preparator\n  */\nclass PIdentityPreparator[TD] extends IdentityPreparator[TD]\n\n/** DEPRECATED. Use [[IdentityPreparator]] instead.\n  *\n  * @group Preparator\n  */\nobject PIdentityPreparator {\n  /** Produces an instance of the class of [[IdentityPreparator]].\n    *\n    * @param ds Instance of the class of the data source for this preparator.\n    */\n  @deprecated(\"Use IdentityPreparator instead.\", \"0.9.2\")\n  def apply[TD](ds: Class[_ <: BaseDataSource[TD, _, _, _]]): Class[IdentityPreparator[TD]] =\n    classOf[IdentityPreparator[TD]]\n}\n\n/** DEPRECATED. Use [[IdentityPreparator]] instead.\n  *\n  * @tparam TD Training data class.\n  * @group Preparator\n  */\nclass LIdentityPreparator[TD] extends IdentityPreparator[TD]\n\n/** DEPRECATED. 
Use [[IdentityPreparator]] instead.\n  *\n  * @group Preparator\n  */\nobject LIdentityPreparator {\n  /** Produces an instance of the class of [[IdentityPreparator]].\n    *\n    * @param ds Instance of the class of the data source for this preparator.\n    */\n  @deprecated(\"Use IdentityPreparator instead.\", \"0.9.2\")\n  def apply[TD](ds: Class[_ <: BaseDataSource[TD, _, _, _]]): Class[IdentityPreparator[TD]] =\n    classOf[IdentityPreparator[TD]]\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport _root_.org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.core.BaseAlgorithm\nimport org.apache.predictionio.workflow.PersistentModelManifest\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport scala.reflect._\n\n/** Base class of a local algorithm.\n  *\n  * A local algorithm runs locally within a single machine and produces a model\n  * that can fit within a single machine.\n  *\n  * If your input query class requires custom JSON4S serialization, the most\n  * idiomatic way is to implement a trait that extends [[CustomQuerySerializer]],\n  * and mix that into your algorithm class, instead of overriding\n  * [[querySerializer]] directly.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class LAlgorithm[PD, M : ClassTag, Q, P]\n  extends BaseAlgorithm[RDD[PD], RDD[M], Q, P] {\n\n  override def trainBase(sc: SparkContext, pd: RDD[PD]): RDD[M] = pd.map(train)\n\n  /** Implement this method to 
produce a model from prepared data.\n    *\n    * @param pd Prepared data for model training.\n    * @return Trained model.\n    */\n  def train(pd: PD): M\n\n  override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)])\n  : RDD[(Long, P)] = {\n    val mRDD = bm.asInstanceOf[RDD[M]]\n    batchPredict(mRDD, qs)\n  }\n\n  /** This is a default implementation to perform batch prediction. Override\n    * this method for a custom implementation.\n    *\n    * @param mRDD A single model wrapped inside an RDD\n    * @param qs An RDD of index-query tuples. The index is used to keep track of\n    *           predicted results with corresponding queries.\n    * @return Batch of predicted results\n    */\n  def batchPredict(mRDD: RDD[M], qs: RDD[(Long, Q)]): RDD[(Long, P)] = {\n    val glomQs: RDD[Array[(Long, Q)]] = qs.glom()\n    val cartesian: RDD[(M, Array[(Long, Q)])] = mRDD.cartesian(glomQs)\n    cartesian.flatMap { case (m, qArray) =>\n      qArray.map { case (qx, q) => (qx, predict(m, q)) }\n    }\n  }\n\n  override def predictBase(localBaseModel: Any, q: Q): P = {\n    predict(localBaseModel.asInstanceOf[M], q)\n  }\n\n  /** Implement this method to produce a prediction from a query and trained\n    * model.\n    *\n    * @param m Trained model produced by [[train]].\n    * @param q An input query.\n    * @return A prediction.\n    */\n  def predict(m: M, q: Q): P\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly (read on to see how local\n    * algorithm models are persisted).\n    *\n    * Local algorithms produce local models. By default, models will be\n    * serialized and stored automatically. Engine developers can override this behavior by\n    * mixing the [[PersistentModel]] trait into the model class, and\n    * PredictionIO will call [[PersistentModel.save]] instead. 
If it returns\n    * true, a [[org.apache.predictionio.workflow.PersistentModelManifest]] will be\n    * returned so that during deployment, PredictionIO will use\n    * [[PersistentModelLoader]] to retrieve the model. Otherwise, Unit will be\n    * returned and the model will be re-trained on-the-fly.\n    *\n    * @param sc Spark context\n    * @param modelId Model ID\n    * @param algoParams Algorithm parameters that trained this model\n    * @param bm Model\n    * @return The model itself for automatic persistence, an instance of\n    *         [[org.apache.predictionio.workflow.PersistentModelManifest]] for manual\n    *         persistence, or Unit for re-training on deployment\n    */\n  @DeveloperApi\n  override\n  def makePersistentModel(\n    sc: SparkContext,\n    modelId: String,\n    algoParams: Params,\n    bm: Any): Any = {\n    // Check RDD[M].count == 1\n    val m = bm.asInstanceOf[RDD[M]].first()\n    m match {\n      case m: PersistentModel[Params] @unchecked =>\n        if(m.save(modelId, algoParams, sc)){\n          PersistentModelManifest(className = m.getClass.getName)\n        } else ()\n      case _ => m\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LAverageServing.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseAlgorithm\n\n/** A concrete implementation of [[LServing]] returning the average of all\n  * algorithms' predictions, where their classes are expected to be all Double.\n  *\n  * @group Serving\n  */\nclass LAverageServing[Q] extends LServing[Q, Double] {\n  /** Returns the average of all algorithms' predictions. */\n  override def serve(query: Q, predictions: Seq[Double]): Double = {\n    predictions.sum / predictions.length\n  }\n}\n\n/** A concrete implementation of [[LServing]] returning the average of all\n  * algorithms' predictions, where their classes are expected to be all Double.\n  *\n  * @group Serving\n  */\nobject LAverageServing {\n  /** Returns an instance of [[LAverageServing]]. */\n  def apply[Q](a: Class[_ <: BaseAlgorithm[_, _, Q, _]]): Class[LAverageServing[Q]] =\n    classOf[LAverageServing[Q]]\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport scala.reflect._\n\n/** Base class of a local data source.\n  *\n  * A local data source runs locally within a single machine and return data\n  * that can fit within a single machine.\n  *\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation Info class.\n  * @tparam Q Input query class.\n  * @tparam A Actual value class.\n  * @group Data Source\n  */\nabstract class LDataSource[TD: ClassTag, EI, Q, A]\n  extends BaseDataSource[RDD[TD], EI, Q, A] {\n\n  override def readTrainingBase(sc: SparkContext): RDD[TD] = {\n    sc.parallelize(Seq(None)).map(_ => readTraining())\n  }\n\n  /** Implement this method to only return training data from a data source */\n  def readTraining(): TD\n\n  override def readEvalBase(sc: SparkContext): Seq[(RDD[TD], EI, RDD[(Q, A)])] = {\n    val localEvalData: Seq[(TD, EI, Seq[(Q, A)])] = readEval()\n\n    localEvalData.map { case (td, ei, qaSeq) => {\n      val tdRDD = sc.parallelize(Seq(None)).map(_ => td)\n      val qaRDD = 
sc.parallelize(qaSeq)\n      (tdRDD, ei, qaRDD)\n    }}\n  }\n\n  /** To provide the evaluation feature for your engine, you must override this\n    * method to return data for evaluation from a data source. Returned data can\n    * optionally include a sequence of query and actual value pairs for\n    * evaluation purposes.\n    *\n    * The default implementation returns an empty sequence as a stub, so that\n    * an engine can be compiled without implementing evaluation.\n    */\n  def readEval(): Seq[(TD, EI, Seq[(Q, A)])] = Seq[(TD, EI, Seq[(Q, A)])]()\n\n  @deprecated(\"Use readEval() instead.\", \"0.9.0\")\n  def read(): Seq[(TD, EI, Seq[(Q, A)])] = readEval()\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LFirstServing.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseAlgorithm\n\n/** A concrete implementation of [[LServing]] returning the first algorithm's\n  * prediction result directly without any modification.\n  *\n  * @group Serving\n  */\nclass LFirstServing[Q, P] extends LServing[Q, P] {\n  /** Returns the first algorithm's prediction. */\n  override def serve(query: Q, predictions: Seq[P]): P = predictions.head\n}\n\n/** A concrete implementation of [[LServing]] returning the first algorithm's\n  * prediction result directly without any modification.\n  *\n  * @group Serving\n  */\nobject LFirstServing {\n  /** Returns an instance of [[LFirstServing]]. */\n  def apply[Q, P](a: Class[_ <: BaseAlgorithm[_, _, Q, P]]): Class[LFirstServing[Q, P]] =\n    classOf[LFirstServing[Q, P]]\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LPreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BasePreparator\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport scala.reflect._\n\n/** Base class of a local preparator.\n  *\n  * A local preparator runs locally within a single machine and produces\n  * prepared data that can fit within a single machine.\n  *\n  * @tparam TD Training data class.\n  * @tparam PD Prepared data class.\n  * @group Preparator\n  */\nabstract class LPreparator[TD, PD : ClassTag]\n  extends BasePreparator[RDD[TD], RDD[PD]] {\n\n  override def prepareBase(sc: SparkContext, rddTd: RDD[TD]): RDD[PD] = {\n    rddTd.map(prepare)\n  }\n\n  /** Implement this method to produce prepared data that is ready for model\n    * training.\n    *\n    * @param trainingData Training data to be prepared.\n    */\n  def prepare(trainingData: TD): PD\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LServing.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.annotation.Experimental\nimport org.apache.predictionio.core.BaseServing\n\n/** Base class of serving.\n  *\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Serving\n  */\nabstract class LServing[Q, P] extends BaseServing[Q, P] {\n  override def supplementBase(q: Q): Q = supplement(q)\n\n  /** :: Experimental ::\n    * Implement this method to supplement the query before sending it to\n    * algorithms.\n    *\n    * @param q Query\n    * @return A supplemented Query\n    */\n  @Experimental\n  def supplement(q: Q): Q = q\n\n  override def serveBase(q: Q, ps: Seq[P]): P = {\n    serve(q, ps)\n  }\n\n  /** Implement this method to combine multiple algorithms' predictions to\n    * produce a single final prediction. The query is the original query sent to\n    * the engine, not the supplemented query produced by [[LServing.supplement]].\n    *\n    * @param query Original input query.\n    * @param predictions A list of algorithms' predictions.\n    */\n  def serve(query: Q, predictions: Seq[P]): P\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/LocalFileSystemPersistentModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.spark.SparkContext\n\n/** This trait is a convenience helper for persisting your model to the local\n  * filesystem. 
This trait and [[LocalFileSystemPersistentModelLoader]] contain\n  * concrete implementation and need not be implemented.\n  *\n  * The underlying implementation is [[Utils.save]].\n  *\n  * {{{\n  * class MyModel extends LocalFileSystemPersistentModel[MyParams] {\n  *   ...\n  * }\n  *\n  * object MyModel extends LocalFileSystemPersistentModelLoader[MyParams, MyModel] {\n  *   ...\n  * }\n  * }}}\n  *\n  * @tparam AP Algorithm parameters class.\n  * @see [[LocalFileSystemPersistentModelLoader]]\n  * @group Algorithm\n  */\ntrait LocalFileSystemPersistentModel[AP <: Params] extends PersistentModel[AP] {\n  override def save(id: String, params: AP, sc: SparkContext): Boolean = {\n    Utils.save(id, this)\n    true\n  }\n}\n\n/** Implement an object that extends this trait for PredictionIO to support\n  * loading a persisted model from local filesystem during serving deployment.\n  *\n  * The underlying implementation is [[Utils.load]].\n  *\n  * @tparam AP Algorithm parameters class.\n  * @tparam M Model class.\n  * @see [[LocalFileSystemPersistentModel]]\n  * @group Algorithm\n  */\ntrait LocalFileSystemPersistentModelLoader[AP <: Params, M]\n  extends PersistentModelLoader[AP, M] {\n  override def apply(id: String, params: AP, sc: Option[SparkContext]): M = {\n    Utils.load(id).asInstanceOf[M]\n  }\n}\n\n/** DEPRECATED. Use [[LocalFileSystemPersistentModel]] instead.\n  *\n  * @group Algorithm */\n@deprecated(\"Use LocalFileSystemPersistentModel instead.\", \"0.9.2\")\ntrait IFSPersistentModel[AP <: Params] extends LocalFileSystemPersistentModel[AP]\n\n/** DEPRECATED. Use [[LocalFileSystemPersistentModelLoader]] instead.\n  *\n  * @group Algorithm */\n@deprecated(\"Use LocalFileSystemPersistentModelLoader instead.\", \"0.9.2\")\ntrait IFSPersistentModelLoader[AP <: Params, M] extends LocalFileSystemPersistentModelLoader[AP, M]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Metric.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport _root_.org.apache.predictionio.controller.java.SerializableComparator\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.util.StatCounter\n\nimport scala.Numeric.Implicits._\nimport scala.reflect._\n\n/** Base class of a [[Metric]].\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  * @tparam R Metric result\n  * @group Evaluation\n  */\nabstract class Metric[EI, Q, P, A, R](implicit rOrder: Ordering[R])\nextends Serializable {\n  /** Java friendly constructor\n    *\n    * @param comparator A serializable comparator for sorting the metric results.\n    *\n    */\n  def this(comparator: SerializableComparator[R]) = {\n    this()(Ordering.comparatorToOrdering(comparator))\n  }\n\n  /** Class name of this [[Metric]]. */\n  def header: String = this.getClass.getSimpleName\n\n  /** Calculates the result of this [[Metric]]. 
*/\n  def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]): R\n\n  /** Comparison function for R's ordering. */\n  def compare(r0: R, r1: R): Int = rOrder.compare(r0, r1)\n}\n\nprivate[predictionio] trait StatsMetricHelper[EI, Q, P, A] {\n  def calculate(q: Q, p: P, a: A): Double\n\n  def calculateStats(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : StatCounter = {\n    val doubleRDD = sc.union(\n      evalDataSet.map { case (_, qpaRDD) =>\n        qpaRDD.map { case (q, p, a) => calculate(q, p, a) }\n      }\n    )\n\n    doubleRDD.stats()\n  }\n}\n\nprivate[predictionio] trait StatsOptionMetricHelper[EI, Q, P, A] {\n  def calculate(q: Q, p: P, a: A): Option[Double]\n\n  def calculateStats(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : StatCounter = {\n    val doubleRDD = sc.union(\n      evalDataSet.map { case (_, qpaRDD) =>\n        qpaRDD.flatMap { case (q, p, a) => calculate(q, p, a) }\n      }\n    )\n\n    doubleRDD.stats()\n  }\n}\n\n/** Returns the global average of the score returned by the calculate method.\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  *\n  * @group Evaluation\n  */\nabstract class AverageMetric[EI, Q, P, A]\n    extends Metric[EI, Q, P, A, Double]\n    with StatsMetricHelper[EI, Q, P, A]\n    with QPAMetric[Q, P, A, Double] {\n  /** Implement this method to return a score that will be used for averaging\n    * across all QPA tuples.\n    */\n  override def calculate(q: Q, p: P, a: A): Double\n\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : Double = {\n    calculateStats(sc, evalDataSet).mean\n  }\n}\n\n/** Returns the global average of the non-None score returned by the calculate\n  * method.\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  *\n  * @group Evaluation\n  */\nabstract class 
OptionAverageMetric[EI, Q, P, A]\n    extends Metric[EI, Q, P, A, Double]\n    with StatsOptionMetricHelper[EI, Q, P, A]\n    with QPAMetric[Q, P, A, Option[Double]] {\n  /** Implement this method to return a score that will be used for averaging\n    * across all QPA tuples.\n    */\n  override def calculate(q: Q, p: P, a: A): Option[Double]\n\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : Double = {\n    calculateStats(sc, evalDataSet).mean\n  }\n}\n\n/** Returns the global standard deviation of the score returned by the calculate method\n  *\n  * This method uses org.apache.spark.util.StatCounter library, a one pass\n  * method is used for calculation\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  *\n  * @group Evaluation\n  */\nabstract class StdevMetric[EI, Q, P, A]\n    extends Metric[EI, Q, P, A, Double]\n    with StatsMetricHelper[EI, Q, P, A]\n    with QPAMetric[Q, P, A, Double] {\n  /** Implement this method to return a score that will be used for calculating\n    * the stdev\n    * across all QPA tuples.\n    */\n  override def calculate(q: Q, p: P, a: A): Double\n\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : Double = {\n    calculateStats(sc, evalDataSet).stdev\n  }\n}\n\n/** Returns the global standard deviation of the non-None score returned by the calculate method\n  *\n  * This method uses org.apache.spark.util.StatCounter library, a one pass\n  * method is used for calculation\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  *\n  * @group Evaluation\n  */\nabstract class OptionStdevMetric[EI, Q, P, A]\n    extends Metric[EI, Q, P, A, Double]\n    with StatsOptionMetricHelper[EI, Q, P, A]\n    with QPAMetric[Q, P, A, Option[Double]] {\n  /** Implement this method to return a score that will be used for 
calculating\n    * the stdev\n    * across all QPA tuples.\n    */\n  override def calculate(q: Q, p: P, a: A): Option[Double]\n\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : Double = {\n    calculateStats(sc, evalDataSet).stdev\n  }\n}\n\n/** Returns the sum of the score returned by the calculate method.\n  *\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  * @tparam R Result, output of the function calculate, must be Numeric\n  *\n  * @group Evaluation\n  */\nabstract class SumMetric[EI, Q, P, A, R: ClassTag](implicit num: Numeric[R])\n    extends Metric[EI, Q, P, A, R]()(num)\n    with QPAMetric[Q, P, A, R] {\n  /** Implement this method to return a score that will be used for summing\n    * across all QPA tuples.\n    */\n  override def calculate(q: Q, p: P, a: A): R\n\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])])\n  : R = {\n    val union: RDD[R] = sc.union(\n      evalDataSet.map { case (_, qpaRDD) =>\n        qpaRDD.map { case (q, p, a) => calculate(q, p, a) }\n      }\n    )\n\n    union.aggregate[R](num.zero)(_ + _, _ + _)\n  }\n}\n\n/** Returns zero. Useful as a placeholder during evaluation development when not all components are\n  * implemented.\n  * @tparam EI Evaluation information\n  * @tparam Q Query\n  * @tparam P Predicted result\n  * @tparam A Actual result\n  *\n  * @group Evaluation\n  */\nclass ZeroMetric[EI, Q, P, A] extends Metric[EI, Q, P, A, Double]() {\n  override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]): Double = 0.0\n}\n\n/** Companion object of [[ZeroMetric]]\n  *\n  * @group Evaluation\n  */\nobject ZeroMetric {\n  /** Returns a ZeroMetric instance using Engine's type parameters. 
*/\n  def apply[EI, Q, P, A](engine: BaseEngine[EI, Q, P, A]): ZeroMetric[EI, Q, P, A] = {\n    new ZeroMetric[EI, Q, P, A]()\n  }\n}\n\n\n/** Trait for metric which returns a score based on Query, PredictedResult,\n  * and ActualResult\n  *\n  * @tparam Q Query class\n  * @tparam P Predicted result class\n  * @tparam A Actual result class\n  * @tparam R Metric result class\n  * @group Evaluation\n  */\ntrait QPAMetric[Q, P, A, R] {\n  /** Calculate a metric result based on query, predicted result, and actual\n    * result\n    *\n    * @param q Query\n    * @param p Predicted result\n    * @param a Actual result\n    * @return Metric result\n    */\n  def calculate(q: Q, p: P, a: A): R\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/MetricEvaluator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport _root_.java.io.File\nimport _root_.java.io.PrintWriter\n\nimport com.github.nscala_time.time.Imports.DateTime\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.workflow.JsonExtractor\nimport org.apache.predictionio.workflow.JsonExtractorOption.Both\nimport org.apache.predictionio.workflow.NameParamsSerializer\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.json4s.native.Serialization.write\nimport org.json4s.native.Serialization.writePretty\n\nimport scala.language.existentials\n\n/** Case class storing a primary score, and other scores\n  *\n  * @param score Primary metric score\n  * @param otherScores Other scores this metric might have\n  * @tparam R Type of the primary metric score\n  * @group Evaluation\n  */\ncase class MetricScores[R](\n  score: R,\n  otherScores: 
Seq[Any])\n\n/** Contains all results of a [[MetricEvaluator]]\n  *\n  * @param bestScore The best score among all iterations\n  * @param bestEngineParams The set of engine parameters that yielded the best score\n  * @param bestIdx The index of iteration that yielded the best score\n  * @param metricHeader Brief description of the primary metric score\n  * @param otherMetricHeaders Brief descriptions of other metric scores\n  * @param engineParamsScores All sets of engine parameters and corresponding metric scores\n  * @param outputPath An optional output path where scores are saved\n  * @tparam R Type of the primary metric score\n  * @group Evaluation\n  */\ncase class MetricEvaluatorResult[R](\n  bestScore: MetricScores[R],\n  bestEngineParams: EngineParams,\n  bestIdx: Int,\n  metricHeader: String,\n  otherMetricHeaders: Seq[String],\n  engineParamsScores: Seq[(EngineParams, MetricScores[R])],\n  outputPath: Option[String])\nextends BaseEvaluatorResult {\n\n  override def toOneLiner(): String = {\n    val idx = engineParamsScores.map(_._1).indexOf(bestEngineParams)\n    s\"Best Params Index: $idx Score: ${bestScore.score}\"\n  }\n\n  override def toJSON(): String = {\n    implicit lazy val formats = Utils.json4sDefaultFormats +\n      new NameParamsSerializer\n    write(this)\n  }\n\n  override def toHTML(): String = html.metric_evaluator().toString()\n\n  override def toString: String = {\n    implicit lazy val formats = Utils.json4sDefaultFormats +\n      new NameParamsSerializer\n\n    val bestEPStr = JsonExtractor.engineParamstoPrettyJson(Both, bestEngineParams)\n\n    val strings = Seq(\n      \"MetricEvaluatorResult:\",\n      s\"  # engine params evaluated: ${engineParamsScores.size}\") ++\n      Seq(\n        \"Optimal Engine Params:\",\n        s\"  $bestEPStr\",\n        \"Metrics:\",\n        s\"  $metricHeader: ${bestScore.score}\") ++\n      otherMetricHeaders.zip(bestScore.otherScores).map {\n        case (h, s) => s\"  $h: $s\"\n      } ++\n      
outputPath.toSeq.map {\n        p => s\"The best variant params can be found in $p\"\n      }\n\n    strings.mkString(\"\\n\")\n  }\n}\n\n/** Companion object of [[MetricEvaluator]]\n  *\n  * @group Evaluation\n  */\nobject MetricEvaluator {\n  def apply[EI, Q, P, A, R](\n    metric: Metric[EI, Q, P, A, R],\n    otherMetrics: Seq[Metric[EI, Q, P, A, _]],\n    outputPath: String): MetricEvaluator[EI, Q, P, A, R] = {\n    new MetricEvaluator[EI, Q, P, A, R](\n      metric,\n      otherMetrics,\n      Some(outputPath))\n  }\n\n  def apply[EI, Q, P, A, R](\n    metric: Metric[EI, Q, P, A, R],\n    otherMetrics: Seq[Metric[EI, Q, P, A, _]])\n  : MetricEvaluator[EI, Q, P, A, R] = {\n    new MetricEvaluator[EI, Q, P, A, R](\n      metric,\n      otherMetrics,\n      None)\n  }\n\n  def apply[EI, Q, P, A, R](metric: Metric[EI, Q, P, A, R])\n  : MetricEvaluator[EI, Q, P, A, R] = {\n    new MetricEvaluator[EI, Q, P, A, R](\n      metric,\n      Seq[Metric[EI, Q, P, A, _]](),\n      None)\n  }\n\n  case class NameParams(name: String, params: Params) {\n    def this(np: (String, Params)) = this(np._1, np._2)\n  }\n\n  case class EngineVariant(\n    id: String,\n    description: String,\n    engineFactory: String,\n    datasource: NameParams,\n    preparator: NameParams,\n    algorithms: Seq[NameParams],\n    serving: NameParams) {\n\n    def this(evaluation: Evaluation, engineParams: EngineParams) = this(\n      id = \"\",\n      description = \"\",\n      engineFactory = evaluation.getClass.getName,\n      datasource = new NameParams(engineParams.dataSourceParams),\n      preparator = new NameParams(engineParams.preparatorParams),\n      algorithms = engineParams.algorithmParamsList.map(np => new NameParams(np)),\n      serving = new NameParams(engineParams.servingParams))\n  }\n}\n\n/** :: DeveloperApi ::\n  * Do no use this directly. Use [[MetricEvaluator$]] instead. 
This is an\n  * implementation of [[org.apache.predictionio.core.BaseEvaluator]] that evaluates\n  * prediction performance based on metric scores.\n  *\n  * @param metric Primary metric\n  * @param otherMetrics Other metrics\n  * @param outputPath Optional output path to save evaluation results\n  * @tparam EI Evaluation information type\n  * @tparam Q Query class\n  * @tparam P Predicted result class\n  * @tparam A Actual result class\n  * @tparam R Metric result class\n  * @group Evaluation\n  */\n@DeveloperApi\nclass MetricEvaluator[EI, Q, P, A, R] (\n  val metric: Metric[EI, Q, P, A, R],\n  val otherMetrics: Seq[Metric[EI, Q, P, A, _]],\n  val outputPath: Option[String])\n  extends BaseEvaluator[EI, Q, P, A, MetricEvaluatorResult[R]] {\n  @transient lazy val logger = Logger[this.type]\n  @transient val engineInstances = Storage.getMetaDataEngineInstances()\n\n  def saveEngineJson(\n    evaluation: Evaluation,\n    engineParams: EngineParams,\n    outputPath: String) {\n\n    val now = DateTime.now\n    val evalClassName = evaluation.getClass.getName\n\n    val variant = MetricEvaluator.EngineVariant(\n      id = s\"$evalClassName $now\",\n      description = \"\",\n      engineFactory = evalClassName,\n      datasource = new MetricEvaluator.NameParams(engineParams.dataSourceParams),\n      preparator = new MetricEvaluator.NameParams(engineParams.preparatorParams),\n      algorithms = engineParams.algorithmParamsList.map(np => new MetricEvaluator.NameParams(np)),\n      serving = new MetricEvaluator.NameParams(engineParams.servingParams))\n\n    implicit lazy val formats = Utils.json4sDefaultFormats\n\n    logger.info(s\"Writing best variant params to disk ($outputPath)...\")\n    val writer = new PrintWriter(new File(outputPath))\n    writer.write(writePretty(variant))\n    writer.close()\n  }\n\n  override def evaluateBase(\n    sc: SparkContext,\n    evaluation: Evaluation,\n    engineEvalDataSet: Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])],\n    params: 
WorkflowParams): MetricEvaluatorResult[R] = {\n\n    val evalResultList: Seq[(EngineParams, MetricScores[R])] = engineEvalDataSet\n    .par\n    .map { case (engineParams, evalDataSet) =>\n      val metricScores = MetricScores[R](\n        metric.calculate(sc, evalDataSet),\n        otherMetrics.map(_.calculate(sc, evalDataSet)))\n      (engineParams, metricScores)\n    }\n    .seq\n\n    implicit lazy val formats = Utils.json4sDefaultFormats +\n      new NameParamsSerializer\n\n    val evalResultListWithIndex = evalResultList.zipWithIndex\n\n    evalResultListWithIndex.foreach { case ((ep, r), idx) =>\n      logger.info(s\"Iteration $idx\")\n      logger.info(s\"EngineParams: ${JsonExtractor.engineParamsToJson(Both, ep)}\")\n      logger.info(s\"Result: $r\")\n    }\n\n    // Pick the highest-scoring result according to metric.compare.\n    val ((bestEngineParams, bestScore), bestIdx) = evalResultListWithIndex\n    .reduce { (x, y) =>\n      if (metric.compare(x._1._2.score, y._1._2.score) >= 0) x else y\n    }\n\n    // Save the best engine params if an output path is set.\n    outputPath.foreach { path => saveEngineJson(evaluation, bestEngineParams, path) }\n\n    MetricEvaluatorResult(\n      bestScore = bestScore,\n      bestEngineParams = bestEngineParams,\n      bestIdx = bestIdx,\n      metricHeader = metric.header,\n      otherMetricHeaders = otherMetrics.map(_.header),\n      engineParamsScores = evalResultList,\n      outputPath = outputPath)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport _root_.org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.core.BaseAlgorithm\nimport org.apache.predictionio.workflow.PersistentModelManifest\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport scala.reflect._\n\n/** Base class of a parallel-to-local algorithm.\n  *\n  * A parallel-to-local algorithm can be run in parallel on a cluster and\n  * produces a model that can fit within a single machine.\n  *\n  * If your input query class requires custom JSON4S serialization, the most\n  * idiomatic way is to implement a trait that extends [[CustomQuerySerializer]],\n  * and mix that into your algorithm class, instead of overriding\n  * [[querySerializer]] directly.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P]\n  extends BaseAlgorithm[PD, M, Q, P] {\n\n  override def trainBase(sc: SparkContext, pd: PD): 
M = train(sc, pd)\n\n  /** Implement this method to produce a model from prepared data.\n    *\n    * @param pd Prepared data for model training.\n    * @return Trained model.\n    */\n  def train(sc: SparkContext, pd: PD): M\n\n  override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)])\n  : RDD[(Long, P)] = batchPredict(bm.asInstanceOf[M], qs)\n\n  /** This is a default implementation to perform batch prediction. Override\n    * this method for a custom implementation.\n    *\n    * @param m A model\n    * @param qs An RDD of index-query tuples. The index is used to keep track of\n    *           predicted results with corresponding queries.\n    * @return Batch of predicted results\n    */\n  def batchPredict(m: M, qs: RDD[(Long, Q)]): RDD[(Long, P)] = {\n    qs.mapValues { q => predict(m, q) }\n  }\n\n  override def predictBase(bm: Any, q: Q): P = predict(bm.asInstanceOf[M], q)\n\n  /** Implement this method to produce a prediction from a query and trained\n    * model.\n    *\n    * @param model Trained model produced by [[train]].\n    * @param query An input query.\n    * @return A prediction.\n    */\n  def predict(model: M, query: Q): P\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly (read on to see how\n    * parallel-to-local algorithm models are persisted).\n    *\n    * Parallel-to-local algorithms produce local models. By default, models will be\n    * serialized and stored automatically. Engine developers can override this behavior by\n    * mixing the [[PersistentModel]] trait into the model class, and\n    * PredictionIO will call [[PersistentModel.save]] instead. If it returns\n    * true, a [[org.apache.predictionio.workflow.PersistentModelManifest]] will be\n    * returned so that during deployment, PredictionIO will use\n    * [[PersistentModelLoader]] to retrieve the model. 
Otherwise, Unit will be\n    * returned and the model will be re-trained on-the-fly.\n    *\n    * @param sc Spark context\n    * @param modelId Model ID\n    * @param algoParams Algorithm parameters that trained this model\n    * @param bm Model\n    * @return The model itself for automatic persistence, an instance of\n    *         [[org.apache.predictionio.workflow.PersistentModelManifest]] for manual\n    *         persistence, or Unit for re-training on deployment\n    */\n  @DeveloperApi\n  override\n  def makePersistentModel(\n    sc: SparkContext,\n    modelId: String,\n    algoParams: Params,\n    bm: Any): Any = {\n    val m = bm.asInstanceOf[M]\n    m match {\n      case m: PersistentModel[Params] @unchecked =>\n        if(m.save(modelId, algoParams, sc)){\n          PersistentModelManifest(className = m.getClass.getName)\n        } else ()\n      case _ => m\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.core.BaseAlgorithm\nimport org.apache.predictionio.workflow.PersistentModelManifest\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\n/** Base class of a parallel algorithm.\n  *\n  * A parallel algorithm can be run in parallel on a cluster and produces a\n  * model that can also be distributed across a cluster.\n  *\n  * If your input query class requires custom JSON4S serialization, the most\n  * idiomatic way is to implement a trait that extends [[CustomQuerySerializer]],\n  * and mix that into your algorithm class, instead of overriding\n  * [[querySerializer]] directly.\n  *\n  * To provide evaluation feature, one must override and implement the\n  * [[batchPredict]] method. 
Otherwise, an exception will be thrown when `pio eval`\n  * is used.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class PAlgorithm[PD, M, Q, P]\n  extends BaseAlgorithm[PD, M, Q, P] {\n\n  override def trainBase(sc: SparkContext, pd: PD): M = train(sc, pd)\n\n  /** Implement this method to produce a model from prepared data.\n    *\n    * @param pd Prepared data for model training.\n    * @return Trained model.\n    */\n  def train(sc: SparkContext, pd: PD): M\n\n  override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)])\n  : RDD[(Long, P)] = batchPredict(bm.asInstanceOf[M], qs)\n\n  /** To provide evaluation feature, one must override and implement this method\n    * to generate many predictions in batch. Otherwise, an exception will be\n    * thrown when `pio eval` is used.\n    *\n    * The default implementation throws an exception.\n    *\n    * @param m Trained model produced by [[train]].\n    * @param qs An RDD of index-query tuples. The index is used to keep track of\n    *           predicted results with corresponding queries.\n    */\n  def batchPredict(m: M, qs: RDD[(Long, Q)]): RDD[(Long, P)] =\n    throw new NotImplementedError(\"batchPredict not implemented\")\n\n  override def predictBase(baseModel: Any, query: Q): P = {\n    predict(baseModel.asInstanceOf[M], query)\n  }\n\n  /** Implement this method to produce a prediction from a query and trained\n    * model.\n    *\n    * @param model Trained model produced by [[train]].\n    * @param query An input query.\n    * @return A prediction.\n    */\n  def predict(model: M, query: Q): P\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly (read on to see how parallel\n    * algorithm models are persisted).\n    *\n    * In general, parallel models may contain multiple RDDs. 
It is not easy to\n    * infer and persist them programmatically since these RDDs may be\n    * potentially huge. To persist these models, engine developers need to mix\n    * the [[PersistentModel]] trait into the model class and implement\n    * [[PersistentModel.save]]. If it returns true, a\n    * [[org.apache.predictionio.workflow.PersistentModelManifest]] will be\n    * returned so that during deployment, PredictionIO will use\n    * [[PersistentModelLoader]] to retrieve the model. Otherwise, Unit will be\n    * returned and the model will be re-trained on-the-fly.\n    *\n    * @param sc Spark context\n    * @param modelId Model ID\n    * @param algoParams Algorithm parameters that trained this model\n    * @param bm Model\n    * @return An instance of\n    *         [[org.apache.predictionio.workflow.PersistentModelManifest]] for manual\n    *         persistence, or Unit for re-training on deployment\n    */\n  @DeveloperApi\n  override\n  def makePersistentModel(\n    sc: SparkContext,\n    modelId: String,\n    algoParams: Params,\n    bm: Any): Any = {\n    val m = bm.asInstanceOf[M]\n    m match {\n      case m: PersistentModel[Params] @unchecked =>\n        if(m.save(modelId, algoParams, sc)){\n          PersistentModelManifest(className = m.getClass.getName)\n        } else ()\n      case _ => ()\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/PDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BaseDataSource\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\n/** Base class of a parallel data source.\n  *\n  * A parallel data source runs locally within a single machine, or in parallel\n  * on a cluster, to return data that is distributed across a cluster.\n  *\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation Info class.\n  * @tparam Q Input query class.\n  * @tparam A Actual value class.\n  * @group Data Source\n  */\n\nabstract class PDataSource[TD, EI, Q, A]\n  extends BaseDataSource[TD, EI, Q, A] {\n\n  override def readTrainingBase(sc: SparkContext): TD = readTraining(sc)\n\n  /** Implement this method to only return training data from a data source */\n  def readTraining(sc: SparkContext): TD\n\n  override def readEvalBase(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])] = readEval(sc)\n\n  /** To provide evaluation feature for your engine, your must override this\n    * method to return data for evaluation from a data source. 
Returned data can\n    * optionally include a sequence of query and actual value pairs for\n    * evaluation purpose.\n    *\n    * The default implementation returns an empty sequence as a stub, so that\n    * an engine can be compiled without implementing evaluation.\n    */\n  def readEval(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])] =\n    Seq[(TD, EI, RDD[(Q, A)])]()\n\n  @deprecated(\"Use readEval() instead.\", \"0.9.0\")\n  def read(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])] = readEval(sc)\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/PPreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core.BasePreparator\nimport org.apache.spark.SparkContext\n\n/** Base class of a parallel preparator.\n  *\n  * A parallel preparator can be run in parallel on a cluster and produces a\n  * prepared data that is distributed across a cluster.\n  *\n  * @tparam TD Training data class.\n  * @tparam PD Prepared data class.\n  * @group Preparator\n  */\nabstract class PPreparator[TD, PD]\n  extends BasePreparator[TD, PD] {\n\n  override def prepareBase(sc: SparkContext, td: TD): PD = {\n    prepare(sc, td)\n  }\n\n  /** Implement this method to produce prepared data that is ready for model\n    * training.\n    *\n    * @param sc An Apache Spark context.\n    * @param trainingData Training data to be prepared.\n    */\n  def prepare(sc: SparkContext, trainingData: TD): PD\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Params.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\n/** Base trait for all kinds of parameters that will be passed to constructors\n  * of different controller classes.\n  *\n  * @group Helper\n  */\ntrait Params extends Serializable {}\n\n/** A concrete implementation of [[Params]] representing empty parameters.\n  *\n  * @group Helper\n  */\ncase class EmptyParams() extends Params {\n  override def toString(): String = \"Empty\"\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/PersistentModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.spark.SparkContext\n\n/** Mix in and implement this trait if your model cannot be persisted by\n  * PredictionIO automatically. 
A companion object extending\n  * [[PersistentModelLoader]] is required for PredictionIO to load the persisted\n  * model automatically during deployment.\n  *\n  * Notice that models generated by [[PAlgorithm]] cannot be persisted\n  * automatically by nature and must implement these traits if model persistence\n  * is desired.\n  *\n  * {{{\n  * class MyModel extends PersistentModel[MyParams] {\n  *   def save(id: String, params: MyParams, sc: SparkContext): Boolean = {\n  *     ...\n  *   }\n  * }\n  *\n  * object MyModel extends PersistentModelLoader[MyParams, MyModel] {\n  *   def apply(id: String, params: MyParams, sc: Option[SparkContext]): MyModel = {\n  *     ...\n  *   }\n  * }\n  * }}}\n  *\n  * In Java, all you need to do is to implement this interface, and add a static\n  * method with 3 arguments of type String, [[Params]], and SparkContext.\n  *\n  * {{{\n  * public class MyModel implements PersistentModel<MyParams>, Serializable {\n  *   ...\n  *   public boolean save(String id, MyParams params, SparkContext sc) {\n  *     ...\n  *   }\n  *\n  *   public static MyModel load(String id, Params params, SparkContext sc) {\n  *     ...\n  *   }\n  *   ...\n  * }\n  * }}}\n  *\n  * @tparam AP Algorithm parameters class.\n  * @see [[PersistentModelLoader]]\n  * @group Algorithm\n  */\ntrait PersistentModel[AP <: Params] {\n  /** Save the model to some persistent storage.\n    *\n    * This method should return true if the model has been saved successfully so\n    * that PredictionIO knows that it can be restored later during deployment.\n    * This method should return false if the model cannot be saved (or should\n    * not be saved due to configuration) so that PredictionIO will re-train the\n    * model during deployment. 
All arguments of this method are provided\n    * automatically by PredictionIO.\n    *\n    * @param id ID of the run that trained this model.\n    * @param params Algorithm parameters that were used to train this model.\n    * @param sc An Apache Spark context.\n    */\n  def save(id: String, params: AP, sc: SparkContext): Boolean\n}\n\n/** Implement an object that extends this trait for PredictionIO to support\n  * loading a persisted model during serving deployment.\n  *\n  * @tparam AP Algorithm parameters class.\n  * @tparam M Model class.\n  * @see [[PersistentModel]]\n  * @group Algorithm\n  */\ntrait PersistentModelLoader[AP <: Params, M] {\n  /** Implement this method to restore a persisted model that extends the\n    * [[PersistentModel]] trait. All arguments of this method are provided\n    * automatically by PredictionIO.\n    *\n    * @param id ID of the run that trained this model.\n    * @param params Algorithm parameters that were used to train this model.\n    * @param sc An optional Apache Spark context. This will be injected if the\n    *           model was generated by a [[PAlgorithm]].\n    */\n  def apply(id: String, params: AP, sc: Option[SparkContext]): M\n}\n\n/** DEPRECATED. Use [[PersistentModel]] instead.\n  *\n  * @group Algorithm */\n@deprecated(\"Use PersistentModel instead.\", \"0.9.2\")\ntrait IPersistentModel[AP <: Params] extends PersistentModel[AP]\n\n/** DEPRECATED. Use [[PersistentModelLoader]] instead.\n  *\n  * @group Algorithm */\n@deprecated(\"Use PersistentModelLoader instead.\", \"0.9.2\")\ntrait IPersistentModelLoader[AP <: Params, M] extends PersistentModelLoader[AP, M]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/SanityCheck.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\n/** Extends a data class with this trait if you want PredictionIO to\n  * automatically perform sanity check on your data classes during training.\n  * This is very useful when you need to debug your engine.\n  *\n  * @group Helper\n  */\ntrait SanityCheck {\n  /** Implement this method to perform checks on your data. This method should\n    * contain assertions that throw exceptions when your data does not meet\n    * your pre-defined requirement.\n    */\n  def sanityCheck(): Unit\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/Utils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.KryoInstantiator\n\nimport org.json4s._\nimport org.json4s.ext.JodaTimeSerializers\n\nimport scala.io.Source\n\nimport _root_.java.io.File\nimport _root_.java.io.FileOutputStream\n\n/** Controller utilities.\n  *\n  * @group Helper\n  */\nobject Utils {\n  /** Default JSON4S serializers for PredictionIO controllers. 
*/\n  val json4sDefaultFormats = DefaultFormats.lossless ++ JodaTimeSerializers.all\n\n  /** Save a model object as a file to a temporary location on local filesystem.\n    * It will first try to use the location indicated by the environmental\n    * variable PIO_FS_TMPDIR, then fall back to the java.io.tmpdir property.\n    *\n    * @param id Used as the filename of the file.\n    * @param model Model object.\n    */\n  def save(id: String, model: Any): Unit = {\n    val tmpdir = sys.env.getOrElse(\"PIO_FS_TMPDIR\", System.getProperty(\"java.io.tmpdir\"))\n    val modelFile = tmpdir + File.separator + id\n    (new File(tmpdir)).mkdirs\n    val fos = new FileOutputStream(modelFile)\n    val kryo = KryoInstantiator.newKryoInjection\n    fos.write(kryo(model))\n    fos.close\n  }\n\n  /** Load a model object from a file in a temporary location on local\n    * filesystem. It will first try to use the location indicated by the\n    * environmental variable PIO_FS_TMPDIR, then fall back to the java.io.tmpdir\n    * property.\n    *\n    * @param id Used as the filename of the file.\n    */\n  def load(id: String): Any = {\n    val tmpdir = sys.env.getOrElse(\"PIO_FS_TMPDIR\", System.getProperty(\"java.io.tmpdir\"))\n    val modelFile = tmpdir + File.separator + id\n    val src = Source.fromFile(modelFile)(scala.io.Codec.ISO8859)\n    val kryo = KryoInstantiator.newKryoInjection\n    val m = kryo.invert(src.map(_.toByte).toArray).get\n    src.close\n    m\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/JavaEngineParamsGenerator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.EngineParamsGenerator\n\nimport scala.collection.JavaConversions.asScalaBuffer\n\n/** Define an engine parameter generator in Java\n  *\n  * Implementations of this abstract class can be supplied to \"pio eval\" as the second\n  * command line argument.\n  *\n  * @group Evaluation\n  */\nabstract class JavaEngineParamsGenerator extends EngineParamsGenerator {\n\n  /** Set the list of [[EngineParams]].\n    *\n    * @param engineParams A list of engine params\n    */\n  def setEngineParamsList(engineParams: java.util.List[_ <: EngineParams]) {\n    engineParamsList = asScalaBuffer(engineParams)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/JavaEvaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.Metric\nimport org.apache.predictionio.core.BaseEngine\n\nimport scala.collection.JavaConversions.asScalaBuffer\n\n/** Define an evaluation in Java.\n  *\n  * Implementations of this abstract class can be supplied to \"pio eval\" as the first\n  * argument.\n  *\n  * @group Evaluation\n  */\n\nabstract class JavaEvaluation extends Evaluation {\n  /** Set the [[BaseEngine]] and [[Metric]] for this [[Evaluation]]\n    *\n    * @param baseEngine [[BaseEngine]] for this [[JavaEvaluation]]\n    * @param metric [[Metric]] for this [[JavaEvaluation]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    */\n  def setEngineMetric[EI, Q, P, A](\n    baseEngine: BaseEngine[EI, Q, P, A],\n    metric: Metric[EI, Q, P, A, _]) {\n\n    engineMetric = (baseEngine, metric)\n  }\n\n  /** Set the [[BaseEngine]] and [[Metric]]s for this [[JavaEvaluation]]\n    *\n    * @param baseEngine [[BaseEngine]] for this 
[[JavaEvaluation]]\n    * @param metric [[Metric]] for this [[JavaEvaluation]]\n    * @param metrics Other [[Metric]]s for this [[JavaEvaluation]]\n    * @tparam EI Evaluation information class\n    * @tparam Q Query class\n    * @tparam P Predicted result class\n    * @tparam A Actual result class\n    */\n  def setEngineMetrics[EI, Q, P, A](\n    baseEngine: BaseEngine[EI, Q, P, A],\n    metric: Metric[EI, Q, P, A, _],\n    metrics: java.util.List[_ <: Metric[EI, Q, P, A, _]]) {\n\n    engineMetrics = (baseEngine, metric, asScalaBuffer(metrics))\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/LJavaAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.LAlgorithm\n\nimport scala.reflect.ClassTag\n\n/** Base class of a Java local algorithm. Refer to [[LAlgorithm]] for documentation.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class LJavaAlgorithm[PD, M, Q, P]\n  extends LAlgorithm[PD, M, Q, P]()(ClassTag.AnyRef.asInstanceOf[ClassTag[M]])\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/LJavaDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.LDataSource\n\nimport scala.reflect.ClassTag\n\n/** Base class of a Java local data source. Refer to [[LDataSource]] for documentation.\n  *\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation Info class.\n  * @tparam Q Input query class.\n  * @tparam A Actual value class.\n  * @group Data Source\n  */\nabstract class LJavaDataSource[TD, EI, Q, A]\n  extends LDataSource[TD, EI, Q, A]()(ClassTag.AnyRef.asInstanceOf[ClassTag[TD]])\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/LJavaPreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.LPreparator\n\nimport scala.reflect.ClassTag\n\n/** Base class of a Java local preparator. Refer to [[LPreparator]] for documentation.\n  *\n  * @tparam TD Training data class.\n  * @tparam PD Prepared data class.\n  * @group Preparator\n  */\nabstract class LJavaPreparator[TD, PD]\n  extends LPreparator[TD, PD]()(ClassTag.AnyRef.asInstanceOf[ClassTag[PD]])\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/LJavaServing.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.LServing\n\n/** Base class of Java local serving. Refer to [[LServing]] for documentation.\n  *\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Serving\n  */\nabstract class LJavaServing[Q, P] extends LServing[Q, P]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/P2LJavaAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.P2LAlgorithm\n\nimport scala.reflect.ClassTag\n\n/** Base class of a Java parallel-to-local algorithm. Refer to [[P2LAlgorithm]] for documentation.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class P2LJavaAlgorithm[PD, M, Q, P]\n  extends P2LAlgorithm[PD, M, Q, P]()(\n    ClassTag.AnyRef.asInstanceOf[ClassTag[M]],\n    ClassTag.AnyRef.asInstanceOf[ClassTag[Q]])\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/PJavaAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.PAlgorithm\n\n/** Base class of a Java parallel algorithm. Refer to [[PAlgorithm]] for documentation.\n  *\n  * @tparam PD Prepared data class.\n  * @tparam M Trained model class.\n  * @tparam Q Input query class.\n  * @tparam P Output prediction class.\n  * @group Algorithm\n  */\nabstract class PJavaAlgorithm[PD, M, Q, P] extends PAlgorithm[PD, M, Q, P]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/PJavaDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.PDataSource\n\n/** Base class of a Java parallel data source. Refer to [[PDataSource]] for documentation.\n  *\n  * @tparam TD Training data class.\n  * @tparam EI Evaluation Info class.\n  * @tparam Q Input query class.\n  * @tparam A Actual value class.\n  * @group Data Source\n  */\nabstract class PJavaDataSource[TD, EI, Q, A] extends PDataSource[TD, EI, Q, A]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/PJavaPreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport org.apache.predictionio.controller.PPreparator\n\n/** Base class of a Java parallel preparator. Refer to [[PPreparator]] for documentation\n  *\n  * @tparam TD Training data class.\n  * @tparam PD Prepared data class.\n  * @group Preparator\n  */\nabstract class PJavaPreparator[TD, PD] extends PPreparator[TD, PD]\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/java/SerializableComparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller.java\n\nimport java.util.Comparator\n\ntrait SerializableComparator[T] extends Comparator[T] with java.io.Serializable\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/controller/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio\n\n/** Provides building blocks for writing a complete prediction engine\n  * consisting of DataSource, Preparator, Algorithm, Serving, and Evaluation.\n  *\n  * == Start Building an Engine ==\n  * The starting point of a prediction engine is the [[Engine]] class.\n  *\n  * == The DASE Paradigm ==\n  * The building blocks together form the DASE paradigm. Learn more about DASE\n  * [[http://predictionio.apache.org/customize/ here]].\n  *\n  * == Types of Building Blocks ==\n  * Depending on the problem you are solving, you would need to pick appropriate\n  * flavors of building blocks.\n  *\n  * === Engines ===\n  * There are 3 typical engine configurations:\n  *\n  *  1. [[PDataSource]], [[PPreparator]], [[P2LAlgorithm]], [[LServing]]\n  *  2. [[PDataSource]], [[PPreparator]], [[PAlgorithm]], [[LServing]]\n  *  3. 
[[LDataSource]], [[LPreparator]], [[LAlgorithm]], [[LServing]]\n  *\n  * In both configurations 1 and 2, data is sourced and prepared in a\n  * parallelized fashion, with data type as RDD.\n  *\n  * The difference between configurations 1 and 2 comes at the algorithm stage.\n  * In configuration 1, the algorithm operates on potentially large data as RDDs\n  * in the Spark cluster, and eventually outputs a model that is small enough to\n  * fit in a single machine.\n  *\n  * On the other hand, configuration 2 outputs a model that is potentially too\n  * large to fit in a single machine, and must reside in the Spark cluster as\n  * RDD(s).\n  *\n  * With configuration 1 ([[P2LAlgorithm]]), PredictionIO will automatically\n  * try to persist the model to local disk or HDFS if the model is serializable.\n  *\n  * With configuration 2 ([[PAlgorithm]]), PredictionIO will not automatically\n  * try to persist the model, unless the model implements the [[PersistentModel]]\n  * trait.\n  *\n  * In special circumstances where both the data and the model are small,\n  * configuration 3 may be used. Beware that RDDs cannot be used with\n  * configuration 3.\n  *\n  * === Data Source ===\n  * [[PDataSource]] is probably the most used data source base class with the\n  * ability to process RDD-based data. [[LDataSource]] '''cannot''' handle\n  * RDD-based data. Use only when you have a special requirement.\n  *\n  * === Preparator ===\n  * With [[PDataSource]], you must pick [[PPreparator]]. The same applies to\n  * [[LDataSource]] and [[LPreparator]].\n  *\n  * === Algorithm ===\n  * The workhorse of the engine comes in 3 different flavors.\n  *\n  * ==== P2LAlgorithm ====\n  * Produces a model that is small enough to fit in a single machine from\n  * [[PDataSource]] and [[PPreparator]]. The model '''cannot''' contain any RDD.\n  * If the produced model is serializable, PredictionIO will try to\n  * automatically persist it. 
In addition, P2LAlgorithm.batchPredict is\n  * already implemented for [[Evaluation]] purpose.\n  *\n  * ==== PAlgorithm ====\n  * Produces a model that could contain RDDs from [[PDataSource]] and\n  * [[PPreparator]]. PredictionIO will not try to persist it automatically\n  * unless the model implements [[PersistentModel]]. [[PAlgorithm.batchPredict]]\n  * must be implemented for [[Evaluation]].\n  *\n  * ==== LAlgorithm ====\n  * Produces a model that is small enough to fit in a single machine from\n  * [[LDataSource]] and [[LPreparator]]. The model '''cannot''' contain any RDD.\n  * If the produced model is serializable, PredictionIO will try to\n  * automatically persist it. In addition, LAlgorithm.batchPredict is\n  * already implemented for [[Evaluation]] purpose.\n  *\n  * === Serving ===\n  * The serving component comes with only 1 flavor--[[LServing]]. At the serving\n  * stage, it is assumed that the result being served is already at a human-\n  * consumable size.\n  *\n  * == Model Persistence ==\n  * PredictionIO tries its best to persist trained models automatically. 
Please\n  * refer to [[LAlgorithm.makePersistentModel]],\n  * [[P2LAlgorithm.makePersistentModel]], and [[PAlgorithm.makePersistentModel]]\n  * for descriptions on different strategies.\n  */\npackage object controller {\n\n  /** Base class of several helper types that represent emptiness\n    *\n    * @group Helper\n    */\n  class SerializableClass() extends Serializable\n\n  /** Empty data source parameters.\n    * @group Helper\n    */\n  type EmptyDataSourceParams = EmptyParams\n\n  /** Empty data parameters.\n    * @group Helper\n    */\n  type EmptyDataParams = EmptyParams\n\n  /** Empty evaluation info.\n    * @group Helper\n    */\n  type EmptyEvaluationInfo = SerializableClass\n\n  /** Empty preparator parameters.\n    * @group Helper\n    */\n  type EmptyPreparatorParams = EmptyParams\n\n  /** Empty algorithm parameters.\n    * @group Helper\n    */\n  type EmptyAlgorithmParams = EmptyParams\n\n  /** Empty serving parameters.\n    * @group Helper\n    */\n  type EmptyServingParams = EmptyParams\n\n  /** Empty metrics parameters.\n    * @group Helper\n    */\n  type EmptyMetricsParams = EmptyParams\n\n  /** Empty training data.\n    * @group Helper\n    */\n  type EmptyTrainingData = SerializableClass\n\n  /** Empty prepared data.\n    * @group Helper\n    */\n  type EmptyPreparedData = SerializableClass\n\n  /** Empty model.\n    * @group Helper\n    */\n  type EmptyModel = SerializableClass\n\n  /** Empty actual result.\n    * @group Helper\n    */\n  type EmptyActualResult = SerializableClass\n\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/AbstractDoer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.controller.Params\n\n/** :: DeveloperApi ::\n  * Base class for all controllers\n  */\n@DeveloperApi\nabstract class AbstractDoer extends Serializable\n\n/** :: DeveloperApi ::\n  * Provides facility to instantiate controller classes\n  */\n@DeveloperApi\nobject Doer extends Logging {\n  /** :: DeveloperApi ::\n    * Instantiates a controller class using supplied controller parameters as\n    * constructor parameters\n    *\n    * @param cls Class of the controller class\n    * @param params Parameters of the controller class\n    * @tparam C Controller class\n    * @return An instance of the controller class\n    */\n  @DeveloperApi\n  def apply[C <: AbstractDoer] (\n    cls: Class[_ <: C], params: Params): C = {\n\n    // Subclasses only allows two kind of constructors.\n    // 1. Constructor with P <: Params.\n    // 2. 
Empty constructor.\n    // First try (1), if failed, try (2).\n    try {\n      val constr = cls.getConstructor(params.getClass)\n      constr.newInstance(params)\n    } catch {\n      case e: NoSuchMethodException => try {\n        val zeroConstr = cls.getConstructor()\n        zeroConstr.newInstance()\n      } catch {\n        case e: NoSuchMethodException =>\n          error(s\"${params.getClass.getName} was used as the constructor \" +\n            s\"argument to ${e.getMessage}, but no constructor can handle it. \" +\n            \"Aborting.\")\n          sys.exit(1)\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BaseAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport com.google.gson.TypeAdapterFactory\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.controller.Utils\nimport net.jodah.typetools.TypeResolver\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\n/** :: DeveloperApi ::\n  * Base trait with default custom query serializer, exposed to engine developer\n  * via [[org.apache.predictionio.controller.CustomQuerySerializer]]\n  */\n@DeveloperApi\ntrait BaseQuerySerializer {\n  /** :: DeveloperApi ::\n    * Serializer for Scala query classes using\n    * [[org.apache.predictionio.controller.Utils.json4sDefaultFormats]]\n    */\n  @DeveloperApi\n  @transient lazy val querySerializer = Utils.json4sDefaultFormats\n\n  /** :: DeveloperApi ::\n    * Serializer for Java query classes using Gson\n    */\n  @DeveloperApi\n  @transient lazy val gsonTypeAdapterFactories = Seq.empty[TypeAdapterFactory]\n}\n\n/** :: DeveloperApi ::\n  * Base class of all algorithm controllers\n  *\n  * @tparam PD Prepared data class\n  * @tparam M Model class\n  * @tparam Q 
Query class\n  * @tparam P Predicted result class\n  */\n@DeveloperApi\nabstract class BaseAlgorithm[PD, M, Q, P]\n  extends AbstractDoer with BaseQuerySerializer {\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. This is called by workflow\n    * to train a model.\n    *\n    * @param sc Spark context\n    * @param pd Prepared data\n    * @return Trained model\n    */\n  @DeveloperApi\n  def trainBase(sc: SparkContext, pd: PD): M\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. This is called by\n    * evaluation workflow to perform batch prediction.\n    *\n    * @param sc Spark context\n    * @param bm Model\n    * @param qs Batch of queries\n    * @return Batch of predicted results\n    */\n  @DeveloperApi\n  def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)])\n  : RDD[(Long, P)]\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. Called by serving to\n    * perform a single prediction.\n    *\n    * @param bm Model\n    * @param q Query\n    * @return Predicted result\n    */\n  @DeveloperApi\n  def predictBase(bm: Any, q: Q): P\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. Prepare a model for\n    * persistence in the downstream consumer. PredictionIO supports 3 types of\n    * model persistence: automatic persistence, manual persistence, and\n    * re-training on deployment. 
This method provides a way for downstream\n    * modules to determine which mode the model should be persisted.\n    *\n    * @param sc Spark context\n    * @param modelId Model ID\n    * @param algoParams Algorithm parameters that trained this model\n    * @param bm Model\n    * @return The model itself for automatic persistence, an instance of\n    *         [[org.apache.predictionio.workflow.PersistentModelManifest]] for manual\n    *         persistence, or Unit for re-training on deployment\n    */\n  @DeveloperApi\n  def makePersistentModel(\n    sc: SparkContext,\n    modelId: String,\n    algoParams: Params,\n    bm: Any): Any = ()\n\n  /** :: DeveloperApi ::\n    * Obtains the type signature of query for this algorithm\n    *\n    * @return Type signature of query\n    */\n  def queryClass: Class[Q] = {\n    val types = TypeResolver.resolveRawArguments(classOf[BaseAlgorithm[PD, M, Q, P]], getClass)\n    types(2).asInstanceOf[Class[Q]]\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BaseDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\n/** :: DeveloperApi ::\n  * Base class of all data source controllers\n  *\n  * @tparam TD Training data class\n  * @tparam EI Evaluation information class\n  * @tparam Q Query class\n  * @tparam A Actual result class\n  */\n@DeveloperApi\nabstract class BaseDataSource[TD, EI, Q, A] extends AbstractDoer {\n  /** :: DeveloperApi ::\n    * Engine developer should not use this directly. This is called by workflow\n    * to read training data.\n    *\n    * @param sc Spark context\n    * @return Training data\n    */\n  @DeveloperApi\n  def readTrainingBase(sc: SparkContext): TD\n\n  /** :: DeveloperApi ::\n    * Engine developer should not use this directly. This is called by\n    * evaluation workflow to read training and validation data.\n    *\n    * @param sc Spark context\n    * @return Sets of training data, evaluation information, queries, and actual\n    *         results\n    */\n  @DeveloperApi\n  def readEvalBase(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])]\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BaseEngine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.json4s.JValue\n\n/** :: DeveloperApi ::\n  * Base class of all engine controller classes\n  *\n  * @tparam EI Evaluation information class\n  * @tparam Q Query class\n  * @tparam P Predicted result class\n  * @tparam A Actual result class\n  */\n@DeveloperApi\nabstract class BaseEngine[EI, Q, P, A] extends Serializable {\n  /** :: DeveloperApi ::\n    * Implement this method so that training this engine would return a list of\n    * models.\n    *\n    * @param sc An instance of SparkContext.\n    * @param engineParams An instance of [[EngineParams]] for running a single training.\n    * @param params An instance of [[WorkflowParams]] that controls the workflow.\n    * @return A list of models.\n    */\n  @DeveloperApi\n  def train(\n    sc: SparkContext,\n    engineParams: 
EngineParams,\n    engineInstanceId: String,\n    params: WorkflowParams): Seq[Any]\n\n  /** :: DeveloperApi ::\n    * Implement this method so that [[org.apache.predictionio.controller.Evaluation]] can\n    * use this method to generate inputs for [[org.apache.predictionio.controller.Metric]].\n    *\n    * @param sc An instance of SparkContext.\n    * @param engineParams An instance of [[EngineParams]] for running a single evaluation.\n    * @param params An instance of [[WorkflowParams]] that controls the workflow.\n    * @return A list of evaluation information and RDD of query, predicted\n    *         result, and actual result tuples.\n    */\n  @DeveloperApi\n  def eval(\n    sc: SparkContext,\n    engineParams: EngineParams,\n    params: WorkflowParams): Seq[(EI, RDD[(Q, P, A)])]\n\n  /** :: DeveloperApi ::\n    * Override this method to further optimize the process that runs multiple\n    * evaluations (during tuning, for example). By default, this method calls\n    * [[eval]] for each element in the engine parameters list.\n    *\n    * @param sc An instance of SparkContext.\n    * @param engineParamsList A list of [[EngineParams]] for running batch evaluation.\n    * @param params An instance of [[WorkflowParams]] that controls the workflow.\n    * @return A list of engine parameters and evaluation result (from [[eval]]) tuples.\n    */\n  @DeveloperApi\n  def batchEval(\n    sc: SparkContext,\n    engineParamsList: Seq[EngineParams],\n    params: WorkflowParams)\n  : Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])] = {\n    engineParamsList.map { engineParams =>\n      (engineParams, eval(sc, engineParams, params))\n    }\n  }\n\n  /** :: DeveloperApi ::\n    * Implement this method to convert a JValue (read from an engine variant\n    * JSON file) to an instance of [[EngineParams]].\n    *\n    * @param variantJson Content of the engine variant JSON as JValue.\n    * @param jsonExtractor JSON extractor option to use for the conversion.\n    * @return 
An instance of [[EngineParams]] converted from JSON.\n    */\n  @DeveloperApi\n  def jValueToEngineParams(variantJson: JValue, jsonExtractor: JsonExtractorOption): EngineParams =\n    throw new NotImplementedError(\"JSON to EngineParams is not implemented.\")\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BaseEvaluator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.annotation.Experimental\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\n/** :: DeveloperApi ::\n  * Base class of all evaluator controller classes\n  *\n  * @tparam EI Evaluation information class\n  * @tparam Q Query class\n  * @tparam P Predicted result class\n  * @tparam A Actual result class\n  * @tparam ER Evaluation result class\n  */\n@DeveloperApi\nabstract class BaseEvaluator[EI, Q, P, A, ER <: BaseEvaluatorResult]\n  extends AbstractDoer {\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. 
This is called by\n    * evaluation workflow to perform evaluation.\n    *\n    * @param sc Spark context\n    * @param evaluation Evaluation to run\n    * @param engineEvalDataSet Sets of engine parameters and data for evaluation\n    * @param params Evaluation workflow parameters\n    * @return Evaluation result\n    */\n  @DeveloperApi\n  def evaluateBase(\n    sc: SparkContext,\n    evaluation: Evaluation,\n    engineEvalDataSet: Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])],\n    params: WorkflowParams): ER\n}\n\n/** Base trait of evaluator result */\ntrait BaseEvaluatorResult extends Serializable {\n  /** A short description of the result */\n  def toOneLiner(): String = \"\"\n\n  /** HTML portion of the rendered evaluator results */\n  def toHTML(): String = \"\"\n\n  /** JSON portion of the rendered evaluator results */\n  def toJSON(): String = \"\"\n\n  /** :: Experimental ::\n    * Indicate if this result is inserted into database\n    */\n  @Experimental\n  val noSave: Boolean = false\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BasePreparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.spark.SparkContext\n\n/** :: DeveloperApi ::\n  * Base class of all preparator controller classes\n  *\n  * Dev note: Probably will add an extra parameter for ad hoc JSON formatter\n  *\n  * @tparam TD Training data class\n  * @tparam PD Prepared data class\n  */\n@DeveloperApi\nabstract class BasePreparator[TD, PD]\n  extends AbstractDoer {\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. This is called by training\n    * workflow to prepare data before handing it over to algorithm\n    *\n    * @param sc Spark context\n    * @param td Training data\n    * @return Prepared data\n    */\n  @DeveloperApi\n  def prepareBase(sc: SparkContext, td: TD): PD\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/BaseServing.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.annotation.Experimental\n\n/** :: DeveloperApi ::\n  * Base class of all serving controller classes\n  *\n  * @tparam Q Query class\n  * @tparam P Predicted result class\n  */\n@DeveloperApi\nabstract class BaseServing[Q, P]\n  extends AbstractDoer {\n  /** :: Experimental ::\n    * Engine developers should not use this directly. This is called by serving\n    * layer to supplement process the query before sending it to algorithms.\n    *\n    * @param q Query\n    * @return A supplement Query\n    */\n  @Experimental\n  def supplementBase(q: Q): Q\n\n  /** :: DeveloperApi ::\n    * Engine developers should not use this directly. This is called by serving\n    * layer to combine multiple predicted results from multiple algorithms, and\n    * custom business logic before serving to the end user.\n    *\n    * @param q Query\n    * @param ps List of predicted results\n    * @return A single predicted result\n    */\n  @DeveloperApi\n  def serveBase(q: Q, ps: Seq[P]): P\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/SelfCleaningDataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.core\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.data.storage.{DataMap, Event,Storage}\nimport org.apache.predictionio.data.store.{Common, LEventStore, PEventStore}\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.joda.time.DateTime\nimport org.json4s._\n\nimport scala.concurrent.ExecutionContext.Implicits.global\nimport scala.concurrent.{Await, Future}\nimport scala.concurrent.duration.Duration\n\n/** :: DeveloperApi ::\n  * Base class of cleaned data source.\n  *\n  * A cleaned data source consists tools for cleaning events that happened earlier that\n  * specified duration in seconds from train moment. 
It can also remove duplicates and compress\n  * properties (flatten set/unset events into one)\n  *\n  */\n@DeveloperApi\ntrait SelfCleaningDataSource {\n\n  implicit object DateTimeOrdering extends Ordering[DateTime] {\n    def compare(d1: DateTime, d2: DateTime): Int = d2.compareTo(d1)\n  }\n\n\n  @transient lazy private val pEventsDb = Storage.getPEvents()\n  @transient lazy private val lEventsDb = Storage.getLEvents()\n\n  /** :: DeveloperApi ::\n    * Name of the app whose events will be cleaned.\n    *\n    * @return App name\n    */\n  @DeveloperApi\n  def appName: String\n\n  /** :: DeveloperApi ::\n    * Parameters used for cleanup.\n    *\n    * @return current event window that will be used to clean up events.\n    */\n  @DeveloperApi\n  def eventWindow: Option[EventWindow] = None\n\n  @transient lazy val logger = Logger[this.type]\n\n  /** :: DeveloperApi ::\n    *\n    * Returns the RDD of events that happened within the duration given in the\n    * event window params; $set/$unset events are always kept.\n    *\n    * @return RDD[Event] most recent PEvents.\n    */\n  @DeveloperApi\n  def getCleanedPEvents(pEvents: RDD[Event]): RDD[Event] = {\n    eventWindow\n      .flatMap(_.duration)\n      .map { duration =>\n        val fd = Duration(duration)\n        pEvents.filter(e =>\n          e.eventTime.isAfter(DateTime.now().minus(fd.toMillis)) || isSetEvent(e)\n        )\n      }.getOrElse(pEvents)\n  }\n\n  /** :: DeveloperApi ::\n    *\n    * Returns the Iterable of events that happened within the duration given in the\n    * event window params; $set/$unset events are always kept.\n    *\n    * @return Iterable[Event] most recent LEvents.\n    */\n  @DeveloperApi\n  def getCleanedLEvents(lEvents: Iterable[Event]): Iterable[Event] = {\n    eventWindow\n      .flatMap(_.duration)\n      .map { duration =>\n        val fd = Duration(duration)\n        lEvents.filter(e =>\n          e.eventTime.isAfter(DateTime.now().minus(fd.toMillis)) || isSetEvent(e)\n        )\n      }.getOrElse(lEvents)\n  }\n\n  def compressPProperties(sc: SparkContext, rdd: RDD[Event]): RDD[Event] = {\n    
rdd.filter(isSetEvent)\n      .groupBy(_.entityType)\n      .flatMap { pair =>\n        val (_, ls) = pair\n        ls.groupBy(_.entityId).map { anotherpair =>\n          val (_, anotherls) = anotherpair\n          compress(anotherls)\n        }\n      } ++ rdd.filter(!isSetEvent(_))\n  }\n\n  def compressLProperties(events: Iterable[Event]): Iterable[Event] = {\n    events.filter(isSetEvent)\n      .groupBy(_.entityType)\n      .map { pair =>\n        val (_, ls) = pair\n        compress(ls)\n      } ++ events.filter(!isSetEvent(_))\n  }\n\n  def removePDuplicates(sc: SparkContext, rdd: RDD[Event]): RDD[Event] = {\n    val now = DateTime.now()\n    rdd.sortBy(_.eventTime, true).map(x =>\n      (recreateEvent(x, None, now), (x.eventId, x.eventTime)))\n      .groupByKey\n      .map{case (x, y) => recreateEvent(x, y.head._1, y.head._2)}\n\n  }\n\n  def recreateEvent(x: Event, eventId: Option[String], creationTime: DateTime): Event = {\n    Event(eventId = eventId, event = x.event, entityType = x.entityType,\n      entityId = x.entityId, targetEntityType = x.targetEntityType,\n      targetEntityId = x.targetEntityId, properties = x.properties,\n      eventTime = creationTime, tags = x.tags, prId= x.prId,\n      creationTime = creationTime)\n  }\n\n  def removeLDuplicates(ls: Iterable[Event]): Iterable[Event] = {\n    val now = DateTime.now()\n    ls.toList.reverse.map(x =>\n      (recreateEvent(x, None, now), (x.eventId, x.eventTime)))\n      .groupBy(_._1).mapValues( _.map( _._2 ) )\n      .map(x => recreateEvent(x._1, x._2.head._1, x._2.head._2))\n\n  }\n\n  /** :: DeveloperApi ::\n    *\n    * Filters most recent, compress properties and removes duplicates of PEvents\n    *\n    * @return RDD[Event] most recent PEvents\n    */\n  @DeveloperApi\n  def cleanPersistedPEvents(sc: SparkContext): Unit ={\n    eventWindow match {\n      case Some(ew) =>\n        val result = cleanPEvents(sc)\n        val originalEvents = PEventStore.find(appName)(sc)\n        val 
newEvents = result subtract originalEvents\n        val eventsToRemove = (originalEvents subtract result).map { e =>\n          e.eventId.getOrElse(\"\")\n        }\n\n        wipePEvents(newEvents, eventsToRemove, sc)\n      case None =>\n    }\n  }\n\n  /** Replace events in Event Store */\n  def wipePEvents(\n    newEvents: RDD[Event],\n    eventsToRemove: RDD[String],\n    sc: SparkContext\n  ): Unit = {\n    val (appId, channelId) = Common.appNameToId(appName, None)\n    pEventsDb.write(newEvents.map(x => recreateEvent(x, None, x.eventTime)), appId)(sc)\n\n    removePEvents(eventsToRemove, appId, sc)\n  }\n\n  def removeEvents(eventsToRemove: Set[String], appId: Int) {\n    val listOfFuture: List[Future[Boolean]] = eventsToRemove\n      .filter(x =>  x != \"\").toList.map { eventId =>\n      lEventsDb.futureDelete(eventId, appId)\n    }\n\n    val futureOfList: Future[List[Boolean]] = Future.sequence(listOfFuture)\n    Await.result(futureOfList, scala.concurrent.duration.Duration(60, \"minutes\"))\n  }\n\n  def removePEvents(eventsToRemove: RDD[String], appId: Int, sc: SparkContext) {\n    pEventsDb.delete(eventsToRemove.filter(x =>  x != \"\"), appId, None)(sc)\n  }\n\n\n  /** Replace events in Event Store\n    *\n    * @param newEvents new events\n    * @param eventsToRemove event ids to remove\n    */\n  def wipe(\n    newEvents: Set[Event],\n    eventsToRemove: Set[String]\n  ): Unit = {\n    val (appId, channelId) = Common.appNameToId(appName, None)\n\n    val listOfFutureNewEvents: List[Future[String]] = newEvents.toList.map { event =>\n      lEventsDb.futureInsert(recreateEvent(event, None, event.eventTime), appId)\n    }\n\n    val futureOfListNewEvents: Future[List[String]] = Future.sequence(listOfFutureNewEvents)\n    Await.result(futureOfListNewEvents, scala.concurrent.duration.Duration(60, \"minutes\"))\n\n    removeEvents(eventsToRemove, appId)\n  }\n\n\n  /** :: DeveloperApi ::\n    *\n    * Filters most recent, compress properties of PEvents\n   
 */\n  @DeveloperApi\n  def cleanPEvents(sc: SparkContext): RDD[Event] = {\n    val pEvents = getCleanedPEvents(PEventStore.find(appName)(sc).sortBy(_.eventTime, false))\n\n    val rdd = eventWindow match {\n      case Some(ew) =>\n        val updated =\n          if (ew.compressProperties) compressPProperties(sc, pEvents) else pEvents\n\n        val deduped = if (ew.removeDuplicates) removePDuplicates(sc, updated) else updated\n        deduped\n      case None =>\n        pEvents\n    }\n    rdd\n  }\n\n  /** :: DeveloperApi ::\n    *\n    * Filters most recent, compress properties and removes duplicates of LEvents\n    *\n    * @return Iterator[Event] most recent LEvents\n    */\n  @DeveloperApi\n  def cleanPersistedLEvents: Unit = {\n    eventWindow match {\n      case Some(ew) =>\n\n        val result = cleanLEvents().toSet\n        val originalEvents = LEventStore.find(appName).toSet\n        val newEvents = result -- originalEvents\n        val eventsToRemove = (originalEvents -- result).map { e =>\n          e.eventId.getOrElse(\"\")\n        }\n\n        wipe(newEvents, eventsToRemove)\n\n      case None =>\n    }\n  }\n\n  /** :: DeveloperApi ::\n    *\n    * Filters most recent, compress properties of LEvents\n    */\n  @DeveloperApi\n  def cleanLEvents(): Iterable[Event] = {\n    val lEvents = getCleanedLEvents(LEventStore.find(appName).toList.sortBy(_.eventTime).reverse)\n\n    val events = eventWindow match {\n      case Some(ew) =>\n        val updated =\n          if (ew.compressProperties) compressLProperties(lEvents) else lEvents\n        val deduped = if (ew.removeDuplicates) removeLDuplicates(updated) else updated\n        deduped\n      case None =>\n        lEvents\n    }\n    events\n  }\n\n\n  private def isSetEvent(e: Event): Boolean = {\n    e.event == \"$set\" || e.event == \"$unset\"\n  }\n\n  private def compress(events: Iterable[Event]): Event = {\n    events.find(_.event == \"$set\") match {\n\n      case Some(first) =>\n        
events.reduce { (e1, e2) =>\n          val props = e2.event match {\n            case \"$set\" =>\n              e1.properties.fields ++ e2.properties.fields\n            case \"$unset\" =>\n              e1.properties.fields\n                .filterKeys(f => !e2.properties.fields.contains(f))\n          }\n          e1.copy(properties = DataMap(props), eventTime = e2.eventTime)\n        }\n\n      case None =>\n        events.reduce { (e1, e2) =>\n          e1.copy(properties =\n            DataMap(e1.properties.fields ++ e2.properties.fields),\n            eventTime = e2.eventTime\n          )\n        }\n    }\n  }\n}\n\ncase class EventWindow(\n  duration: Option[String] = None,\n  removeDuplicates: Boolean = false,\n  compressProperties: Boolean = false\n)\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/core/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio\n\n/** Core base classes of PredictionIO controller components. Engine developers\n  * should not use these directly.\n  */\npackage object core {}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache\n\n/** PredictionIO Scala API */\npackage object predictionio {}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/BatchPredict.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport java.io.Serializable\n\nimport com.twitter.bijection.Injection\nimport com.twitter.chill.{KryoBase, KryoInjection, ScalaKryoInstantiator}\nimport de.javakaffee.kryoserializers.SynchronizedCollectionsSerializer\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.controller.{Engine, Utils}\nimport org.apache.predictionio.core.{BaseAlgorithm, BaseServing, Doer}\nimport org.apache.predictionio.data.storage.{EngineInstance, Storage}\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.apache.predictionio.workflow.CleanupFunctions\nimport org.apache.spark.rdd.RDD\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\nimport scala.language.existentials\n\ncase class BatchPredictConfig(\n  inputFilePath: String = \"batchpredict-input.json\",\n  outputFilePath: String = \"batchpredict-output.json\",\n  queryPartitions: Option[Int] = None,\n  engineInstanceId: String = \"\",\n  engineId: Option[String] = None,\n  engineVersion: Option[String] = None,\n  engineVariant: String = \"\",\n  env: Option[String] = None,\n  verbose: Boolean = false,\n 
 debug: Boolean = false,\n  jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\nobject BatchPredict extends Logging {\n\n  class KryoInstantiator(classLoader: ClassLoader) extends ScalaKryoInstantiator {\n    override def newKryo(): KryoBase = {\n      val kryo = super.newKryo()\n      kryo.setClassLoader(classLoader)\n      SynchronizedCollectionsSerializer.registerSerializers(kryo)\n      kryo\n    }\n  }\n\n  object KryoInstantiator extends Serializable {\n    def newKryoInjection : Injection[Any, Array[Byte]] = {\n      val kryoInstantiator = new KryoInstantiator(getClass.getClassLoader)\n      KryoInjection.instance(kryoInstantiator)\n    }\n  }\n\n  val engineInstances = Storage.getMetaDataEngineInstances\n  val modeldata = Storage.getModelDataModels\n\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[BatchPredictConfig](\"BatchPredict\") {\n      opt[String](\"input\") action { (x, c) =>\n        c.copy(inputFilePath = x)\n      } text(\"Path to file containing input queries; a \" +\n        \"multi-object JSON file with one object per line.\")\n      opt[String](\"output\") action { (x, c) =>\n        c.copy(outputFilePath = x)\n      } text(\"Path to file containing output predictions; a \" +\n        \"multi-object JSON file with one object per line.\")\n      opt[Int](\"query-partitions\") action { (x, c) =>\n        c.copy(queryPartitions = Some(x))\n      } text(\"Limit concurrency of predictions by setting the number \" +\n        \"of partitions used internally for the RDD of queries.\")\n      opt[String](\"engineId\") action { (x, c) =>\n        c.copy(engineId = Some(x))\n      } text(\"Engine ID.\")\n      opt[String](\"engineId\") action { (x, c) =>\n        c.copy(engineId = Some(x))\n      } text(\"Engine ID.\")\n      opt[String](\"engineVersion\") action { (x, c) =>\n        c.copy(engineVersion = Some(x))\n      } text(\"Engine version.\")\n      opt[String](\"engine-variant\") required() 
action { (x, c) =>\n        c.copy(engineVariant = x)\n      } text(\"Engine variant JSON.\")\n      opt[String](\"env\") action { (x, c) =>\n        c.copy(env = Some(x))\n      } text(\"Comma-separated list of environmental variables (in 'FOO=BAR' \" +\n        \"format) to pass to the Spark execution environment.\")\n      opt[String](\"engineInstanceId\") required() action { (x, c) =>\n        c.copy(engineInstanceId = x)\n      } text(\"Engine instance ID.\")\n      opt[Unit](\"verbose\") action { (x, c) =>\n        c.copy(verbose = true)\n      } text(\"Enable verbose output.\")\n      opt[Unit](\"debug\") action { (x, c) =>\n        c.copy(debug = true)\n      } text(\"Enable debug output.\")\n      opt[String](\"json-extractor\") action { (x, c) =>\n        c.copy(jsonExtractor = JsonExtractorOption.withName(x))\n      }\n    }\n\n    parser.parse(args, BatchPredictConfig()) map { config =>\n      WorkflowUtils.modifyLogging(config.verbose)\n      engineInstances.get(config.engineInstanceId) map { engineInstance =>\n\n        val engine = getEngine(engineInstance)\n\n        run(config, engineInstance, engine)\n\n      } getOrElse {\n        error(s\"Invalid engine instance ID. 
Aborting batch predict.\")\n      }\n    }\n  }\n\n  def getEngine(engineInstance: EngineInstance): Engine[_, _, _, _, _, _] = {\n\n    val engineFactoryName = engineInstance.engineFactory\n\n    val (engineLanguage, engineFactory) =\n      WorkflowUtils.getEngine(engineFactoryName, getClass.getClassLoader)\n    val maybeEngine = engineFactory()\n\n    // EngineFactory return a base engine, which may not be deployable.\n    maybeEngine match {\n      case e: Engine[_, _, _, _, _, _] => e\n      case _ => throw new NoSuchMethodException(\n        s\"Engine $maybeEngine cannot be used for batch predict\")\n    }\n  }\n\n  def run[Q, P](\n    config: BatchPredictConfig,\n    engineInstance: EngineInstance,\n    engine: Engine[_, _, _, Q, P, _]): Unit = {\n\n    try {\n      val engineParams = engine.engineInstanceToEngineParams(\n        engineInstance, config.jsonExtractor)\n\n      val kryo = KryoInstantiator.newKryoInjection\n\n      val modelsFromEngineInstance =\n        kryo.invert(modeldata.get(engineInstance.id).get.models).get.\n        asInstanceOf[Seq[Any]]\n\n      val prepareSparkContext = WorkflowContext(\n        batch = engineInstance.engineFactory,\n        executorEnv = engineInstance.env,\n        mode = \"Batch Predict (model)\",\n        sparkEnv = engineInstance.sparkConf)\n\n      val models = engine.prepareDeploy(\n        prepareSparkContext,\n        engineParams,\n        engineInstance.id,\n        modelsFromEngineInstance,\n        params = WorkflowParams()\n      )\n\n      val algorithms = engineParams.algorithmParamsList.map { case (n, p) =>\n        Doer(engine.algorithmClassMap(n), p)\n      }\n\n      val servingParamsWithName = engineParams.servingParams\n\n      val serving = Doer(engine.servingClassMap(servingParamsWithName._1),\n        servingParamsWithName._2)\n\n      val runSparkContext = WorkflowContext(\n        batch = engineInstance.engineFactory,\n        executorEnv = engineInstance.env,\n        mode = \"Batch Predict 
(runner)\",\n        sparkEnv = engineInstance.sparkConf)\n\n      val inputRDD: RDD[String] = runSparkContext.\n        textFile(config.inputFilePath).\n        filter(_.trim.nonEmpty)\n      val queriesRDD: RDD[String] = config.queryPartitions match {\n        case Some(p) => inputRDD.repartition(p)\n        case None => inputRDD\n      }\n\n      val predictionsRDD: RDD[String] = queriesRDD.map { queryString =>\n        val jsonExtractorOption = config.jsonExtractor\n        // Extract Query from Json\n        val query = JsonExtractor.extract(\n          jsonExtractorOption,\n          queryString,\n          algorithms.head.queryClass,\n          algorithms.head.querySerializer,\n          algorithms.head.gsonTypeAdapterFactories\n        )\n        // Deploy logic. First call Serving.supplement, then Algo.predict,\n        // finally Serving.serve.\n        val supplementedQuery = serving.supplementBase(query)\n        // TODO: Parallelize the following.\n        val predictions = algorithms.zip(models).map { case (a, m) =>\n          a.predictBase(m, supplementedQuery)\n        }\n        // Notice that it is by design to call Serving.serve with the\n        // *original* query.\n        val prediction = serving.serveBase(query, predictions)\n        // Combine query with prediction, so the batch results are\n        // self-descriptive.\n        val predictionJValue = JsonExtractor.toJValue(\n          jsonExtractorOption,\n          Map(\"query\" -> query,\n              \"prediction\" -> prediction),\n          algorithms.head.querySerializer,\n          algorithms.head.gsonTypeAdapterFactories)\n        // Return JSON string\n        compact(render(predictionJValue))\n      }\n\n      predictionsRDD.saveAsTextFile(config.outputFilePath)\n\n    } finally {\n      CleanupFunctions.run()\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/CleanupFunctions.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\n/** :: DeveloperApi ::\n  * Singleton object that collects anonymous functions to be\n  * executed to allow the process to end gracefully.\n  *\n  * For example, the Elasticsearch REST storage client\n  * maintains an internal connection pool that must\n  * be closed to allow the process to exit.\n  */\nobject CleanupFunctions {\n  @volatile private var functions: Seq[() => Unit] = Seq.empty[() => Unit]\n\n  /** Add a function to be called during cleanup.\n    *\n    * {{{\n    * import org.apache.predictionio.workflow.CleanupFunctions\n    *\n    * CleanupFunctions.add { MyStorageClass.close }\n    * }}}\n    *\n    * @param f function containing cleanup code.\n    */\n  def add(f: () => Unit): Seq[() => Unit] = {\n    functions = functions :+ f\n    functions\n  }\n\n  /** Call all cleanup functions in order added.\n    *\n    * {{{\n    * import org.apache.predictionio.workflow.CleanupFunctions\n    *\n    * try {\n    *   // Much code that needs cleanup\n    *   // whether successful or error thrown.\n    * } finally {\n    *   CleanupFunctions.run()\n    * }\n    * }}}\n    */\n  def run(): 
Unit = {\n    functions.foreach { f => f() }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/CoreWorkflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.data.storage.EvaluationInstance\nimport org.apache.predictionio.data.storage.Model\nimport org.apache.predictionio.data.storage.Storage\n\nimport com.github.nscala_time.time.Imports.DateTime\nimport grizzled.slf4j.Logger\n\nimport scala.language.existentials\n\n/** CoreWorkflow handles PredictionIO metadata and environment variables of\n  * training and evaluation.\n  */\nobject CoreWorkflow {\n  @transient lazy val logger = Logger[this.type]\n  @transient lazy val engineInstances = Storage.getMetaDataEngineInstances\n  @transient lazy val evaluationInstances =\n    Storage.getMetaDataEvaluationInstances()\n\n  def runTrain[EI, Q, P, A](\n      engine: BaseEngine[EI, Q, P, A],\n      engineParams: EngineParams,\n      engineInstance: 
EngineInstance,\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      params: WorkflowParams = WorkflowParams()) {\n    logger.debug(\"Starting SparkContext\")\n    val mode = \"training\"\n\n    val batch = if (params.batch.nonEmpty) {\n      s\"{engineInstance.engineFactory} (${params.batch}})\"\n    } else {\n      engineInstance.engineFactory\n    }\n    val sc = WorkflowContext(\n      batch,\n      env,\n      params.sparkEnv,\n      mode.capitalize)\n\n    try {\n\n      val models: Seq[Any] = engine.train(\n        sc = sc,\n        engineParams = engineParams,\n        engineInstanceId = engineInstance.id,\n        params = params\n      )\n\n      val instanceId = Storage.getMetaDataEngineInstances\n\n      val kryo = KryoInstantiator.newKryoInjection\n\n      logger.info(\"Inserting persistent model\")\n      Storage.getModelDataModels.insert(Model(\n        id = engineInstance.id,\n        models = kryo(models)))\n\n      logger.info(\"Updating engine instance\")\n      val engineInstances = Storage.getMetaDataEngineInstances\n      engineInstances.update(engineInstance.copy(\n        status = \"COMPLETED\",\n        endTime = DateTime.now\n        ))\n\n      logger.info(\"Training completed successfully.\")\n    } catch {\n      case e @(\n          _: StopAfterReadInterruption |\n          _: StopAfterPrepareInterruption) => {\n        logger.info(s\"Training interrupted by $e.\")\n      }\n    } finally {\n      logger.debug(\"Stopping SparkContext\")\n      CleanupFunctions.run()\n      sc.stop()\n    }\n  }\n\n  def runEvaluation[EI, Q, P, A, R <: BaseEvaluatorResult](\n      evaluation: Evaluation,\n      engine: BaseEngine[EI, Q, P, A],\n      engineParamsList: Seq[EngineParams],\n      evaluationInstance: EvaluationInstance,\n      evaluator: BaseEvaluator[EI, Q, P, A, R],\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      params: WorkflowParams = WorkflowParams()) {\n    logger.info(\"runEvaluation started\")\n    
logger.debug(\"Start SparkContext\")\n\n    val mode = \"evaluation\"\n\n    val batch = if (params.batch.nonEmpty) {\n      s\"{evaluation.getClass.getName} (${params.batch}})\"\n    } else {\n      evaluation.getClass.getName\n    }\n    val sc = WorkflowContext(\n      batch,\n      env,\n      params.sparkEnv,\n      mode.capitalize)\n\n    try {\n      val evaluationInstanceId = evaluationInstances.insert(evaluationInstance)\n\n      logger.info(s\"Starting evaluation instance ID: $evaluationInstanceId\")\n\n      val evaluatorResult: BaseEvaluatorResult = EvaluationWorkflow.runEvaluation(\n        sc,\n        evaluation,\n        engine,\n        engineParamsList,\n        evaluator,\n        params)\n\n      if (evaluatorResult.noSave) {\n        logger.info(s\"This evaluation result is not inserted into database: $evaluatorResult\")\n      } else {\n        val evaluatedEvaluationInstance = evaluationInstance.copy(\n          status = \"EVALCOMPLETED\",\n          id = evaluationInstanceId,\n          endTime = DateTime.now,\n          evaluatorResults = evaluatorResult.toOneLiner,\n          evaluatorResultsHTML = evaluatorResult.toHTML,\n          evaluatorResultsJSON = evaluatorResult.toJSON\n        )\n\n        logger.info(s\"Updating evaluation instance with result: $evaluatorResult\")\n\n        evaluationInstances.update(evaluatedEvaluationInstance)\n      }\n      logger.info(\"runEvaluation completed\")\n\n    } finally {\n      logger.debug(\"Stop SparkContext\")\n      CleanupFunctions.run()\n      sc.stop()\n    }\n  }\n}\n\n\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport java.io.Serializable\nimport java.util.concurrent.TimeUnit\n\nimport akka.event.Logging\nimport com.github.nscala_time.time.Imports.DateTime\nimport com.twitter.bijection.Injection\nimport com.twitter.chill.{KryoBase, KryoInjection, ScalaKryoInstantiator}\nimport com.typesafe.config.ConfigFactory\nimport de.javakaffee.kryoserializers.SynchronizedCollectionsSerializer\nimport grizzled.slf4j.Logging\nimport org.apache.commons.lang3.exception.ExceptionUtils\nimport org.apache.predictionio.authentication.KeyAuthentication\nimport org.apache.predictionio.controller.{Engine, Params, Utils, WithPrId}\nimport org.apache.predictionio.core.{BaseAlgorithm, BaseServing, Doer}\nimport org.apache.predictionio.data.storage.{EngineInstance, Storage}\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\nimport akka.actor._\nimport akka.http.scaladsl.{ConnectionContext, Http, HttpsConnectionContext}\nimport akka.http.scaladsl.Http.ServerBinding\nimport 
akka.http.scaladsl.model.ContentTypes._\nimport akka.http.scaladsl.model.{HttpEntity, HttpResponse, StatusCodes}\nimport akka.http.scaladsl.server.Directives.complete\nimport akka.http.scaladsl.server.directives._\nimport akka.http.scaladsl.server._\nimport akka.pattern.ask\nimport akka.util.Timeout\nimport akka.http.scaladsl.server.Directives._\nimport akka.stream.ActorMaterializer\nimport org.apache.predictionio.akkahttpjson4s.Json4sSupport._\nimport org.apache.predictionio.configuration.SSLConfiguration\n\nimport scala.concurrent.ExecutionContext.Implicits.global\nimport scala.concurrent.{Await, Future}\nimport scala.concurrent.duration._\nimport scala.language.existentials\nimport scala.util.{Failure, Random, Success}\nimport scalaj.http.HttpOptions\n\nclass KryoInstantiator(classLoader: ClassLoader) extends ScalaKryoInstantiator {\n  override def newKryo(): KryoBase = {\n    val kryo = super.newKryo()\n    kryo.setClassLoader(classLoader)\n    SynchronizedCollectionsSerializer.registerSerializers(kryo)\n    kryo\n  }\n}\n\nobject KryoInstantiator extends Serializable {\n  def newKryoInjection : Injection[Any, Array[Byte]] = {\n    val kryoInstantiator = new KryoInstantiator(getClass.getClassLoader)\n    KryoInjection.instance(kryoInstantiator)\n  }\n}\n\ncase class ServerConfig(\n  batch: String = \"\",\n  engineInstanceId: String = \"\",\n  engineId: Option[String] = None,\n  engineVersion: Option[String] = None,\n  engineVariant: String = \"\",\n  env: Option[String] = None,\n  ip: String = \"0.0.0.0\",\n  port: Int = 8000,\n  feedback: Boolean = false,\n  eventServerIp: String = \"0.0.0.0\",\n  eventServerPort: Int = 7070,\n  accessKey: Option[String] = None,\n  logUrl: Option[String] = None,\n  logPrefix: Option[String] = None,\n  logFile: Option[String] = None,\n  verbose: Boolean = false,\n  debug: Boolean = false,\n  jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\ncase class StartServer()\ncase class BindServer()\ncase class 
StopServer()\ncase class ReloadServer()\n\n\nobject CreateServer extends Logging {\n  val actorSystem = ActorSystem(\"pio-server\")\n  val engineInstances = Storage.getMetaDataEngineInstances\n  val modeldata = Storage.getModelDataModels\n\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[ServerConfig](\"CreateServer\") {\n      opt[String](\"batch\") action { (x, c) =>\n        c.copy(batch = x)\n      } text(\"Batch label of the deployment.\")\n      opt[String](\"engineId\") action { (x, c) =>\n        c.copy(engineId = Some(x))\n      } text(\"Engine ID.\")\n      opt[String](\"engineVersion\") action { (x, c) =>\n        c.copy(engineVersion = Some(x))\n      } text(\"Engine version.\")\n      opt[String](\"engine-variant\") required() action { (x, c) =>\n        c.copy(engineVariant = x)\n      } text(\"Engine variant JSON.\")\n      opt[String](\"ip\") action { (x, c) =>\n        c.copy(ip = x)\n      }\n      opt[String](\"env\") action { (x, c) =>\n        c.copy(env = Some(x))\n      } text(\"Comma-separated list of environmental variables (in 'FOO=BAR' \" +\n        \"format) to pass to the Spark execution environment.\")\n      opt[Int](\"port\") action { (x, c) =>\n        c.copy(port = x)\n      } text(\"Port to bind to (default: 8000).\")\n      opt[String](\"engineInstanceId\") required() action { (x, c) =>\n        c.copy(engineInstanceId = x)\n      } text(\"Engine instance ID.\")\n      opt[Unit](\"feedback\") action { (_, c) =>\n        c.copy(feedback = true)\n      } text(\"Enable feedback loop to event server.\")\n      opt[String](\"event-server-ip\") action { (x, c) =>\n        c.copy(eventServerIp = x)\n      }\n      opt[Int](\"event-server-port\") action { (x, c) =>\n        c.copy(eventServerPort = x)\n      } text(\"Event server port. 
Default: 7070\")\n      opt[String](\"accesskey\") action { (x, c) =>\n        c.copy(accessKey = Some(x))\n      } text(\"Event server access key.\")\n      opt[String](\"log-url\") action { (x, c) =>\n        c.copy(logUrl = Some(x))\n      }\n      opt[String](\"log-prefix\") action { (x, c) =>\n        c.copy(logPrefix = Some(x))\n      }\n      opt[String](\"log-file\") action { (x, c) =>\n        c.copy(logFile = Some(x))\n      }\n      opt[Unit](\"verbose\") action { (x, c) =>\n        c.copy(verbose = true)\n      } text(\"Enable verbose output.\")\n      opt[Unit](\"debug\") action { (x, c) =>\n        c.copy(debug = true)\n      } text(\"Enable debug output.\")\n      opt[String](\"json-extractor\") action { (x, c) =>\n        c.copy(jsonExtractor = JsonExtractorOption.withName(x))\n      }\n    }\n\n    parser.parse(args, ServerConfig()) map { sc =>\n      WorkflowUtils.modifyLogging(sc.verbose)\n      engineInstances.get(sc.engineInstanceId) map { engineInstance =>\n        val engineId = sc.engineId.getOrElse(engineInstance.engineId)\n        val engineVersion = sc.engineVersion.getOrElse(\n          engineInstance.engineVersion)\n        val engineFactoryName = engineInstance.engineFactory\n        val master = actorSystem.actorOf(Props(\n          classOf[MasterActor],\n          sc,\n          engineInstance,\n          engineFactoryName),\n        \"master\")\n        implicit val timeout = Timeout(5.seconds)\n        master ? StartServer()\n\n        val f = actorSystem.whenTerminated\n        Await.ready(f, Duration.Inf)\n\n      } getOrElse {\n        error(s\"Invalid engine instance ID. 
Aborting server.\")\n      }\n    }\n  }\n\n  def createPredictionServerWithEngine[TD, EIN, PD, Q, P, A](\n    sc: ServerConfig,\n    engineInstance: EngineInstance,\n    engine: Engine[TD, EIN, PD, Q, P, A],\n    engineLanguage: EngineLanguage.Value): PredictionServer[Q, P] = {\n\n    val engineParams = engine.engineInstanceToEngineParams(\n      engineInstance, sc.jsonExtractor)\n\n    val kryo = KryoInstantiator.newKryoInjection\n\n    val modelsFromEngineInstance =\n      kryo.invert(modeldata.get(engineInstance.id).get.models).get.\n      asInstanceOf[Seq[Any]]\n\n    val batch = if (engineInstance.batch.nonEmpty) {\n      s\"${engineInstance.engineFactory} (${engineInstance.batch})\"\n    } else {\n      engineInstance.engineFactory\n    }\n\n    val sparkContext = WorkflowContext(\n      batch = batch,\n      executorEnv = engineInstance.env,\n      mode = \"Serving\",\n      sparkEnv = engineInstance.sparkConf)\n\n    val models = engine.prepareDeploy(\n      sparkContext,\n      engineParams,\n      engineInstance.id,\n      modelsFromEngineInstance,\n      params = WorkflowParams()\n    )\n\n    val algorithms = engineParams.algorithmParamsList.map { case (n, p) =>\n      Doer(engine.algorithmClassMap(n), p)\n    }\n\n    val servingParamsWithName = engineParams.servingParams\n\n    val serving = Doer(engine.servingClassMap(servingParamsWithName._1),\n      servingParamsWithName._2)\n\n    new PredictionServer(\n      sc,\n      engineInstance,\n      engine,\n      engineLanguage,\n      engineParams.dataSourceParams._2,\n      engineParams.preparatorParams._2,\n      algorithms,\n      engineParams.algorithmParamsList.map(_._2),\n      models,\n      serving,\n      engineParams.servingParams._2,\n      actorSystem)\n  }\n}\n\n\nobject EngineServerJson4sSupport {\n  implicit val serialization = org.json4s.jackson.Serialization\n  implicit def json4sFormats: Formats = DefaultFormats\n}\n\nclass MasterActor (\n    sc: ServerConfig,\n    engineInstance: 
EngineInstance,\n    engineFactoryName: String) extends Actor with KeyAuthentication with SSLConfiguration {\n\n  val log = Logging(context.system, this)\n\n  implicit val system = context.system\n  implicit val materializer = ActorMaterializer()\n\n  var currentServerBinding: Option[Future[ServerBinding]] = None\n  var retry = 3\n  val serverConfig = ConfigFactory.load(\"server.conf\")\n  val sslEnforced = serverConfig.getBoolean(\"org.apache.predictionio.server.ssl-enforced\")\n  val protocol = if (sslEnforced) \"https://\" else \"http://\"\n\n  val https: Option[HttpsConnectionContext] = if(sslEnforced){\n    val https = ConnectionContext.https(sslContext)\n    Http().setDefaultServerHttpContext(https)\n    Some(https)\n  } else None\n\n  def undeploy(ip: String, port: Int): Unit = {\n    val serverUrl = s\"${protocol}${ip}:${port}\"\n    log.info(\n      s\"Undeploying any existing engine instance at $serverUrl\")\n    try {\n      val code = scalaj.http.Http(s\"$serverUrl/stop\")\n        .option(HttpOptions.allowUnsafeSSL)\n        .param(ServerKey.param, ServerKey.get)\n        .method(\"POST\").asString.code\n      code match {\n        case 200 => ()\n        case 404 => log.error(\n          s\"Another process is using $serverUrl. Unable to undeploy.\")\n        case _ => log.error(\n          s\"Another process is using $serverUrl, or an existing \" +\n          s\"engine server is not responding properly (HTTP $code). \" +\n          \"Unable to undeploy.\")\n      }\n    } catch {\n      case e: java.net.ConnectException =>\n        log.warning(s\"Nothing at $serverUrl\")\n      case _: Throwable =>\n        log.error(\"Another process might be occupying \" +\n          s\"$ip:$port. Unable to undeploy.\")\n    }\n  }\n\n  def receive: Actor.Receive = {\n    case x: StartServer =>\n      undeploy(sc.ip, sc.port)\n      self ! 
BindServer()\n    case x: BindServer =>\n      currentServerBinding match {\n        case Some(_) =>\n          log.error(\"Cannot bind a non-existing server backend.\")\n        case None =>\n          val server = createServer(sc, engineInstance, engineFactoryName)\n          val route = server.createRoute()\n          val binding = https match {\n            case Some(https) =>\n              Http().bindAndHandle(route, sc.ip, sc.port, connectionContext = https)\n            case None =>\n              Http().bindAndHandle(route, sc.ip, sc.port)\n          }\n          currentServerBinding = Some(binding)\n\n          val serverUrl = s\"${protocol}${sc.ip}:${sc.port}\"\n          log.info(s\"Engine is deployed and running. Engine API is live at ${serverUrl}.\")\n      }\n    case x: StopServer =>\n      log.info(s\"Stop server command received.\")\n      currentServerBinding match {\n        case Some(f) =>\n          f.flatMap { binding =>\n            binding.unbind()\n          }.foreach { _ =>\n            system.terminate()\n          }\n        case None =>\n          log.warning(\"No active server is running.\")\n      }\n    case x: ReloadServer =>\n      log.info(\"Reload server command received.\")\n        currentServerBinding match {\n          case Some(f) =>\n            f.flatMap { binding =>\n              binding.unbind()\n            }\n            val latestEngineInstance =\n              CreateServer.engineInstances.getLatestCompleted(\n                engineInstance.engineId,\n                engineInstance.engineVersion,\n                engineInstance.engineVariant)\n            latestEngineInstance map { lr =>\n              val server = createServer(sc, lr, engineFactoryName)\n              val route = server.createRoute()\n              val binding = https match {\n                case Some(https) =>\n                  Http().bindAndHandle(route, sc.ip, sc.port, connectionContext = https)\n                case None =>\n                  
Http().bindAndHandle(route, sc.ip, sc.port)\n              }\n              currentServerBinding = Some(binding)\n            } getOrElse {\n              log.warning(\n                s\"No latest completed engine instance for ${engineInstance.engineId} \" +\n                  s\"${engineInstance.engineVersion}. Abort reloading.\")\n            }\n          case None =>\n            log.warning(\"No active server is running. Abort reloading.\")\n        }\n  }\n\n  def createServer(\n      sc: ServerConfig,\n      engineInstance: EngineInstance,\n      engineFactoryName: String): PredictionServer[_, _] = {\n    val (engineLanguage, engineFactory) =\n      WorkflowUtils.getEngine(engineFactoryName, getClass.getClassLoader)\n    val engine = engineFactory()\n\n    // EngineFactory return a base engine, which may not be deployable.\n    if (!engine.isInstanceOf[Engine[_,_,_,_,_,_]]) {\n      throw new NoSuchMethodException(s\"Engine $engine is not deployable\")\n    }\n\n    val deployableEngine = engine.asInstanceOf[Engine[_,_,_,_,_,_]]\n\n    CreateServer.createPredictionServerWithEngine(\n      sc,\n      engineInstance,\n      // engine,\n      deployableEngine,\n      engineLanguage)\n  }\n}\n\nclass PredictionServer[Q, P](\n    val args: ServerConfig,\n    val engineInstance: EngineInstance,\n    val engine: Engine[_, _, _, Q, P, _],\n    val engineLanguage: EngineLanguage.Value,\n    val dataSourceParams: Params,\n    val preparatorParams: Params,\n    val algorithms: Seq[BaseAlgorithm[_, _, Q, P]],\n    val algorithmsParams: Seq[Params],\n    val models: Seq[Any],\n    val serving: BaseServing[Q, P],\n    val servingParams: Params,\n    val system: ActorSystem) extends KeyAuthentication {\n\n  val log = Logging(system, getClass)\n  val serverStartTime = DateTime.now\n\n  var requestCount: Int = 0\n  var avgServingSec: Double = 0.0\n  var lastServingSec: Double = 0.0\n\n  implicit val timeout = Timeout(5, TimeUnit.SECONDS)\n\n  val pluginsActorRef =\n    
system.actorOf(Props(classOf[PluginsActor], args.engineVariant), \"PluginsActor\")\n\n  val pluginContext = EngineServerPluginContext(log, args.engineVariant)\n\n  val feedbackEnabled = if (args.feedback) {\n    if (args.accessKey.isEmpty) {\n      log.error(\"Feedback loop cannot be enabled because accessKey is empty.\")\n      false\n    } else {\n      true\n    }\n  } else false\n\n  def remoteLog(logUrl: String, logPrefix: String, message: String): Unit = {\n    implicit val formats = Utils.json4sDefaultFormats\n    try {\n      scalaj.http.Http(logUrl).postData(\n        logPrefix + write(Map(\n          \"engineInstance\" -> engineInstance,\n          \"message\" -> message))).asString\n    } catch {\n      case e: Throwable =>\n        log.error(s\"Unable to send remote log: ${e.getMessage}\")\n    }\n  }\n\n  def authenticate[T](authenticator: RequestContext => Future[Either[Rejection, T]]):\n      AuthenticationDirective[T] = {\n    extractRequestContext.flatMap { requestContext =>\n      onSuccess(authenticator(requestContext)).flatMap {\n        case Right(x) => provide(x)\n        case Left(x)  => reject(x): Directive1[T]\n      }\n    }\n  }\n\n  def createRoute(): Route = {\n    val myRoute =\n      path(\"\") {\n        get {\n          complete(HttpResponse(entity = HttpEntity(\n            `text/html(UTF-8)`,\n            html.index(\n              args,\n              engineInstance,\n              algorithms.map(_.toString),\n              algorithmsParams.map(_.toString),\n              models.map(_.toString),\n              dataSourceParams.toString,\n              preparatorParams.toString,\n              servingParams.toString,\n              serverStartTime,\n              feedbackEnabled,\n              args.eventServerIp,\n              args.eventServerPort,\n              requestCount,\n              avgServingSec,\n              lastServingSec\n            ).toString\n          )))\n        }\n      } ~\n      path(\"queries.json\") {\n 
       post {\n          entity(as[String]) { queryString =>\n            try {\n              val servingStartTime = DateTime.now\n              val jsonExtractorOption = args.jsonExtractor\n              val queryTime = DateTime.now\n              // Extract Query from Json\n              val query = JsonExtractor.extract(\n                jsonExtractorOption,\n                queryString,\n                algorithms.head.queryClass,\n                algorithms.head.querySerializer,\n                algorithms.head.gsonTypeAdapterFactories\n              )\n              val queryJValue = JsonExtractor.toJValue(\n                jsonExtractorOption,\n                query,\n                algorithms.head.querySerializer,\n                algorithms.head.gsonTypeAdapterFactories)\n              // Deploy logic. First call Serving.supplement, then Algo.predict,\n              // finally Serving.serve.\n              val supplementedQuery = serving.supplementBase(query)\n              // TODO: Parallelize the following.\n              val predictions = algorithms.zip(models).map { case (a, m) =>\n                a.predictBase(m, supplementedQuery)\n              }\n              // Notice that it is by design to call Serving.serve with the\n              // *original* query.\n              val prediction = serving.serveBase(query, predictions)\n              val predictionJValue = JsonExtractor.toJValue(\n                jsonExtractorOption,\n                prediction,\n                algorithms.head.querySerializer,\n                algorithms.head.gsonTypeAdapterFactories)\n              /** Handle feedback to Event Server\n                * Send the following back to the Event Server\n                * - appId\n                * - engineInstanceId\n                * - query\n                * - prediction\n                * - prId\n                */\n              val result = if (feedbackEnabled) {\n                implicit val formats =\n                  
algorithms.headOption map { alg =>\n                    alg.querySerializer\n                  } getOrElse {\n                    Utils.json4sDefaultFormats\n                  }\n                // val genPrId = Random.alphanumeric.take(64).mkString\n                def genPrId: String = Random.alphanumeric.take(64).mkString\n                val newPrId = prediction match {\n                  case id: WithPrId =>\n                    val org = id.prId\n                    if (org.isEmpty) genPrId else org\n                  case _ => genPrId\n                }\n\n                // also save Query's prId as prId of this pio_pr predict events\n                val queryPrId =\n                  query match {\n                    case id: WithPrId =>\n                      Map(\"prId\" -> id.prId)\n                    case _ =>\n                      Map.empty\n                  }\n                val data = Map(\n                  // \"appId\" -> dataSourceParams.asInstanceOf[ParamsWithAppId].appId,\n                  \"event\" -> \"predict\",\n                  \"eventTime\" -> queryTime.toString(),\n                  \"entityType\" -> \"pio_pr\", // prediction result\n                  \"entityId\" -> newPrId,\n                  \"properties\" -> Map(\n                    \"engineInstanceId\" -> engineInstance.id,\n                    \"query\" -> query,\n                    \"prediction\" -> prediction)) ++ queryPrId\n                // At this point args.accessKey should be Some(String).\n                val accessKey = args.accessKey.getOrElse(\"\")\n                val f: Future[Int] = Future {\n                  scalaj.http.Http(\n                    s\"http://${args.eventServerIp}:${args.eventServerPort}/\" +\n                    s\"events.json?accessKey=$accessKey\").postData(\n                    write(data)).header(\n                    \"content-type\", \"application/json\").asString.code\n                }\n                f onComplete {\n                
  case Success(code) => {\n                    if (code != 201) {\n                      log.error(s\"Feedback event failed. Status code: $code.\"\n                        + s\"Data: ${write(data)}.\")\n                    }\n                  }\n                  case Failure(t) => {\n                    log.error(s\"Feedback event failed: ${t.getMessage}\") }\n                }\n                // overwrite prId in predictedResult\n                // - if it is WithPrId,\n                //   then overwrite with new prId\n                // - if it is not WithPrId, no prId injection\n                if (prediction.isInstanceOf[WithPrId]) {\n                  predictionJValue merge parse(s\"\"\"{\"prId\" : \"$newPrId\"}\"\"\")\n                } else {\n                  predictionJValue\n                }\n              } else predictionJValue\n\n              val pluginResult =\n                pluginContext.outputBlockers.values.foldLeft(result) { case (r, p) =>\n                  p.process(engineInstance, queryJValue, r, pluginContext)\n                }\n              pluginsActorRef ! 
(engineInstance, queryJValue, result)\n\n              // Bookkeeping\n              val servingEndTime = DateTime.now\n              lastServingSec =\n                (servingEndTime.getMillis - servingStartTime.getMillis) / 1000.0\n              avgServingSec =\n                ((avgServingSec * requestCount) + lastServingSec) /\n                (requestCount + 1)\n              requestCount += 1\n\n              complete(compact(render(pluginResult)))\n\n            } catch {\n              case e: MappingException =>\n                val msg = s\"Query:\\n$queryString\\n\\nStack Trace:\\n\" +\n                  s\"${ExceptionUtils.getStackTrace(e)}\\n\\n\"\n                log.error(msg)\n                args.logUrl map { url =>\n                  remoteLog(\n                    url,\n                    args.logPrefix.getOrElse(\"\"),\n                    msg)\n                  }\n                complete(StatusCodes.BadRequest, e.getMessage)\n              case e: Throwable =>\n                val msg = s\"Query:\\n$queryString\\n\\nStack Trace:\\n\" +\n                  s\"${ExceptionUtils.getStackTrace(e)}\\n\\n\"\n                log.error(msg)\n                args.logUrl map { url =>\n                  remoteLog(\n                    url,\n                    args.logPrefix.getOrElse(\"\"),\n                    msg)\n                  }\n                complete(StatusCodes.InternalServerError, msg)\n            }\n          }\n        }\n      } ~\n      path(\"reload\") {\n        authenticate(withAccessKeyFromFile) { request =>\n          post {\n            system.actorSelection(\"/user/master\") ! ReloadServer()\n            complete(\"Reloading...\")\n          }\n        }\n      } ~\n      path(\"stop\") {\n        authenticate(withAccessKeyFromFile) { request =>\n          post {\n            system.scheduler.scheduleOnce(1.seconds) {\n              system.actorSelection(\"/user/master\") ! 
StopServer()\n            }\n            complete(\"Shutting down...\")\n          }\n        }\n      } ~\n      pathPrefix(\"assets\") {\n        getFromResourceDirectory(\"assets\")\n      } ~\n      path(\"plugins.json\") {\n        import EngineServerJson4sSupport._\n        get {\n          complete(\n            Map(\"plugins\" -> Map(\n              \"outputblockers\" -> pluginContext.outputBlockers.map { case (n, p) =>\n                n -> Map(\n                  \"name\"        -> p.pluginName,\n                  \"description\" -> p.pluginDescription,\n                  \"class\"       -> p.getClass.getName,\n                  \"params\"      -> pluginContext.pluginParams(p.pluginName))\n              },\n              \"outputsniffers\" -> pluginContext.outputSniffers.map { case (n, p) =>\n                n -> Map(\n                  \"name\"        -> p.pluginName,\n                  \"description\" -> p.pluginDescription,\n                  \"class\"       -> p.getClass.getName,\n                  \"params\"      -> pluginContext.pluginParams(p.pluginName))\n              }\n            ))\n          )\n        }\n      } ~\n      path(\"plugins\" / Segments) { segments =>\n        import EngineServerJson4sSupport._\n        get {\n          val pluginArgs = segments.drop(2)\n          val pluginType = segments(0)\n          val pluginName = segments(1)\n          pluginType match {\n            case EngineServerPlugin.outputBlocker =>\n              complete(HttpResponse(entity = HttpEntity(\n                  `application/json`,\n                  pluginContext.outputBlockers(pluginName).handleREST(pluginArgs))))\n\n            case EngineServerPlugin.outputSniffer =>\n              complete(pluginsActorRef ? 
PluginsActor.HandleREST(\n                pluginName = pluginName,\n                pluginArgs = pluginArgs) map { json =>\n                HttpResponse(entity = HttpEntity(\n                  `application/json`,\n                  json.asInstanceOf[String]\n                ))\n              })\n          }\n        }\n      }\n\n    myRoute\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/CreateWorkflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport java.net.URI\n\nimport com.github.nscala_time.time.Imports._\nimport com.google.common.io.ByteStreams\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.controller.Engine\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.data.storage.EvaluationInstance\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.fs.FileSystem\nimport org.apache.hadoop.fs.Path\nimport org.json4s.JValue\nimport org.json4s.JString\nimport org.json4s.native.JsonMethods.parse\n\nimport scala.language.existentials\n\nobject CreateWorkflow extends Logging {\n\n  case class WorkflowConfig(\n    deployMode: String = \"\",\n    batch: String = \"\",\n    engineId: String = \"\",\n    engineVersion: String = \"\",\n    engineVariant: String = \"\",\n    engineFactory: String = \"\",\n    engineParamsKey: String = \"\",\n    evaluationClass: Option[String] = None,\n    
engineParamsGeneratorClass: Option[String] = None,\n    env: Option[String] = None,\n    skipSanityCheck: Boolean = false,\n    stopAfterRead: Boolean = false,\n    stopAfterPrepare: Boolean = false,\n    verbosity: Int = 0,\n    verbose: Boolean = false,\n    debug: Boolean = false,\n    logFile: Option[String] = None,\n    jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\n  case class AlgorithmParams(name: String, params: JValue)\n\n  private def stringFromFile(filePath: String): String = {\n    try {\n      val uri = new URI(filePath)\n      val fs = FileSystem.get(uri, new Configuration())\n      new String(ByteStreams.toByteArray(fs.open(new Path(uri))).map(_.toChar))\n    } catch {\n      case e: java.io.IOException =>\n        error(s\"Error reading from file: ${e.getMessage}. Aborting workflow.\")\n        sys.exit(1)\n    }\n  }\n\n  val parser = new scopt.OptionParser[WorkflowConfig](\"CreateWorkflow\") {\n    override def errorOnUnknownArgument: Boolean = false\n    opt[String](\"batch\") action { (x, c) =>\n      c.copy(batch = x)\n    } text(\"Batch label of the workflow run.\")\n    opt[String](\"engine-id\") required() action { (x, c) =>\n      c.copy(engineId = x)\n    } text(\"Engine's ID.\")\n    opt[String](\"engine-version\") required() action { (x, c) =>\n      c.copy(engineVersion = x)\n    } text(\"Engine's version.\")\n    opt[String](\"engine-variant\") required() action { (x, c) =>\n      c.copy(engineVariant = x)\n    } text(\"Engine variant JSON.\")\n    opt[String](\"evaluation-class\") action { (x, c) =>\n      c.copy(evaluationClass = Some(x))\n    } text(\"Class name of the run's evaluator.\")\n    opt[String](\"engine-params-generator-class\") action { (x, c) =>\n      c.copy(engineParamsGeneratorClass = Some(x))\n    } text(\"Path to evaluator parameters\")\n    opt[String](\"env\") action { (x, c) =>\n      c.copy(env = Some(x))\n    } text(\"Comma-separated list of environmental variables (in 'FOO=BAR' \" +\n      
\"format) to pass to the Spark execution environment.\")\n    opt[Unit](\"verbose\") action { (x, c) =>\n      c.copy(verbose = true)\n    } text(\"Enable verbose output.\")\n    opt[Unit](\"debug\") action { (x, c) =>\n      c.copy(debug = true)\n    } text(\"Enable debug output.\")\n    opt[Unit](\"skip-sanity-check\") action { (x, c) =>\n      c.copy(skipSanityCheck = true)\n    }\n    opt[Unit](\"stop-after-read\") action { (x, c) =>\n      c.copy(stopAfterRead = true)\n    }\n    opt[Unit](\"stop-after-prepare\") action { (x, c) =>\n      c.copy(stopAfterPrepare = true)\n    }\n    opt[String](\"deploy-mode\") action { (x, c) =>\n      c.copy(deployMode = x)\n    }\n    opt[Int](\"verbosity\") action { (x, c) =>\n      c.copy(verbosity = x)\n    }\n    opt[String](\"engine-factory\") action { (x, c) =>\n      c.copy(engineFactory = x)\n    }\n    opt[String](\"engine-params-key\") action { (x, c) =>\n      c.copy(engineParamsKey = x)\n    }\n    opt[String](\"log-file\") action { (x, c) =>\n      c.copy(logFile = Some(x))\n    }\n    opt[String](\"json-extractor\") action { (x, c) =>\n      c.copy(jsonExtractor = JsonExtractorOption.withName(x))\n    }\n  }\n\n  def main(args: Array[String]): Unit = {\n    try {\n      val wfcOpt = parser.parse(args, WorkflowConfig())\n      if (wfcOpt.isEmpty) {\n        logger.error(\"WorkflowConfig is empty. Quitting\")\n        return\n      }\n\n      val wfc = wfcOpt.get\n\n      WorkflowUtils.modifyLogging(wfc.verbose)\n\n      val evaluation = wfc.evaluationClass.map { ec =>\n        try {\n          WorkflowUtils.getEvaluation(ec, getClass.getClassLoader)._2\n        } catch {\n          case e @ (_: ClassNotFoundException | _: NoSuchMethodException) =>\n            error(s\"Unable to obtain evaluation $ec. 
Aborting workflow.\", e)\n            sys.exit(1)\n        }\n      }\n\n      val engineParamsGenerator = wfc.engineParamsGeneratorClass.map { epg =>\n        try {\n          WorkflowUtils.getEngineParamsGenerator(epg, getClass.getClassLoader)._2\n        } catch {\n          case e @ (_: ClassNotFoundException | _: NoSuchMethodException) =>\n            error(s\"Unable to obtain engine parameters generator $epg. \" +\n              \"Aborting workflow.\", e)\n            sys.exit(1)\n        }\n      }\n\n      val pioEnvVars = wfc.env.map { e =>\n        e.split(',').flatMap { p =>\n          p.split('=') match {\n            case Array(k, v) => List(k -> v)\n            case _ => Nil\n          }\n        }.toMap\n      }.getOrElse(Map.empty)\n\n      if (evaluation.isEmpty) {\n        val variantJson = parse(stringFromFile(wfc.engineVariant))\n        val engineFactory = if (wfc.engineFactory == \"\") {\n          variantJson \\ \"engineFactory\" match {\n            case JString(s) => s\n            case _ =>\n              error(\"Unable to read engine factory class name from \" +\n                s\"${wfc.engineVariant}. Aborting.\")\n              sys.exit(1)\n          }\n        } else wfc.engineFactory\n        val variantId = variantJson \\ \"id\" match {\n          case JString(s) => s\n          case _ =>\n            error(\"Unable to read engine variant ID from \" +\n              s\"${wfc.engineVariant}. Aborting.\")\n            sys.exit(1)\n        }\n        val (engineLanguage, engineFactoryObj) = try {\n          WorkflowUtils.getEngine(engineFactory, getClass.getClassLoader)\n        } catch {\n          case e @ (_: ClassNotFoundException | _: NoSuchMethodException) =>\n            error(s\"Unable to obtain engine: ${e.getMessage}. 
Aborting workflow.\")\n            sys.exit(1)\n        }\n\n        val engine: BaseEngine[_, _, _, _] = engineFactoryObj()\n\n        val customSparkConf = WorkflowUtils.extractSparkConf(variantJson)\n        val workflowParams = WorkflowParams(\n          verbose = wfc.verbosity,\n          skipSanityCheck = wfc.skipSanityCheck,\n          stopAfterRead = wfc.stopAfterRead,\n          stopAfterPrepare = wfc.stopAfterPrepare,\n          sparkEnv = WorkflowParams().sparkEnv ++ customSparkConf)\n\n        // Evaluator Not Specified. Do training.\n        if (!engine.isInstanceOf[Engine[_,_,_,_,_,_]]) {\n          throw new NoSuchMethodException(s\"Engine $engine is not trainable\")\n        }\n\n        val trainableEngine = engine.asInstanceOf[Engine[_, _, _, _, _, _]]\n\n        val engineParams = if (wfc.engineParamsKey == \"\") {\n          trainableEngine.jValueToEngineParams(variantJson, wfc.jsonExtractor)\n        } else {\n          engineFactoryObj.engineParams(wfc.engineParamsKey)\n        }\n\n        val engineInstance = EngineInstance(\n          id = \"\",\n          status = \"INIT\",\n          startTime = DateTime.now,\n          endTime = DateTime.now,\n          engineId = wfc.engineId,\n          engineVersion = wfc.engineVersion,\n          engineVariant = variantId,\n          engineFactory = engineFactory,\n          batch = wfc.batch,\n          env = pioEnvVars,\n          sparkConf = workflowParams.sparkEnv,\n          dataSourceParams =\n            JsonExtractor.paramToJson(wfc.jsonExtractor, engineParams.dataSourceParams),\n          preparatorParams =\n            JsonExtractor.paramToJson(wfc.jsonExtractor, engineParams.preparatorParams),\n          algorithmsParams =\n            JsonExtractor.paramsToJson(wfc.jsonExtractor, engineParams.algorithmParamsList),\n          servingParams =\n            JsonExtractor.paramToJson(wfc.jsonExtractor, engineParams.servingParams))\n\n        val engineInstanceId = 
Storage.getMetaDataEngineInstances.insert(\n          engineInstance)\n\n        CoreWorkflow.runTrain(\n          env = pioEnvVars,\n          params = workflowParams,\n          engine = trainableEngine,\n          engineParams = engineParams,\n          engineInstance = engineInstance.copy(id = engineInstanceId))\n      } else {\n        val workflowParams = WorkflowParams(\n          verbose = wfc.verbosity,\n          skipSanityCheck = wfc.skipSanityCheck,\n          stopAfterRead = wfc.stopAfterRead,\n          stopAfterPrepare = wfc.stopAfterPrepare,\n          sparkEnv = WorkflowParams().sparkEnv)\n        val evaluationInstance = EvaluationInstance(\n          evaluationClass = wfc.evaluationClass.get,\n          engineParamsGeneratorClass = wfc.engineParamsGeneratorClass.get,\n          batch = wfc.batch,\n          env = pioEnvVars,\n          sparkConf = workflowParams.sparkEnv\n        )\n        Workflow.runEvaluation(\n          evaluation = evaluation.get,\n          engineParamsGenerator = engineParamsGenerator.get,\n          evaluationInstance = evaluationInstance,\n          params = workflowParams)\n      }\n    } finally {\n      CleanupFunctions.run()\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/EngineServerPlugin.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.json4s._\n\ntrait EngineServerPlugin {\n  val pluginName: String\n  val pluginDescription: String\n  val pluginType: String\n\n  def process(\n    engineInstance: EngineInstance,\n    query: JValue,\n    prediction: JValue,\n    context: EngineServerPluginContext): JValue\n\n  def handleREST(arguments: Seq[String]): String\n}\n\nobject EngineServerPlugin {\n  val outputBlocker = \"outputblocker\"\n  val outputSniffer = \"outputsniffer\"\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginContext.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport java.net.URI\nimport java.nio.charset.StandardCharsets\nimport java.util.ServiceLoader\n\nimport akka.event.LoggingAdapter\nimport com.google.common.io.ByteStreams\nimport grizzled.slf4j.Logging\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.fs.FileSystem\nimport org.apache.hadoop.fs.Path\nimport org.json4s.DefaultFormats\nimport org.json4s.Formats\nimport org.json4s.JObject\nimport org.json4s.JValue\nimport org.json4s.native.JsonMethods._\n\nimport scala.collection.JavaConversions._\nimport scala.collection.mutable\n\n/** Plugins and plugin parameters made available to an engine server.\n  *\n  * @param plugins Registered plugins, keyed by plugin type and then by plugin name.\n  * @param pluginParams Per-plugin JSON settings, keyed by plugin name.\n  * @param log Logging adapter carried along with the context.\n  */\nclass EngineServerPluginContext(\n    val plugins: mutable.Map[String, mutable.Map[String, EngineServerPlugin]],\n    val pluginParams: mutable.Map[String, JValue],\n    val log: LoggingAdapter) {\n  def outputBlockers: Map[String, EngineServerPlugin] =\n    plugins.getOrElse(EngineServerPlugin.outputBlocker, Map.empty).toMap\n  def outputSniffers: Map[String, EngineServerPlugin] =\n    plugins.getOrElse(EngineServerPlugin.outputSniffer, Map.empty).toMap\n}\n\nobject EngineServerPluginContext extends Logging {\n  implicit val formats: Formats = DefaultFormats\n\n  /** Builds a plugin context from the plugins found by the ServiceLoader.\n    *\n    * A plugin is registered only when the `plugins` object of the engine variant\n    * JSON has an entry under the plugin name whose `enabled` field is true.\n    *\n    * @param log Logging adapter stored in the resulting context.\n    * @param engineVariant URI of the engine variant JSON file.\n    */\n  def apply(log: LoggingAdapter, engineVariant: String): EngineServerPluginContext = {\n    val plugins = mutable.Map[String, mutable.Map[String, EngineServerPlugin]](\n      EngineServerPlugin.outputBlocker -> mutable.Map(),\n      EngineServerPlugin.outputSniffer -> mutable.Map())\n    val pluginParams = mutable.Map[String, JValue]()\n    val serviceLoader = ServiceLoader.load(classOf[EngineServerPlugin])\n    stringFromFile(engineVariant).foreach { variantJson =>\n      (parse(variantJson) \\ \"plugins\").extractOpt[JObject].foreach { pluginDefs =>\n        pluginDefs.obj.foreach { pluginParams += _ }\n      }\n    }\n    serviceLoader foreach { service =>\n      pluginParams.get(service.pluginName) map { params =>\n        if ((params \\ \"enabled\").extractOrElse(false)) {\n          info(s\"Plugin ${service.pluginName} is enabled.\")\n          plugins(service.pluginType) += service.pluginName -> service\n        } else {\n          info(s\"Plugin ${service.pluginName} is disabled.\")\n        }\n      } getOrElse {\n        info(s\"Plugin ${service.pluginName} is disabled.\")\n      }\n    }\n    new EngineServerPluginContext(\n      plugins,\n      pluginParams,\n      log)\n  }\n\n  /** Reads the file at `filePath` through the Hadoop FileSystem API as UTF-8 text.\n    *\n    * @param filePath URI of the file to read.\n    * @return The file content, or None when the path does not exist.\n    */\n  private def stringFromFile(filePath: String): Option[String] = {\n    try {\n      val uri = new URI(filePath)\n      val fs = FileSystem.get(uri, new Configuration())\n      val path = new Path(uri)\n      if (fs.exists(path)) {\n        val in = fs.open(path)\n        try {\n          // Decode the bytes as UTF-8. Mapping each signed byte to a Char corrupts\n          // every non-ASCII character, which JSON files may legitimately contain.\n          Some(new String(ByteStreams.toByteArray(in), StandardCharsets.UTF_8))\n        } finally {\n          // Release the file handle even when reading fails.\n          in.close()\n        }\n      } else {\n        None\n      }\n    } catch {\n      // An I/O failure is logged and terminates the JVM with exit status 1.\n      case e: java.io.IOException =>\n        error(s\"Error reading from file: ${e.getMessage}. Aborting.\")\n        sys.exit(1)\n    }\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginsActor.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport akka.actor.Actor\nimport akka.event.Logging\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.json4s.JObject\nimport org.json4s.JString\nimport org.json4s.JValue\nimport org.json4s.native.JsonMethods.compact\nimport org.json4s.native.JsonMethods.render\n\n/** Actor hosting the output sniffer plugins of an engine server.\n  *\n  * @param engineVariant Engine variant file location, handed to\n  *                      EngineServerPluginContext to build the plugin context.\n  */\nclass PluginsActor(engineVariant: String) extends Actor {\n  implicit val system = context.system\n  val log = Logging(system, this)\n\n  val pluginContext = EngineServerPluginContext(log, engineVariant)\n\n  override def receive: PartialFunction[Any, Unit] = {\n    // (engine instance, query, prediction): pass the triple to every output sniffer.\n    case (ei: EngineInstance, q: JValue, p: JValue) =>\n      pluginContext.outputSniffers.values.foreach(_.process(ei, q, p, pluginContext))\n    // REST request for one sniffer: reply with its answer, or a JSON error object.\n    case h: PluginsActor.HandleREST =>\n      try {\n        sender() ! pluginContext.outputSniffers(h.pluginName).handleREST(h.pluginArgs)\n      } catch {\n        case e: Exception =>\n          // Render through json4s so quotes, backslashes and control characters in\n          // the message are escaped; interpolating it raw could emit invalid JSON.\n          val message = Option(e.getMessage).getOrElse(e.toString)\n          sender() ! compact(render(JObject(\"message\" -> JString(message))))\n      }\n    case _ =>\n      log.error(\"Unknown message sent to the Engine Server output sniffer plugin host.\")\n  }\n}\n\nobject PluginsActor {\n  /** Asks the output sniffer named `pluginName` to handle `pluginArgs`. */\n  case class HandleREST(pluginName: String, pluginArgs: Seq[String])\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/EvaluationWorkflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\nimport org.apache.predictionio.core.BaseEngine\n\nimport grizzled.slf4j.Logger\nimport org.apache.spark.SparkContext\n\nimport scala.language.existentials\n\nobject EvaluationWorkflow {\n  @transient lazy val logger = Logger[this.type]\n  def runEvaluation[EI, Q, P, A, R <: BaseEvaluatorResult](\n      sc: SparkContext,\n      evaluation: Evaluation,\n      engine: BaseEngine[EI, Q, P, A],\n      engineParamsList: Seq[EngineParams],\n      evaluator: BaseEvaluator[EI, Q, P, A, R],\n      params: WorkflowParams)\n    : R = {\n    val engineEvalDataSet = engine.batchEval(sc, engineParamsList, params)\n    evaluator.evaluateBase(sc, evaluation, engineEvalDataSet, params)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/FakeWorkflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.annotation.Experimental\n// FIXME(yipjustin): Remove wildcard import.\nimport org.apache.predictionio.core._\nimport org.apache.predictionio.controller._\n\nimport grizzled.slf4j.Logger\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\n\n/** Placeholder engine: training aborts with StopAfterReadInterruption and\n  * evaluation yields an empty data set.\n  */\n@Experimental\nprivate[predictionio] class FakeEngine\nextends BaseEngine[EmptyParams, EmptyParams, EmptyParams, EmptyParams] {\n  @transient lazy val logger = Logger[this.type]\n\n  override def train(\n    sc: SparkContext,\n    engineParams: EngineParams,\n    engineInstanceId: String,\n    params: WorkflowParams): Seq[Any] = {\n    throw new StopAfterReadInterruption()\n  }\n\n  override def eval(\n    sc: SparkContext,\n    engineParams: EngineParams,\n    params: WorkflowParams)\n  : Seq[(EmptyParams, RDD[(EmptyParams, EmptyParams, EmptyParams)])] = {\n    return Seq[(EmptyParams, RDD[(EmptyParams, EmptyParams, EmptyParams)])]()\n  }\n}\n\n/** Evaluator that ignores the evaluation data set and just runs `f` on the\n  * SparkContext it is given.\n  */\n@Experimental\nprivate[predictionio] class FakeRunner(f: (SparkContext => Unit))\n    extends BaseEvaluator[EmptyParams, EmptyParams, EmptyParams, EmptyParams,\n      FakeEvalResult] {\n  @transient private lazy val logger = Logger[this.type]\n  override def evaluateBase(\n    sc: SparkContext,\n    evaluation: Evaluation,\n    engineEvalDataSet:\n        Seq[(EngineParams, Seq[(EmptyParams, RDD[(EmptyParams, EmptyParams, EmptyParams)])])],\n    params: WorkflowParams): FakeEvalResult = {\n    f(sc)\n    FakeEvalResult()\n  }\n}\n\n@Experimental\nprivate[predictionio] case class FakeEvalResult() extends BaseEvaluatorResult {\n  override val noSave: Boolean = true\n}\n\n/** FakeRun allows users to run a custom function under the exact same\n  * environment as other PredictionIO workflows.\n  *\n  * Useful for developing new features. You only need to extend this trait and\n  * implement a function: (SparkContext => Unit). For example, the code below\n  * can be run with `pio eval HelloWorld`.\n  *\n  * {{{\n  * object HelloWorld extends FakeRun {\n  *   // func defines the function pio runs, must have signature (SparkContext => Unit).\n  *   func = f\n  *\n  *   def f(sc: SparkContext): Unit = {\n  *     val logger = Logger[this.type]\n  *     logger.info(\"HelloWorld\")\n  *   }\n  * }\n  * }}}\n  *\n  */\n@Experimental\ntrait FakeRun extends Evaluation with EngineParamsGenerator {\n  private[this] var _runner: FakeRunner = _\n  // Last function assigned through `func = ...`; a no-op until one is assigned.\n  private[this] var _func: SparkContext => Unit = (_: SparkContext) => ()\n\n  def runner: FakeRunner = _runner\n  def runner_=(r: FakeRunner) {\n    // Store the runner so that the getter reflects the last assignment instead\n    // of always returning null.\n    _runner = r\n    engineEvaluator = (new FakeEngine(), r)\n    engineParamsList = Seq(new EngineParams())\n  }\n\n  def func: (SparkContext => Unit) = _func\n  def func_=(f: SparkContext => Unit) {\n    _func = f\n    runner = new FakeRunner(f)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/JsonExtractor.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport com.google.gson.Gson\nimport com.google.gson.GsonBuilder\nimport com.google.gson.TypeAdapterFactory\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.controller.Utils\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.json4s.Extraction\nimport org.json4s.Formats\nimport org.json4s.JsonAST.{JArray, JValue}\nimport org.json4s.native.JsonMethods.compact\nimport org.json4s.native.JsonMethods.pretty\nimport org.json4s.native.JsonMethods.parse\nimport org.json4s.native.JsonMethods.render\n\nobject JsonExtractor {\n\n  def toJValue(\n    extractorOption: JsonExtractorOption,\n    o: Any,\n    json4sFormats: Formats = Utils.json4sDefaultFormats,\n    gsonTypeAdapterFactories: Seq[TypeAdapterFactory] = Seq.empty[TypeAdapterFactory]): JValue = {\n\n    extractorOption match {\n      case JsonExtractorOption.Both =>\n\n          val json4sResult = Extraction.decompose(o)(json4sFormats)\n          json4sResult.children.size match {\n            case 0 => 
parse(gson(gsonTypeAdapterFactories).toJson(o))\n            case _ => json4sResult\n          }\n      case JsonExtractorOption.Json4sNative =>\n        Extraction.decompose(o)(json4sFormats)\n      case JsonExtractorOption.Gson =>\n        parse(gson(gsonTypeAdapterFactories).toJson(o))\n    }\n  }\n\n  def extract[T](\n    extractorOption: JsonExtractorOption,\n    json: String,\n    clazz: Class[T],\n    json4sFormats: Formats = Utils.json4sDefaultFormats,\n    gsonTypeAdapterFactories: Seq[TypeAdapterFactory] = Seq.empty[TypeAdapterFactory]): T = {\n\n    extractorOption match {\n      case JsonExtractorOption.Both =>\n        try {\n          extractWithJson4sNative(json, json4sFormats, clazz)\n        } catch {\n          case e: Exception =>\n            extractWithGson(json, clazz, gsonTypeAdapterFactories)\n        }\n      case JsonExtractorOption.Json4sNative =>\n        extractWithJson4sNative(json, json4sFormats, clazz)\n      case JsonExtractorOption.Gson =>\n        extractWithGson(json, clazz, gsonTypeAdapterFactories)\n    }\n  }\n\n  def paramToJson(extractorOption: JsonExtractorOption, param: (String, Params)): String = {\n    // to be replaced JValue needs to be done by Json4s, otherwise the tuple JValue will be wrong\n    val toBeReplacedJValue =\n      JsonExtractor.toJValue(JsonExtractorOption.Json4sNative, (param._1, null))\n    val paramJValue = JsonExtractor.toJValue(extractorOption, param._2)\n\n    compact(render(toBeReplacedJValue.replace(param._1 :: Nil, paramJValue)))\n  }\n\n  def paramsToJson(extractorOption: JsonExtractorOption, params: Seq[(String, Params)]): String = {\n    compact(render(paramsToJValue(extractorOption, params)))\n  }\n\n  def engineParamsToJson(extractorOption: JsonExtractorOption, params: EngineParams) : String = {\n    compact(render(engineParamsToJValue(extractorOption, params)))\n  }\n\n  def engineParamstoPrettyJson(\n    extractorOption: JsonExtractorOption,\n    params: EngineParams) : String = {\n\n    
pretty(render(engineParamsToJValue(extractorOption, params)))\n  }\n\n  private def engineParamsToJValue(extractorOption: JsonExtractorOption, params: EngineParams) = {\n    var jValue = toJValue(JsonExtractorOption.Json4sNative, params)\n\n    val dataSourceParamsJValue = toJValue(extractorOption, params.dataSourceParams._2)\n    jValue = jValue.replace(\n      \"dataSourceParams\" :: params.dataSourceParams._1 :: Nil,\n      dataSourceParamsJValue)\n\n    val preparatorParamsJValue = toJValue(extractorOption, params.preparatorParams._2)\n    jValue = jValue.replace(\n      \"preparatorParams\" :: params.preparatorParams._1 :: Nil,\n      preparatorParamsJValue)\n\n    val algorithmParamsJValue = paramsToJValue(extractorOption, params.algorithmParamsList)\n    jValue = jValue.replace(\"algorithmParamsList\" :: Nil, algorithmParamsJValue)\n\n    val servingParamsJValue = toJValue(extractorOption, params.servingParams._2)\n    jValue = jValue.replace(\"servingParams\" :: params.servingParams._1 :: Nil, servingParamsJValue)\n\n    jValue\n  }\n\n  private\n  def paramsToJValue(extractorOption: JsonExtractorOption, params: Seq[(String, Params)]) = {\n    val jValues = params.map { case (name, param) =>\n      // to be replaced JValue needs to be done by Json4s, otherwise the tuple JValue will be wrong\n      val toBeReplacedJValue =\n        JsonExtractor.toJValue(JsonExtractorOption.Json4sNative, (name, null))\n      val paramJValue = JsonExtractor.toJValue(extractorOption, param)\n\n      toBeReplacedJValue.replace(name :: Nil, paramJValue)\n    }\n\n    JArray(jValues.toList)\n  }\n\n  private def extractWithJson4sNative[T](\n    json: String,\n    formats: Formats,\n    clazz: Class[T]): T = {\n\n    implicit val f = formats\n    implicit val m = if (clazz == classOf[Map[_, _]]) {\n      Manifest.classType(clazz, manifest[String], manifest[Any])\n    } else {\n      Manifest.classType(clazz)\n    }\n    Extraction.extract(parse(json))\n  }\n\n  private def 
extractWithGson[T](\n    json: String,\n    clazz: Class[T],\n    gsonTypeAdapterFactories: Seq[TypeAdapterFactory]): T = {\n\n    gson(gsonTypeAdapterFactories).fromJson(json, clazz)\n  }\n\n  private def gson(gsonTypeAdapterFactories: Seq[TypeAdapterFactory]): Gson = {\n    val gsonBuilder = new GsonBuilder()\n    gsonTypeAdapterFactories.foreach { typeAdapterFactory =>\n      gsonBuilder.registerTypeAdapterFactory(typeAdapterFactory)\n    }\n\n    gsonBuilder.create()\n  }\n\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/JsonExtractorOption.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nobject JsonExtractorOption extends Enumeration {\n  type JsonExtractorOption = Value\n  val Json4sNative = Value\n  val Gson = Value\n  val Both = Value\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/PersistentModelManifest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\ncase class PersistentModelManifest(className: String)\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/Workflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.annotation.Experimental\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.core.BaseEngine\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\nimport org.apache.predictionio.data.storage.EvaluationInstance\n\n/** Collection of workflow creation methods.\n  * @group Workflow\n  */\nobject Workflow {\n  // evaluator is already instantiated.\n  // This is an undocumented way of using evaluator. 
Still experimental.\n  // evaluatorParams is used to write into EngineInstance, will be shown in\n  // dashboard.\n  /*\n  def runEval[EI, Q, P, A, ER <: AnyRef](\n      engine: BaseEngine[EI, Q, P, A],\n      engineParams: EngineParams,\n      evaluator: BaseEvaluator[EI, Q, P, A, ER],\n      evaluatorParams: Params,\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      params: WorkflowParams = WorkflowParams()) {\n\n    implicit lazy val formats = Utils.json4sDefaultFormats +\n      new NameParamsSerializer\n\n    val engineInstance = EngineInstance(\n      id = \"\",\n      status = \"INIT\",\n      startTime = DateTime.now,\n      endTime = DateTime.now,\n      engineId = \"\",\n      engineVersion = \"\",\n      engineVariant = \"\",\n      engineFactory = \"FIXME\",\n      evaluatorClass = evaluator.getClass.getName(),\n      batch = params.batch,\n      env = env,\n      sparkConf = params.sparkEnv,\n      dataSourceParams = write(engineParams.dataSourceParams),\n      preparatorParams = write(engineParams.preparatorParams),\n      algorithmsParams = write(engineParams.algorithmParamsList),\n      servingParams = write(engineParams.servingParams),\n      evaluatorParams = write(evaluatorParams),\n      evaluatorResults = \"\",\n      evaluatorResultsHTML = \"\",\n      evaluatorResultsJSON = \"\")\n\n    CoreWorkflow.runEval(\n      engine = engine,\n      engineParams = engineParams,\n      engineInstance = engineInstance,\n      evaluator = evaluator,\n      evaluatorParams = evaluatorParams,\n      env = env,\n      params = params)\n  }\n  */\n\n  def runEvaluation(\n      evaluation: Evaluation,\n      engineParamsGenerator: EngineParamsGenerator,\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      evaluationInstance: EvaluationInstance = EvaluationInstance(),\n      params: WorkflowParams = WorkflowParams()) {\n    runEvaluationTypeless(\n      evaluation = evaluation,\n      engine = evaluation.engine,\n      
engineParamsList = engineParamsGenerator.engineParamsList,\n      evaluationInstance = evaluationInstance,\n      evaluator = evaluation.evaluator,\n      env = env,\n      params = params\n    )\n  }\n\n  def runEvaluationTypeless[\n      EI, Q, P, A, EEI, EQ, EP, EA, ER <: BaseEvaluatorResult](\n      evaluation: Evaluation,\n      engine: BaseEngine[EI, Q, P, A],\n      engineParamsList: Seq[EngineParams],\n      evaluationInstance: EvaluationInstance,\n      evaluator: BaseEvaluator[EEI, EQ, EP, EA, ER],\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      params: WorkflowParams = WorkflowParams()) {\n    runEvaluationViaCoreWorkflow(\n      evaluation = evaluation,\n      engine = engine,\n      engineParamsList = engineParamsList,\n      evaluationInstance = evaluationInstance,\n      evaluator = evaluator.asInstanceOf[BaseEvaluator[EI, Q, P, A, ER]],\n      env = env,\n      params = params)\n  }\n\n  /** :: Experimental :: */\n  @Experimental\n  def runEvaluationViaCoreWorkflow[EI, Q, P, A, R <: BaseEvaluatorResult](\n      evaluation: Evaluation,\n      engine: BaseEngine[EI, Q, P, A],\n      engineParamsList: Seq[EngineParams],\n      evaluationInstance: EvaluationInstance,\n      evaluator: BaseEvaluator[EI, Q, P, A, R],\n      env: Map[String, String] = WorkflowUtils.pioEnvVars,\n      params: WorkflowParams = WorkflowParams()) {\n    CoreWorkflow.runEvaluation(\n      evaluation = evaluation,\n      engine = engine,\n      engineParamsList = engineParamsList,\n      evaluationInstance = evaluationInstance,\n      evaluator = evaluator,\n      env = env,\n      params = params)\n  }\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/WorkflowContext.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport grizzled.slf4j.Logging\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkConf\n\nimport scala.language.existentials\n\n// FIXME: move to better location.\nobject WorkflowContext extends Logging {\n  /** Creates a SparkContext for a PredictionIO workflow.\n    *\n    * @param batch Batch label, used as the suffix of the Spark application name.\n    * @param executorEnv Environment variables to set on the Spark executors.\n    * @param sparkEnv Spark properties applied through SparkConf.setAll().\n    * @param mode Optional mode label; when non-empty it is appended to the\n    *             `PredictionIO` prefix of the application name.\n    */\n  def apply(\n      batch: String = \"\",\n      executorEnv: Map[String, String] = Map.empty,\n      sparkEnv: Map[String, String] = Map.empty,\n      mode: String = \"\"\n    ): SparkContext = {\n    val conf = new SparkConf()\n    val prefix = if (mode == \"\") \"PredictionIO\" else s\"PredictionIO ${mode}\"\n    conf.setAppName(s\"${prefix}: ${batch}\")\n    debug(s\"Executor environment received: ${executorEnv}\")\n    // foreach, not map: the call is made for its side effect on conf only.\n    executorEnv.foreach { case (key, value) => conf.setExecutorEnv(key, value) }\n    debug(s\"SparkConf executor environment: ${conf.getExecutorEnv}\")\n    debug(s\"Application environment received: ${sparkEnv}\")\n    conf.setAll(sparkEnv)\n    val sparkConfString = conf.getAll.toSeq\n    debug(s\"SparkConf environment: $sparkConfString\")\n    new SparkContext(conf)\n  }\n}\n\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/WorkflowParams.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\n/** Workflow parameters.\n  *\n  * @param batch Batch label of the run.\n  * @param verbose Verbosity level.\n  * @param saveModel Controls whether trained models are persisted.\n  * @param sparkEnv Spark properties that will be set in SparkConf.setAll().\n  * @param skipSanityCheck Skips all data sanity checks.\n  * @param stopAfterRead Stops the workflow after reading from the data source.\n  * @param stopAfterPrepare Stops the workflow after data preparation.\n  * @group Workflow\n  */\ncase class WorkflowParams(\n  batch: String = \"\",\n  verbose: Int = 2,\n  saveModel: Boolean = true,\n  sparkEnv: Map[String, String] =\n    Map[String, String](\"spark.executor.extraClassPath\" -> \".\"),\n  skipSanityCheck: Boolean = false,\n  stopAfterRead: Boolean = false,\n  stopAfterPrepare: Boolean = false) {\n  // Temporary workaround for WorkflowParamsBuilder for Java. It doesn't support\n  // custom spark environment yet.\n  def this(batch: String, verbose: Int, saveModel: Boolean)\n  = this(batch, verbose, saveModel, Map[String, String]())\n}\n"
  },
  {
    "path": "core/src/main/scala/org/apache/predictionio/workflow/WorkflowUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport java.io.File\nimport java.net.URI\n\nimport com.google.gson.{Gson, JsonSyntaxException}\nimport grizzled.slf4j.Logging\nimport org.apache.log4j.{Level, LogManager}\nimport org.apache.predictionio.controller._\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\nimport org.apache.spark.SparkContext\nimport org.apache.spark.api.java.JavaRDDLike\nimport org.apache.spark.rdd.RDD\nimport org.json4s.JsonAST.JValue\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\n\nimport scala.language.existentials\nimport scala.reflect.runtime.universe\n\n/** Collection of reusable workflow related utilities. 
*/\nobject WorkflowUtils extends Logging {\n  @transient private lazy val gson = new Gson\n\n  /** Obtains an Engine object in Scala, or instantiate an Engine in Java.\n    *\n    * @param engine Engine factory name.\n    * @param cl A Java ClassLoader to look for engine-related classes.\n    *\n    * @throws ClassNotFoundException\n    *         Thrown when engine factory class does not exist.\n    * @throws NoSuchMethodException\n    *         Thrown when engine factory's apply() method is not implemented.\n    */\n  def getEngine(engine: String, cl: ClassLoader): (EngineLanguage.Value, EngineFactory) = {\n    val runtimeMirror = universe.runtimeMirror(cl)\n    val engineModule = runtimeMirror.staticModule(engine)\n    val engineObject = runtimeMirror.reflectModule(engineModule)\n    try {\n      (\n        EngineLanguage.Scala,\n        engineObject.instance.asInstanceOf[EngineFactory]\n      )\n    } catch {\n      case e @ (_: NoSuchFieldException | _: ClassNotFoundException) =>\n        (\n          EngineLanguage.Java,\n          Class.forName(engine).newInstance.asInstanceOf[EngineFactory]\n        )\n    }\n  }\n\n  def getEngineParamsGenerator(epg: String, cl: ClassLoader):\n    (EngineLanguage.Value, EngineParamsGenerator) = {\n    val runtimeMirror = universe.runtimeMirror(cl)\n    val epgModule = runtimeMirror.staticModule(epg)\n    val epgObject = runtimeMirror.reflectModule(epgModule)\n    try {\n      (\n        EngineLanguage.Scala,\n        epgObject.instance.asInstanceOf[EngineParamsGenerator]\n      )\n    } catch {\n      case e @ (_: NoSuchFieldException | _: ClassNotFoundException) =>\n        (\n          EngineLanguage.Java,\n          Class.forName(epg).newInstance.asInstanceOf[EngineParamsGenerator]\n        )\n    }\n  }\n\n  def getEvaluation(evaluation: String, cl: ClassLoader): (EngineLanguage.Value, Evaluation) = {\n    val runtimeMirror = universe.runtimeMirror(cl)\n    val evaluationModule = runtimeMirror.staticModule(evaluation)\n 
   val evaluationObject = runtimeMirror.reflectModule(evaluationModule)\n    try {\n      (\n        EngineLanguage.Scala,\n        evaluationObject.instance.asInstanceOf[Evaluation]\n      )\n    } catch {\n      case e @ (_: NoSuchFieldException | _: ClassNotFoundException) =>\n        (\n          EngineLanguage.Java,\n          Class.forName(evaluation).newInstance.asInstanceOf[Evaluation]\n        )\n    }\n  }\n\n  /** Converts a JSON document to an instance of Params.\n    *\n    * @param language Engine's programming language.\n    * @param json JSON document.\n    * @param clazz Class of the component that is going to receive the resulting\n    *              Params instance as a constructor argument.\n    * @param jsonExtractor JSON extractor option.\n    * @param formats JSON4S serializers for deserialization.\n    *\n    * @throws MappingException Thrown when JSON4S fails to perform conversion.\n    * @throws JsonSyntaxException Thrown when GSON fails to perform conversion.\n    */\n  def extractParams(\n      language: EngineLanguage.Value = EngineLanguage.Scala,\n      json: String,\n      clazz: Class[_],\n      jsonExtractor: JsonExtractorOption,\n      formats: Formats = Utils.json4sDefaultFormats): Params = {\n    implicit val f = formats\n    val pClass = clazz.getConstructors.head.getParameterTypes\n    if (pClass.size == 0) {\n      if (json != \"\") {\n        warn(s\"Non-empty parameters supplied to ${clazz.getName}, but its \" +\n          \"constructor does not accept any arguments. Stubbing with empty \" +\n          \"parameters.\")\n      }\n      EmptyParams()\n    } else {\n      val apClass = pClass.head\n      try {\n        JsonExtractor.extract(jsonExtractor, json, apClass, f).asInstanceOf[Params]\n      } catch {\n        case e@(_: MappingException | _: JsonSyntaxException) =>\n          error(\n            s\"Unable to extract parameters for ${apClass.getName} from \" +\n              s\"JSON string: $json. 
Aborting workflow.\",\n            e)\n          throw e\n      }\n    }\n  }\n\n  def getParamsFromJsonByFieldAndClass(\n      variantJson: JValue,\n      field: String,\n      classMap: Map[String, Class[_]],\n      engineLanguage: EngineLanguage.Value,\n      jsonExtractor: JsonExtractorOption): (String, Params) = {\n    variantJson findField {\n      case JField(f, _) => f == field\n      case _ => false\n    } map { jv =>\n      implicit lazy val formats = Utils.json4sDefaultFormats + new NameParamsSerializer\n      val np: NameParams = try {\n        jv._2.extract[NameParams]\n      } catch {\n        case e: Exception =>\n          error(s\"Unable to extract $field name and params $jv\")\n          throw e\n      }\n      val extractedParams = np.params.map { p =>\n        try {\n          if (!classMap.contains(np.name)) {\n            error(s\"Unable to find $field class with name '${np.name}'\" +\n              \" defined in Engine.\")\n            sys.exit(1)\n          }\n          WorkflowUtils.extractParams(\n            engineLanguage,\n            compact(render(p)),\n            classMap(np.name),\n            jsonExtractor,\n            formats)\n        } catch {\n          case e: Exception =>\n            error(s\"Unable to extract $field params $p\")\n            throw e\n        }\n      }.getOrElse(EmptyParams())\n\n      (np.name, extractedParams)\n    } getOrElse(\"\", EmptyParams())\n  }\n\n  /** Grab environmental variables that starts with 'PIO_'. 
*/\n  def pioEnvVars: Map[String, String] =\n    sys.env.filter(kv => kv._1.startsWith(\"PIO_\"))\n\n  /** Converts Java (non-Scala) objects to a JSON4S JValue.\n    *\n    * @param params The Java object to be converted.\n    */\n  def javaObjectToJValue(params: AnyRef): JValue = parse(gson.toJson(params))\n\n  // Extract debug string by recursively traversing the data.\n  def debugString[D](data: D): String = {\n    val s: String = data match {\n      case rdd: RDD[_] => {\n        debugString(rdd.collect())\n      }\n      case javaRdd: JavaRDDLike[_, _] => {\n        debugString(javaRdd.collect())\n      }\n      case array: Array[_] => {\n        \"[\" + array.map(debugString).mkString(\",\") + \"]\"\n      }\n      case d: AnyRef => {\n        d.toString\n      }\n      case null => \"null\"\n    }\n    s\n  }\n\n  /** Detect third party software configuration files to be submitted as\n    * extras to Apache Spark. This makes sure all executors receive the same\n    * configuration.\n    */\n  def thirdPartyConfFiles: Seq[String] = {\n    val thirdPartyFiles = Map(\n      \"PIO_CONF_DIR\" -> \"log4j.properties\",\n      \"ES_CONF_DIR\" -> \"elasticsearch.yml\",\n      \"HADOOP_CONF_DIR\" -> \"core-site.xml\",\n      \"HBASE_CONF_DIR\" -> \"hbase-site.xml\")\n\n    thirdPartyFiles.keys.toSeq.flatMap { k: String =>\n      sys.env.get(k) map { x =>\n        val p = Seq(x, thirdPartyFiles(k)).mkString(File.separator)\n        if (new File(p).exists) Seq(p) else Seq[String]()\n      } getOrElse Seq[String]()\n    }\n  }\n\n  def thirdPartyClasspaths: Seq[String] = {\n    val thirdPartyPaths = Seq(\n      \"PIO_CONF_DIR\",\n      \"ES_CONF_DIR\",\n      \"POSTGRES_JDBC_DRIVER\",\n      \"MYSQL_JDBC_DRIVER\",\n      \"HADOOP_CONF_DIR\",\n      \"HBASE_CONF_DIR\")\n    thirdPartyPaths.flatMap(p =>\n      sys.env.get(p).map(Seq(_)).getOrElse(Seq[String]())\n    )\n  }\n\n  def thirdPartyJars: Seq[URI] = {\n    val thirdPartyPaths = Seq(\n      
\"POSTGRES_JDBC_DRIVER\",\n      \"MYSQL_JDBC_DRIVER\")\n    thirdPartyPaths.flatMap(p =>\n      sys.env.get(p) map { f =>\n        val file = new File(f)\n        if (file.exists()) {\n          Seq(file.toURI)\n        } else {\n          warn(s\"Environment variable $p is pointing to a nonexistent file $f. Ignoring.\")\n          Seq.empty\n        }\n      } getOrElse Seq.empty\n    )\n  }\n\n  def modifyLogging(verbose: Boolean): Unit = {\n    val rootLoggerLevel = if (verbose) Level.TRACE else Level.INFO\n    val chattyLoggerLevel = if (verbose) Level.INFO else Level.WARN\n\n    LogManager.getRootLogger.setLevel(rootLoggerLevel)\n\n    LogManager.getLogger(\"org.elasticsearch\").setLevel(chattyLoggerLevel)\n    LogManager.getLogger(\"org.apache.hadoop\").setLevel(chattyLoggerLevel)\n    LogManager.getLogger(\"org.apache.spark\").setLevel(chattyLoggerLevel)\n    LogManager.getLogger(\"org.eclipse.jetty\").setLevel(chattyLoggerLevel)\n    LogManager.getLogger(\"akka\").setLevel(chattyLoggerLevel)\n  }\n\n  def extractNameParams(jv: JValue): NameParams = {\n    implicit val formats = Utils.json4sDefaultFormats\n    val nameOpt = (jv \\ \"name\").extract[Option[String]]\n    val paramsOpt = (jv \\ \"params\").extract[Option[JValue]]\n\n    if (nameOpt.isEmpty && paramsOpt.isEmpty) {\n      error(\"Unable to find 'name' or 'params' fields in\" +\n        s\" ${compact(render(jv))}.\\n\" +\n        \"Since 0.8.4, the 'params' field is required in engine.json\" +\n        \" in order to specify parameters for DataSource, Preparator or\" +\n        \" Serving.\\n\" +\n        \"Please go to http://predictionio.apache.org/resources/upgrade/\" +\n        \" for detailed instruction of how to change engine.json.\")\n      sys.exit(1)\n    }\n\n    if (nameOpt.isEmpty) {\n      info(s\"No 'name' is found. Default empty String will be used.\")\n    }\n\n    if (paramsOpt.isEmpty) {\n      info(s\"No 'params' is found. 
Default EmptyParams will be used.\")\n    }\n\n    NameParams(\n      name = nameOpt.getOrElse(\"\"),\n      params = paramsOpt\n    )\n  }\n\n  def extractSparkConf(root: JValue): List[(String, String)] = {\n    def flatten(jv: JValue): List[(List[String], String)] = {\n      jv match {\n        case JObject(fields) =>\n          for ((namePrefix, childJV) <- fields;\n               (name, value) <- flatten(childJV))\n          yield (namePrefix :: name) -> value\n        case JArray(_) => {\n          error(\"Arrays are not allowed in the sparkConf section of engine.json.\")\n          sys.exit(1)\n        }\n        case JNothing => Nil\n        case _ => List(Nil -> jv.values.toString)\n      }\n    }\n\n    flatten(root \\ \"sparkConf\").map(x =>\n      (x._1.reduce((a, b) => s\"$a.$b\"), x._2))\n  }\n}\n\ncase class NameParams(name: String, params: Option[JValue])\n\nclass NameParamsSerializer extends CustomSerializer[NameParams](format => ( {\n  case jv: JValue => WorkflowUtils.extractNameParams(jv)\n}, {\n  case x: NameParams =>\n    JObject(JField(\"name\", JString(x.name)) ::\n      JField(\"params\", x.params.getOrElse(JNothing)) :: Nil)\n}\n  ))\n\n/** Collection of reusable workflow related utilities that touch on Apache\n  * Spark. 
They are separated to avoid compilation problems with certain code.\n  */\nobject SparkWorkflowUtils extends Logging {\n  def getPersistentModel[AP <: Params, M](\n      pmm: PersistentModelManifest,\n      runId: String,\n      params: AP,\n      sc: Option[SparkContext],\n      cl: ClassLoader): M = {\n    val runtimeMirror = universe.runtimeMirror(cl)\n    val pmmModule = runtimeMirror.staticModule(pmm.className)\n    val pmmObject = runtimeMirror.reflectModule(pmmModule)\n    try {\n      pmmObject.instance.asInstanceOf[PersistentModelLoader[AP, M]](\n        runId,\n        params,\n        sc)\n    } catch {\n      case e @ (_: NoSuchFieldException | _: ClassNotFoundException) => try {\n        val loadMethod = Class.forName(pmm.className).getMethod(\n          \"load\",\n          classOf[String],\n          classOf[Params],\n          classOf[SparkContext])\n        loadMethod.invoke(null, runId, params, sc.orNull).asInstanceOf[M]\n      } catch {\n        case e: ClassNotFoundException =>\n          error(s\"Model class ${pmm.className} cannot be found.\")\n          throw e\n        case e: NoSuchMethodException =>\n          error(\n            \"The load(String, Params, SparkContext) method cannot be found.\")\n          throw e\n      }\n    }\n  }\n}\n\nclass WorkflowInterruption() extends Exception\n\ncase class StopAfterReadInterruption() extends WorkflowInterruption\n\ncase class StopAfterPrepareInterruption() extends WorkflowInterruption\n\nobject EngineLanguage extends Enumeration {\n  val Scala, Java = Value\n}\n"
  },
  {
    "path": "core/src/main/twirl/org/apache/predictionio/controller/metric_evaluator.scala.html",
    "content": "<!--\n  Licensed to the Apache Software Foundation (ASF) under one or more\n  contributor license agreements.  See the NOTICE file distributed with\n  this work for additional information regarding copyright ownership.\n  The ASF licenses this file to You under the Apache License, Version 2.0\n  (the \"License\"); you may not use this file except in compliance with\n  the License.  You may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  See the License for the specific language governing permissions and\n  limitations under the License.\n-->\n<html>\n  <head>\n    <script type='text/javascript' src='https://www.google.com/jsapi'></script>\n    <script src=\"http://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js\"></script>\n    <script>\n      google.load('visualization', '1', {packages:['table', 'corechart',]});\n    </script>\n  </head>\n  <body>\n    <h1>Metric Evaluator</h1>\n    <div id='debug'></div>\n    <div id='table'>\n      <h3>Engine Params Evaluation Results</h3>\n      <div>Click on table to view the engine params</div>\n    </div>\n    <pre id='engineParams'></div>\n    <script type='text/javascript'>\n      google.setOnLoadCallback(load);\n\n      //var url =  'http://localhost:9000/engine_instances/ky01Q-glQheNE_s885JTSg/local_evaluator_results.json';\n      var url = 'evaluator_results.json';\n      var rawData;\n      var metricHeader;\n      var otherMetricHeaders;\n      var engineParamsScores;\n      var table;\n      var dataTable;\n\n      function load() {\n        rawData = JSON.parse(\n            jQuery.ajax({\n              url: url,\n              dataType: 'json',\n              async: false,\n              }).responseText);\n\n        metricHeader = 
rawData['metricHeader'];\n        otherMetricHeaders = rawData['otherMetricHeaders'];\n        engineParamsScores = rawData['engineParamsScores'];\n\n        drawTable();\n      }\n\n      function tableSelectHandler() {\n        var selection = table.getSelection();\n        if (selection.length > 0) {\n          var row = selection[0].row;\n          var idx = dataTable.getValue(row, 0);\n          var engineParams = engineParamsScores[idx]._1;\n\n          document.getElementById('engineParams').innerHTML = JSON.stringify(\n            engineParams, null, 2);\n        } else {\n          document.getElementById('engineParams').innerHTML = \"\";\n        }\n      }\n\n      function drawTable() {\n        var tableDiv = document.createElement('div');\n        document.getElementById('table').appendChild(tableDiv);\n\n\n        var dataArray = [];\n\n        var headers = ['Index', 'Best', metricHeader].concat(otherMetricHeaders);\n        dataArray.push(headers);\n\n        for (epIdx = 0; epIdx < engineParamsScores.length; epIdx++) {\n          var epScore = engineParamsScores[epIdx];\n          var isBest = (epIdx == rawData.bestIdx ? \"*\" : \"\");\n          dataArray.push([epIdx, isBest, epScore._2.score].concat(epScore._2.otherScores));\n        }\n\n        dataTable = google.visualization.arrayToDataTable(dataArray, false);\n\n        // formatter\n        var numberFormatter = new google.visualization.NumberFormat({fractionDigits: 4});\n\n        for (colIdx = 1; colIdx < dataTable.getNumberOfColumns(); colIdx++) {\n          if (dataTable.getColumnType(colIdx) == \"number\") {\n            numberFormatter.format(dataTable, colIdx);\n          }\n        }\n\n        table = new google.visualization.Table(tableDiv);\n\n        // select handler\n        google.visualization.events.addListener(table, 'select', tableSelectHandler);\n\n        table.draw(dataTable);\n      }\n\n    </script>\n  </body>\n</html>\n"
  },
  {
    "path": "core/src/main/twirl/org/apache/predictionio/workflow/index.scala.html",
    "content": "@import org.apache.predictionio.data.storage.EngineInstance\n@import org.apache.predictionio.workflow.ServerConfig\n@import org.joda.time.DateTime\n@import org.joda.time.format.DateTimeFormat\n@(args: ServerConfig,\n  engineInstance: EngineInstance,\n  algorithms: Seq[String],\n  algorithmsParams: Seq[String],\n  models: Seq[String],\n  dataSourceParams: String,\n  preparatorParams: String,\n  servingParams: String,\n  serverStartTime: DateTime,\n  feedback: Boolean,\n  eventServerIp: String,\n  eventServerPort: Int,\n  requestCount: Int,\n  avgServingSec: Double,\n  lastServingSec: Double\n  )\n<!DOCTYPE html>\n<!--\n  Licensed to the Apache Software Foundation (ASF) under one or more\n  contributor license agreements.  See the NOTICE file distributed with\n  this work for additional information regarding copyright ownership.\n  The ASF licenses this file to You under the Apache License, Version 2.0\n  (the \"License\"); you may not use this file except in compliance with\n  the License.  
You may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  See the License for the specific language governing permissions and\n  limitations under the License.\n-->\n<html lang=\"en\">\n  <head>\n    <title>@{engineInstance.engineFactory} (@{engineInstance.engineVariant}) - PredictionIO Engine Server at @{args.ip}:@{args.port}</title>\n    <link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css\" integrity=\"sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u\" crossorigin=\"anonymous\">\n    <style type=\"text/css\">\n      td { font-family: Menlo, Monaco, Consolas, \"Courier New\", monospace; }\n    </style>\n  </head>\n  <body>\n    <div class=\"container\">\n      <div class=\"page-header\">\n        <h1>PredictionIO Engine Server at @{args.ip}:@{args.port}</h1>\n        <p class=\"lead\">@{engineInstance.engineFactory} (@{engineInstance.engineVariant})</p>\n      </div>\n      <h2>Engine Information</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Training Start Time</th><td>@{DateTimeFormat.forStyle(\"FF\").print(engineInstance.startTime)}</td></tr>\n        <tr><th>Training End Time</th><td>@{DateTimeFormat.forStyle(\"FF\").print(engineInstance.endTime)}</td></tr>\n        <tr><th>Variant ID</th><td>@{engineInstance.engineVariant}</td></tr>\n        <tr><th>Instance ID</th><td>@{engineInstance.id}</td></tr>\n      </table>\n      <h2>Server Information</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Start Time</th><td>@{DateTimeFormat.forStyle(\"FF\").print(serverStartTime)}</td></tr>\n        <tr><th>Request Count</th><td>@{requestCount}</td></tr>\n        <tr><th>Average 
Serving Time</th><td>@{f\"${avgServingSec}%.4f\"} seconds</td></tr>\n        <tr><th>Last Serving Time</th><td>@{f\"${lastServingSec}%.4f\"} seconds</td></tr>\n        <tr><th>Engine Factory Class (Scala/Java)</th><td>@{engineInstance.engineFactory}</td></tr>\n      </table>\n      <h2>Data Source</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Parameters</th><td>@{dataSourceParams}</td></tr>\n      </table>\n      <h2>Data Preparator</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Parameters</th><td>@{preparatorParams}</td></tr>\n      </table>\n      <h2>Algorithms and Models</h2>\n        <table class=\"table table-bordered table-striped\">\n          <tr><th>#</th><th colspan=\"2\">Information</th></tr>\n          @for((((algo, param), model), i) <- algorithms.zip(algorithmsParams).zip(models).zipWithIndex) {\n            <tr>\n              <th rowspan=\"3\">@{i + 1}</th>\n              <th>Class</th><td>@{algo}</td>\n            </tr>\n            <tr><th>Parameters</th><td>@{param}</td></tr>\n            <tr><th>Model</th><td>@{model}</td></tr>\n          }\n        </table>\n      <h2>Serving</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Parameters</th><td>@{servingParams}</td></tr>\n      </table>\n      <h2>Feedback Loop Information</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr><th>Feedback Loop Enabled?</th><td>@{feedback}</td></tr>\n        <tr><th>Event Server IP</th><td>@{eventServerIp}</td></tr>\n        <tr><th>Event Server Port</th><td>@{eventServerPort}</td></tr>\n      </table>\n    </div>\n  </body>\n</html>\n"
  },
  {
    "path": "core/src/test/java/org/apache/predictionio/workflow/JavaParams.java",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow;\n\nimport org.apache.predictionio.controller.Params;\n\npublic class JavaParams implements Params {\n    private final String p;\n\n    public JavaParams(String p) {\n        this.p = p;\n    }\n\n    public String getP() {\n        return p;\n    }\n}\n"
  },
  {
    "path": "core/src/test/java/org/apache/predictionio/workflow/JavaQuery.java",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow;\n\nimport java.io.Serializable;\n\npublic class JavaQuery implements Serializable{\n    private final String q;\n\n    public JavaQuery(String q) {\n        this.q = q;\n    }\n\n    public String getQ() {\n        return q;\n    }\n\n    @Override\n    public boolean equals(Object o) {\n        if (this == o) return true;\n        if (o == null || getClass() != o.getClass()) return false;\n\n        JavaQuery javaQuery = (JavaQuery) o;\n\n        return !(q != null ? !q.equals(javaQuery.q) : javaQuery.q != null);\n\n    }\n\n    @Override\n    public int hashCode() {\n        return q != null ? q.hashCode() : 0;\n    }\n}\n"
  },
  {
    "path": "core/src/test/java/org/apache/predictionio/workflow/JavaQueryTypeAdapterFactory.java",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow;\n\nimport com.google.gson.Gson;\nimport com.google.gson.TypeAdapter;\nimport com.google.gson.TypeAdapterFactory;\nimport com.google.gson.reflect.TypeToken;\nimport com.google.gson.stream.JsonReader;\nimport com.google.gson.stream.JsonToken;\nimport com.google.gson.stream.JsonWriter;\n\nimport java.io.IOException;\n\npublic class JavaQueryTypeAdapterFactory implements TypeAdapterFactory {\n    @Override\n    public <T> TypeAdapter<T> create(Gson gson, TypeToken<T> type) {\n        if (type.getRawType().equals(JavaQuery.class)) {\n            return (TypeAdapter<T>) new TypeAdapter<JavaQuery>() {\n                public void write(JsonWriter out, JavaQuery value) throws IOException {\n                    if (value == null) {\n                        out.nullValue();\n                    } else {\n                        out.beginObject();\n                        out.name(\"q\").value(value.getQ().toUpperCase());\n                        out.endObject();\n                    }\n                }\n\n                public JavaQuery read(JsonReader reader) throws IOException {\n                    if 
(reader.peek() == JsonToken.NULL) {\n                        reader.nextNull();\n                        return null;\n                    } else {\n                        reader.beginObject();\n                        reader.nextName();\n                        String q = reader.nextString();\n                        reader.endObject();\n                        return new JavaQuery(q.toUpperCase());\n                    }\n                }\n            };\n        } else {\n            return null;\n        }\n    }\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/EngineTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.PersistentModelManifest\nimport org.apache.predictionio.workflow.SharedSparkContext\nimport org.apache.predictionio.workflow.StopAfterPrepareInterruption\nimport org.apache.predictionio.workflow.StopAfterReadInterruption\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.rdd.RDD\nimport org.scalatest.Inspectors._\nimport org.scalatest.Matchers._\nimport org.scalatest.FunSuite\nimport org.scalatest.Inside\n\nimport scala.util.Random\n\nclass EngineSuite\nextends FunSuite with Inside with SharedSparkContext {\n  import org.apache.predictionio.controller.Engine0._\n  @transient lazy val logger = Logger[this.type] \n\n  test(\"Engine.train\") {\n    val engine = new Engine(\n      classOf[PDataSource2],\n      classOf[PPreparator1],\n      Map(\"\" -> classOf[PAlgo2]),\n      classOf[LServing1])\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(0),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(2))),\n      
servingParams = LServing1.Params(3))\n\n    val models = engine.train(\n      sc, \n      engineParams, \n      engineInstanceId = \"\",\n      params = WorkflowParams())\n    \n    val pd = ProcessedData(1, TrainingData(0))\n\n    // PAlgo2.Model doesn't have IPersistentModel trait implemented. Hence the\n    // model extract after train is Unit.\n    models should contain theSameElementsAs Seq(())\n  }\n\n  test(\"Engine.train persisting PAlgo.Model\") {\n    val engine = new Engine(\n      classOf[PDataSource2],\n      classOf[PPreparator1],\n      Map(\n        \"PAlgo2\" -> classOf[PAlgo2],\n        \"PAlgo3\" -> classOf[PAlgo3]\n      ),\n      classOf[LServing1])\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(0),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq(\n        (\"PAlgo2\", PAlgo2.Params(2)),\n        (\"PAlgo3\", PAlgo3.Params(21)),\n        (\"PAlgo3\", PAlgo3.Params(22))\n      ),\n      servingParams = LServing1.Params(3))\n\n    val pd = ProcessedData(1, TrainingData(0))\n    val model21 = PAlgo3.Model(21, pd)\n    val model22 = PAlgo3.Model(22, pd)\n\n    val models = engine.train(\n      sc, \n      engineParams, \n      engineInstanceId = \"\",\n      params = WorkflowParams())\n\n    val pModel21 = PersistentModelManifest(model21.getClass.getName)\n    val pModel22 = PersistentModelManifest(model22.getClass.getName)\n    \n    models should contain theSameElementsAs Seq((), pModel21, pModel22)\n  }\n\n  test(\"Engine.train persisting LAlgo.Model\") {\n    val engine = Engine(\n      classOf[LDataSource1],\n      classOf[LPreparator1],\n      Map(\n        \"LAlgo1\" -> classOf[LAlgo1],\n        \"LAlgo2\" -> classOf[LAlgo2],\n        \"LAlgo3\" -> classOf[LAlgo3]\n      ),\n      classOf[LServing1])\n\n    val engineParams = EngineParams(\n      dataSourceParams = LDataSource1.Params(0),\n      preparatorParams = LPreparator1.Params(1),\n      algorithmParamsList = 
Seq(\n        (\"LAlgo2\", LAlgo2.Params(20)),\n        (\"LAlgo2\", LAlgo2.Params(21)),\n        (\"LAlgo3\", LAlgo3.Params(22))),\n      servingParams = LServing1.Params(3))\n\n    val pd = ProcessedData(1, TrainingData(0))\n    val model20 = LAlgo2.Model(20, pd)\n    val model21 = LAlgo2.Model(21, pd)\n    val model22 = LAlgo3.Model(22, pd)\n\n    //val models = engine.train(sc, engineParams, WorkflowParams())\n    val models = engine.train(\n      sc, \n      engineParams, \n      engineInstanceId = \"\",\n      params = WorkflowParams())\n\n    val pModel20 = PersistentModelManifest(model20.getClass.getName)\n    val pModel21 = PersistentModelManifest(model21.getClass.getName)\n    \n    models should contain theSameElementsAs Seq(pModel20, pModel21, model22)\n  }\n  \n  test(\"Engine.train persisting P&NAlgo.Model\") {\n    val engine = new Engine(\n      classOf[PDataSource2],\n      classOf[PPreparator1],\n      Map(\n        \"PAlgo2\" -> classOf[PAlgo2],\n        \"PAlgo3\" -> classOf[PAlgo3],\n        \"NAlgo2\" -> classOf[NAlgo2],\n        \"NAlgo3\" -> classOf[NAlgo3]\n      ),\n      classOf[LServing1])\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(0),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq(\n        (\"PAlgo2\", PAlgo2.Params(20)),\n        (\"PAlgo3\", PAlgo3.Params(21)),\n        (\"PAlgo3\", PAlgo3.Params(22)),\n        (\"NAlgo2\", NAlgo2.Params(23)),\n        (\"NAlgo3\", NAlgo3.Params(24)),\n        (\"NAlgo3\", NAlgo3.Params(25))\n      ),\n      servingParams = LServing1.Params(3))\n\n    val pd = ProcessedData(1, TrainingData(0))\n    val model21 = PAlgo3.Model(21, pd)\n    val model22 = PAlgo3.Model(22, pd)\n    val model23 = NAlgo2.Model(23, pd)\n    val model24 = NAlgo3.Model(24, pd)\n    val model25 = NAlgo3.Model(25, pd)\n\n    //val models = engine.train(sc, engineParams, WorkflowParams())\n    val models = engine.train(\n      sc, \n      engineParams, 
\n      engineInstanceId = \"\",\n      params = WorkflowParams())\n\n    val pModel21 = PersistentModelManifest(model21.getClass.getName)\n    val pModel22 = PersistentModelManifest(model22.getClass.getName)\n    val pModel23 = PersistentModelManifest(model23.getClass.getName)\n    \n    models should contain theSameElementsAs Seq(\n      (), pModel21, pModel22, pModel23, model24, model25)\n  }\n\n  test(\"Engine.eval\") {\n    val engine = new Engine(\n      classOf[PDataSource2],\n      classOf[PPreparator1],\n      Map(\"\" -> classOf[PAlgo2]),\n      classOf[LServing1])\n\n    val qn = 10\n    val en = 3\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(id = 0, en = en, qn = qn),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(2))),\n      servingParams = LServing1.Params(3))\n\n    val algoCount = engineParams.algorithmParamsList.size\n    val pd = ProcessedData(1, TrainingData(0))\n    val model0 = PAlgo2.Model(2, pd)\n\n    val evalDataSet = engine.eval(sc, engineParams, WorkflowParams())\n\n    evalDataSet should have size en\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo shouldBe EvalInfo(0)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n\n      qpaSeq should have size qn\n\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, _) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n\n        inside (p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 3\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size algoCount\n          pPs shouldBe Seq(\n            Prediction(id = 2, q = q, models = Some(model0)))\n        }}\n      }\n    }}\n  }\n\n  test(\"Engine.prepareDeploy PAlgo\") {\n    val 
engine = new Engine(\n      classOf[PDataSource2],\n      classOf[PPreparator1],\n      Map(\n        \"PAlgo2\" -> classOf[PAlgo2],\n        \"PAlgo3\" -> classOf[PAlgo3],\n        \"NAlgo2\" -> classOf[NAlgo2],\n        \"NAlgo3\" -> classOf[NAlgo3]\n      ),\n      classOf[LServing1])\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(0),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq(\n        (\"PAlgo2\", PAlgo2.Params(20)),\n        (\"PAlgo3\", PAlgo3.Params(21)),\n        (\"PAlgo3\", PAlgo3.Params(22)),\n        (\"NAlgo2\", NAlgo2.Params(23)),\n        (\"NAlgo3\", NAlgo3.Params(24)),\n        (\"NAlgo3\", NAlgo3.Params(25))\n      ),\n      servingParams = LServing1.Params(3))\n\n    val pd = ProcessedData(1, TrainingData(0))\n    val model20 = PAlgo2.Model(20, pd)\n    val model21 = PAlgo3.Model(21, pd)\n    val model22 = PAlgo3.Model(22, pd)\n    val model23 = NAlgo2.Model(23, pd)\n    val model24 = NAlgo3.Model(24, pd)\n    val model25 = NAlgo3.Model(25, pd)\n\n    val rand = new Random()\n\n    val fakeEngineInstanceId = s\"FakeInstanceId-${rand.nextLong()}\"\n\n    val persistedModels = engine.train(\n      sc,\n      engineParams,\n      engineInstanceId = fakeEngineInstanceId,\n      params = WorkflowParams()\n    )\n\n    val deployableModels = engine.prepareDeploy(\n      sc,\n      engineParams,\n      fakeEngineInstanceId,\n      persistedModels,\n      params = WorkflowParams()\n    )\n\n    deployableModels should contain theSameElementsAs Seq(\n      model20, model21, model22, model23, model24, model25)\n  }\n}\n\nclass EngineTrainSuite extends FunSuite with SharedSparkContext {\n  import org.apache.predictionio.controller.Engine0._\n  val defaultWorkflowParams: WorkflowParams = WorkflowParams()\n\n  test(\"Parallel DS/P/Algos\") {\n    val models = Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n      
  new PAlgo1(3),\n        new PAlgo0(4)),\n      defaultWorkflowParams\n    )\n\n    val pd = ProcessedData(1, TrainingData(0))\n\n    models should contain theSameElementsAs Seq(\n      PAlgo0.Model(2, pd), PAlgo1.Model(3, pd), PAlgo0.Model(4, pd))\n  }\n\n  test(\"Empty Algos Sequence\") {\n    val models = Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Nil,\n      defaultWorkflowParams\n    )\n\n    models should not be null\n  }\n\n  test(\"Null defaultWorkflowParams\") {\n\n    an [NullPointerException] should be thrownBy Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      null\n    )\n  }\n\n  test(\"Null Spark Context\") {\n    // Shouldn't we check if Spark Context is empty ?\n    val models = Engine.train(\n      null,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      defaultWorkflowParams\n    )\n\n    val pd = ProcessedData(1, TrainingData(0))\n\n    models should contain theSameElementsAs Seq(\n      PAlgo0.Model(2, pd), PAlgo1.Model(3, pd), PAlgo0.Model(4, pd))\n  }\n\n  test(\"Null DataSource\") {\n    // A null DataSource is expected to fail with a NullPointerException.\n    an [NullPointerException] should be thrownBy Engine.train(\n      sc,\n      null,\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      defaultWorkflowParams\n    )\n  }\n\n\n  test(\"Local DS/P/Algos\") {\n    val models = Engine.train(\n      sc,\n      new LDataSource0(0),\n      new LPreparator0(1),\n      Seq(\n        new LAlgo0(2),\n        new LAlgo1(3),\n        new LAlgo0(4)),\n      defaultWorkflowParams\n    )\n    \n    val pd = ProcessedData(1, TrainingData(0))\n\n    val expectedResults = Seq(\n      LAlgo0.Model(2, pd),\n      LAlgo1.Model(3, pd),\n      
LAlgo0.Model(4, pd))\n\n    forAll(models.zip(expectedResults)) { case (model, expected) => \n      model shouldBe a [RDD[_]]\n      val localModel = model.asInstanceOf[RDD[_]].collect\n      localModel should contain theSameElementsAs Seq(expected)\n    }\n  }\n\n  test(\"P2L DS/P/Algos\") {\n    val models = Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new NAlgo0(2),\n        new NAlgo1(3),\n        new NAlgo0(4)),\n      defaultWorkflowParams\n    )\n\n    val pd = ProcessedData(1, TrainingData(0))\n    \n    models should contain theSameElementsAs Seq(\n      NAlgo0.Model(2, pd), NAlgo1.Model(3, pd), NAlgo0.Model(4, pd))\n  }\n  \n  test(\"Parallel DS/P/Algos Stop-After-Read\") {\n    val workflowParams = defaultWorkflowParams.copy(\n      stopAfterRead = true)\n\n    an [StopAfterReadInterruption] should be thrownBy Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      workflowParams\n    )\n  }\n  \n  test(\"Parallel DS/P/Algos Stop-After-Prepare\") {\n    val workflowParams = defaultWorkflowParams.copy(\n      stopAfterPrepare = true)\n\n    an [StopAfterPrepareInterruption] should be thrownBy Engine.train(\n      sc,\n      new PDataSource0(0),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      workflowParams\n    )\n  }\n  \n  test(\"Parallel DS/P/Algos Dirty TrainingData\") {\n    val workflowParams = defaultWorkflowParams.copy(\n      skipSanityCheck = false)\n\n    an [AssertionError] should be thrownBy Engine.train(\n      sc,\n      new PDataSource3(0, error = true),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      workflowParams\n    )\n  }\n  \n  test(\"Parallel DS/P/Algos Dirty TrainingData But Skip Check\") {\n    val workflowParams = 
defaultWorkflowParams.copy(\n      skipSanityCheck = true)\n\n    val models = Engine.train(\n      sc,\n      new PDataSource3(0, error = true),\n      new PPreparator0(1),\n      Seq(\n        new PAlgo0(2),\n        new PAlgo1(3),\n        new PAlgo0(4)),\n      workflowParams\n    )\n    \n  val pd = ProcessedData(1, TrainingData(0, error = true))\n\n    models should contain theSameElementsAs Seq(\n      PAlgo0.Model(2, pd), PAlgo1.Model(3, pd), PAlgo0.Model(4, pd))\n  }\n}\n\n\nclass EngineEvalSuite\nextends FunSuite with Inside with SharedSparkContext {\n  import org.apache.predictionio.controller.Engine0._\n\n  @transient lazy val logger = Logger[this.type] \n  \n  test(\"Simple Parallel DS/P/A/S\") {\n    val en = 2\n    val qn = 5\n\n    val evalDataSet: Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])] = \n    Engine.eval(\n      sc,\n      new PDataSource1(id = 1, en = en, qn = qn),\n      new PPreparator0(id = 2),\n      Seq(new PAlgo0(id = 3)),\n      new LServing0(id = 10))\n\n    val pd = ProcessedData(2, TrainingData(1))\n    val model0 = PAlgo0.Model(3, pd)\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo shouldBe EvalInfo(1)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, _) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n\n        inside (p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 10\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size 1\n          pPs shouldBe Seq(\n            Prediction(id = 3, q = q, models = Some(model0)))\n        }}\n      }\n\n    }}\n\n  }\n\n  test(\"Parallel DS/P/A/S\") {\n    val en = 2\n    val qn = 5\n\n    val evalDataSet: Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])] = \n    
Engine.eval(\n      sc,\n      new PDataSource1(id = 1, en = en, qn = qn),\n      new PPreparator0(id = 2),\n      Seq(\n        new PAlgo0(id = 3), \n        new PAlgo1(id = 4),\n        new NAlgo1(id = 5)),\n      new LServing0(id = 10))\n\n    val pd = ProcessedData(2, TrainingData(1))\n    val model0 = PAlgo0.Model(3, pd)\n    val model1 = PAlgo1.Model(4, pd)\n    val model2 = NAlgo1.Model(5, pd)\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo shouldBe EvalInfo(1)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, _) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n\n        inside (p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 10\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size 3\n          pPs shouldBe Seq(\n            Prediction(id = 3, q = q, models = Some(model0)),\n            Prediction(id = 4, q = q, models = Some(model1)),\n            Prediction(id = 5, q = q, models = Some(model2))\n          )\n        }}\n      }\n    }}\n  }\n  \n  test(\"Parallel DS/P/A/S with Supplemented Query\") {\n    val en = 2\n    val qn = 5\n\n    val evalDataSet: Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])] = \n    Engine.eval(\n      sc,\n      new PDataSource1(id = 1, en = en, qn = qn),\n      new PPreparator0(id = 2),\n      Seq(\n        new PAlgo0(id = 3), \n        new PAlgo1(id = 4),\n        new NAlgo1(id = 5)),\n      new LServing2(id = 10))\n\n    val pd = ProcessedData(2, TrainingData(1))\n    val model0 = PAlgo0.Model(3, pd)\n    val model1 = PAlgo1.Model(4, pd)\n    val model2 = NAlgo1.Model(5, pd)\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo 
shouldBe EvalInfo(1)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, qSupp) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n        qSupp shouldBe false\n\n        inside (p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 10\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size 3\n          // queries inside prediction should have supp set to true, since it\n          // represents what the algorithms see.\n          val qSupp = q.copy(supp = true)\n          pPs shouldBe Seq(\n            Prediction(id = 3, q = qSupp, models = Some(model0)),\n            Prediction(id = 4, q = qSupp, models = Some(model1)),\n            Prediction(id = 5, q = qSupp, models = Some(model2))\n          )\n        }}\n      }\n    }}\n  }\n  \n  test(\"Local DS/P/A/S\") {\n    val en = 2\n    val qn = 5\n\n    val evalDataSet: Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])] = \n    Engine.eval(\n      sc,\n      new LDataSource0(id = 1, en = en, qn = qn),\n      new LPreparator0(id = 2),\n      Seq(\n        new LAlgo0(id = 3), \n        new LAlgo1(id = 4),\n        new LAlgo1(id = 5)),\n      new LServing0(id = 10))\n\n    val pd = ProcessedData(2, TrainingData(1))\n    val model0 = LAlgo0.Model(3, pd)\n    val model1 = LAlgo1.Model(4, pd)\n    val model2 = LAlgo1.Model(5, pd)\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo shouldBe EvalInfo(1)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, _) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n\n        inside 
(p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 10\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size 3\n          pPs shouldBe Seq(\n            Prediction(id = 3, q = q, models = Some(model0)),\n            Prediction(id = 4, q = q, models = Some(model1)),\n            Prediction(id = 5, q = q, models = Some(model2))\n          )\n        }}\n      }\n\n    }}\n\n  }\n}\n\n\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/EvaluationTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.{SharedSparkContext, SharedStorageContext}\nimport org.scalatest.FunSuite\nimport org.scalatest.Inside\nimport org.scalatest.Matchers._\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nobject EvaluationSuite {\n  import org.apache.predictionio.controller.TestEvaluator._\n\n  class Metric0 extends Metric[EvalInfo, Query, Prediction, Actual, Int] {\n    def calculate(\n      sc: SparkContext,\n      evalDataSet: Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])]): Int = 1\n  }\n\n  object Evaluation0 extends Evaluation {\n    engineMetric = (new FakeEngine(1, 1, 1), new Metric0())\n  }\n}\n\n\nclass EvaluationSuite\nextends FunSuite with Inside with SharedSparkContext with SharedStorageContext {\n  import org.apache.predictionio.controller.EvaluationSuite._\n\n  test(\"Evaluation makes MetricEvaluator\") {\n    // MetricEvaluator is typed [EvalInfo, Query, Prediction, Actual, Int],\n    // however this information is erased on JVM. 
scalatest doc recommends to\n    // use wildcards.\n    Evaluation0.evaluator shouldBe a [MetricEvaluator[_, _, _, _, _]]\n  }\n\n  test(\"Load from class path\") {\n    val r = org.apache.predictionio.workflow.WorkflowUtils.getEvaluation(\n      \"org.apache.predictionio.controller.EvaluationSuite.Evaluation0\",\n      getClass.getClassLoader)\n\n    r._2 shouldBe EvaluationSuite.Evaluation0\n  }\n\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/EvaluatorTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.core._\nimport org.apache.predictionio.workflow.WorkflowParams\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nobject TestEvaluator {\n  case class EvalInfo(id: Int, ex: Int)\n  case class Query(id: Int, ex: Int, qx: Int)\n  case class Prediction(id: Int, ex: Int, qx: Int)\n  case class Actual(id: Int, ex: Int, qx: Int)\n\n  class FakeEngine(val id: Int, val en: Int, val qn: Int)\n  extends BaseEngine[EvalInfo, Query, Prediction, Actual] {\n    def train(\n      sc: SparkContext, \n      engineParams: EngineParams,\n      instanceId: String = \"\",\n      params: WorkflowParams = WorkflowParams()\n    ): Seq[Any] = {\n      Seq[Any]()\n    }\n\n    def eval(\n      sc: SparkContext, \n      engineParams: EngineParams, \n      params: WorkflowParams)\n    : Seq[(EvalInfo, RDD[(Query, Prediction, Actual)])] = {\n      (0 until en).map { ex => {\n        val qpas = (0 until qn).map { qx => {\n          (Query(id, ex, qx), Prediction(id, ex, qx), Actual(id, ex, qx))\n        }}\n  \n        (EvalInfo(id = id, ex = ex), sc.parallelize(qpas))\n  
    }}\n    }\n  \n  }\n\n  /*\n  class Evaluator0 extends Evaluator[EvalInfo, Query, Prediction, Actual,\n      (Query, Prediction, Actual), \n      (EvalInfo, Seq[(Query, Prediction, Actual)]),\n      Seq[(EvalInfo, (EvalInfo, Seq[(Query, Prediction, Actual)]))]\n      ] {\n\n    def evaluateUnit(q: Query, p: Prediction, a: Actual)\n    : (Query, Prediction, Actual) = (q, p, a)\n\n    def evaluateSet(\n        evalInfo: EvalInfo, \n        eus: Seq[(Query, Prediction, Actual)])\n    : (EvalInfo, Seq[(Query, Prediction, Actual)]) = (evalInfo, eus)\n\n    def evaluateAll(\n      input: Seq[(EvalInfo, (EvalInfo, Seq[(Query, Prediction, Actual)]))]) \n    = input\n  }\n  */\n\n}\n\n/*\nclass EvaluatorSuite\nextends FunSuite with Inside with SharedSparkContext {\n  import org.apache.predictionio.controller.TestEvaluator._\n  @transient lazy val logger = Logger[this.type] \n\n  test(\"Evaluator.evaluate\") {\n    val engine = new FakeEngine(1, 3, 10)\n    val evaluator = new Evaluator0()\n  \n    val evalDataSet = engine.eval(sc, null.asInstanceOf[EngineParams])\n    val er: Seq[(EvalInfo, (EvalInfo, Seq[(Query, Prediction, Actual)]))] =\n      evaluator.evaluateBase(sc, evalDataSet)\n\n    evalDataSet.zip(er).map { case (input, output) => {\n      val (inputEvalInfo, inputQpaRDD) = input\n      val (outputEvalInfo, (outputEvalInfo2, outputQpaSeq)) = output\n      \n      inputEvalInfo shouldBe outputEvalInfo\n      inputEvalInfo shouldBe outputEvalInfo2\n      \n      val inputQpaSeq: Array[(Query, Prediction, Actual)] = inputQpaRDD.collect\n\n      inputQpaSeq.size should be (outputQpaSeq.size)\n      // TODO. match inputQpa and outputQpa content.\n    }}\n  }\n}\n*/\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/FastEvalEngineTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.scalatest.FunSuite\nimport org.scalatest.Inside\nimport org.scalatest.Matchers._\nimport org.scalatest.Inspectors._\n\nimport org.apache.predictionio.workflow.SharedSparkContext\n\nclass FastEngineSuite\nextends FunSuite with Inside with SharedSparkContext {\n  import org.apache.predictionio.controller.Engine0._\n  \n  test(\"Single Evaluation\") {\n    val engine = new FastEvalEngine(\n      Map(\"\" -> classOf[PDataSource2]),\n      Map(\"\" -> classOf[PPreparator1]),\n      Map(\n        \"PAlgo2\" -> classOf[PAlgo2],\n        \"PAlgo3\" -> classOf[PAlgo3]\n      ),\n      Map(\"\" -> classOf[LServing1]))\n\n    val qn = 10\n    val en = 3\n\n    val engineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(id = 0, en = en, qn = qn),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq(\n        (\"PAlgo2\", PAlgo2.Params(20)),\n        (\"PAlgo2\", PAlgo2.Params(21)),\n        (\"PAlgo3\", PAlgo3.Params(22))\n      ),\n      servingParams = LServing1.Params(3))\n\n    val algoCount 
= engineParams.algorithmParamsList.size\n    val pd = ProcessedData(1, TrainingData(0))\n    val model0 = PAlgo2.Model(20, pd)\n    val model1 = PAlgo2.Model(21, pd)\n    val model2 = PAlgo3.Model(22, pd)\n\n    val evalDataSet = engine.eval(sc, engineParams, WorkflowParams())\n\n    evalDataSet should have size en\n\n    forAll(evalDataSet.zipWithIndex) { case (evalData, ex) => {\n      val (evalInfo, qpaRDD) = evalData\n      evalInfo shouldBe EvalInfo(0)\n\n      val qpaSeq: Seq[(Query, Prediction, Actual)] = qpaRDD.collect\n\n      qpaSeq should have size qn\n\n      forAll (qpaSeq) { case (q, p, a) => \n        val Query(qId, qEx, qQx, _) = q\n        val Actual(aId, aEx, aQx) = a\n        qId shouldBe aId\n        qEx shouldBe ex\n        aEx shouldBe ex\n        qQx shouldBe aQx\n\n        inside (p) { case Prediction(pId, pQ, pModels, pPs) => {\n          pId shouldBe 3\n          pQ shouldBe q\n          pModels shouldBe None\n          pPs should have size algoCount\n          pPs shouldBe Seq(\n            Prediction(id = 20, q = q, models = Some(model0)),\n            Prediction(id = 21, q = q, models = Some(model1)),\n            Prediction(id = 22, q = q, models = Some(model2))\n          )\n        }}\n      }\n    }}\n  }\n\n  test(\"Batch Evaluation\") {\n    val engine = new FastEvalEngine(\n      Map(\"\" -> classOf[PDataSource2]),\n      Map(\"\" -> classOf[PPreparator1]),\n      Map(\"\" -> classOf[PAlgo2]),\n      Map(\"\" -> classOf[LServing1]))\n\n    val qn = 10\n    val en = 3\n\n    val baseEngineParams = EngineParams(\n      dataSourceParams = PDataSource2.Params(id = 0, en = en, qn = qn),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(2))),\n      servingParams = LServing1.Params(3))\n\n    val ep0 = baseEngineParams\n    val ep1 = baseEngineParams.copy(\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(2))))\n    val ep2 = baseEngineParams.copy(\n      algorithmParamsList = 
Seq((\"\", PAlgo2.Params(20))))\n\n    val engineEvalDataSet = engine.batchEval(\n      sc,\n      Seq(ep0, ep1, ep2),\n      WorkflowParams())\n\n    val evalDataSet0 = engineEvalDataSet(0)._2\n    val evalDataSet1 = engineEvalDataSet(1)._2\n    val evalDataSet2 = engineEvalDataSet(2)._2\n\n    evalDataSet0 shouldBe evalDataSet1\n    evalDataSet0 should not be evalDataSet2\n    evalDataSet1 should not be evalDataSet2\n\n    // evalDataSet0._1 should be theSameInstanceAs evalDataSet1._1\n    // When things are cached correctly, evalDataSet0 and 1 should share the\n    // same EI\n    evalDataSet0.zip(evalDataSet1).foreach { case (e0, e1) => {\n      e0._1 should be theSameInstanceAs e1._1\n      e0._2 should be theSameInstanceAs e1._2\n    }}\n   \n    // The same holds for set1 and set2; however, the QPA-RDD should be different.\n    evalDataSet1.zip(evalDataSet2).foreach { case (e1, e2) => {\n      e1._1 should be theSameInstanceAs e2._1\n      val e1Qpa = e1._2\n      val e2Qpa = e2._2\n      e1Qpa should not be theSameInstanceAs (e2Qpa)\n    }}\n  }\n  \n  test(\"Not cached when isEqual not implemented\") {\n    // PDataSource4.Params is a class, not a case class. 
Need to implement the\n    // isEqual function for hashing.\n    val engine = new FastEvalEngine(\n      Map(\"\" -> classOf[PDataSource4]),\n      Map(\"\" -> classOf[PPreparator1]),\n      Map(\"\" -> classOf[PAlgo2]),\n      Map(\"\" -> classOf[LServing1]))\n\n    val qn = 10\n    val en = 3\n\n    val baseEngineParams = EngineParams(\n      dataSourceParams = new PDataSource4.Params(id = 0, en = en, qn = qn),\n      preparatorParams = PPreparator1.Params(1),\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(2))),\n      servingParams = LServing1.Params(3))\n\n    val ep0 = baseEngineParams\n    val ep1 = baseEngineParams.copy(\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(3))))\n    // ep2.dataSource is different from ep0.\n    val ep2 = baseEngineParams.copy(\n      dataSourceParams = (\"\", new PDataSource4.Params(id = 0, en = en, qn = qn)),\n      algorithmParamsList = Seq((\"\", PAlgo2.Params(3))))\n\n    val engineEvalDataSet = engine.batchEval(\n      sc,\n      Seq(ep0, ep1, ep2),\n      WorkflowParams())\n\n    val evalDataSet0 = engineEvalDataSet(0)._2\n    val evalDataSet1 = engineEvalDataSet(1)._2\n    val evalDataSet2 = engineEvalDataSet(2)._2\n\n    evalDataSet0 should not be evalDataSet1\n    evalDataSet0 should not be evalDataSet2\n    evalDataSet1 should not be evalDataSet2\n\n    // Set0 should have same EI as Set1, since their dsp are the same instance.\n    evalDataSet0.zip(evalDataSet1).foreach { case (e0, e1) => {\n      e0._1 should be theSameInstanceAs (e1._1)\n    }}\n  \n    // Set1 should have different EI as Set2, since Set2's dsp is another\n    // instance\n    evalDataSet1.zip(evalDataSet2).foreach { case (e1, e2) => {\n      e1._1 should not be theSameInstanceAs (e2._1)\n    }}\n  }\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/MetricEvaluatorTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.{SharedSparkContext, SharedStorageContext, WorkflowParams}\nimport org.scalatest.FunSuite\n\nobject MetricEvaluatorSuite {\n  case class Metric0() extends SumMetric[EmptyParams, Int, Int, Int, Int] {\n    def calculate(q: Int, p: Int, a: Int): Int = q\n  }\n\n  object Evaluation0 extends Evaluation {}\n}\n\nclass MetricEvaluatorDevSuite extends FunSuite with SharedSparkContext\nwith SharedStorageContext {\n  import org.apache.predictionio.controller.MetricEvaluatorSuite._\n\n  test(\"a\") {\n    val metricEvaluator = MetricEvaluator(\n      Metric0(),\n      Seq(Metric0(), Metric0())\n    )\n \n    val engineEvalDataSet = Seq(\n      (EngineParams(), Seq(\n        (EmptyParams(), sc.parallelize(Seq((1,0,0), (2,0,0)))))),\n      (EngineParams(), Seq(\n        (EmptyParams(), sc.parallelize(Seq((1,0,0), (2,0,0)))))))\n\n    val r = metricEvaluator.evaluateBase(\n      sc,\n      Evaluation0,\n      engineEvalDataSet,\n      WorkflowParams())\n\n  }\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/MetricTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.workflow.SharedSparkContext\n\nimport grizzled.slf4j.Logger\nimport org.scalatest.Matchers._\nimport org.scalatest.FunSuite\nimport org.scalatest.Inside\n\nobject MetricDevSuite {\n  class QIntSumMetric extends SumMetric[EmptyParams, Int, Int, Int, Int] {\n    def calculate(q: Int, p: Int, a: Int): Int = q\n  }\n  \n  class QDoubleSumMetric extends SumMetric[EmptyParams, Int, Int, Int, Double] {\n    def calculate(q: Int, p: Int, a: Int): Double = q.toDouble\n  }\n  \n  class QAverageMetric extends AverageMetric[EmptyParams, Int, Int, Int] {\n    def calculate(q: Int, p: Int, a: Int): Double = q.toDouble\n  }\n  \n  class QOptionAverageMetric extends OptionAverageMetric[EmptyParams, Int, Int, Int] {\n    def calculate(q: Int, p: Int, a: Int): Option[Double] = {\n      if (q < 0) { None } else { Some(q.toDouble) }\n    }\n  }\n  \n  class QStdevMetric extends StdevMetric[EmptyParams, Int, Int, Int] {\n    def calculate(q: Int, p: Int, a: Int): Double = q.toDouble\n  }\n  \n  class QOptionStdevMetric extends OptionStdevMetric[EmptyParams, Int, Int, Int] {\n    def 
calculate(q: Int, p: Int, a: Int): Option[Double] = {\n      if (q < 0) { None } else { Some(q.toDouble) }\n    }\n  }\n  \n}\n\nclass MetricDevSuite\nextends FunSuite with Inside with SharedSparkContext {\n  @transient lazy val logger = Logger[this.type] \n  \n  test(\"Average Metric\") {\n    val qpaSeq0 = Seq((1, 0, 0), (2, 0, 0), (3, 0, 0))\n    val qpaSeq1 = Seq((4, 0, 0), (5, 0, 0), (6, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QAverageMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe (21.0 / 6)\n  }\n  \n  test(\"Option Average Metric\") {\n    val qpaSeq0 = Seq((1, 0, 0), (2, 0, 0), (3, 0, 0))\n    val qpaSeq1 = Seq((-4, 0, 0), (-5, 0, 0), (6, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QOptionAverageMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe (12.0 / 4)\n  }\n  \n  test(\"Stdev Metric\") {\n    val qpaSeq0 = Seq((1, 0, 0), (1, 0, 0), (1, 0, 0), (1, 0, 0))\n    val qpaSeq1 = Seq((5, 0, 0), (5, 0, 0), (5, 0, 0), (5, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QStdevMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe 2.0\n  }\n  \n  test(\"Option Stdev Metric\") {\n    val qpaSeq0 = Seq((1, 0, 0), (1, 0, 0), (1, 0, 0), (1, 0, 0))\n    val qpaSeq1 = Seq((5, 0, 0), (5, 0, 0), (5, 0, 0), (5, 0, 0), (-5, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QOptionStdevMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe 2.0\n  }\n\n  
test(\"Sum Metric [Int]\") {\n    val qpaSeq0 = Seq((1, 0, 0), (2, 0, 0), (3, 0, 0))\n    val qpaSeq1 = Seq((4, 0, 0), (5, 0, 0), (6, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QIntSumMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe 21\n  }\n\n  test(\"Sum Metric [Double]\") {\n    val qpaSeq0 = Seq((1, 0, 0), (2, 0, 0), (3, 0, 0))\n    val qpaSeq1 = Seq((4, 0, 0), (5, 0, 0), (6, 0, 0))\n\n    val evalDataSet = Seq(\n      (EmptyParams(), sc.parallelize(qpaSeq0)),\n      (EmptyParams(), sc.parallelize(qpaSeq1)))\n  \n    val m = new MetricDevSuite.QDoubleSumMetric()\n    val result = m.calculate(sc, evalDataSet)\n    \n    result shouldBe 21.0\n  }\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/controller/SampleEngine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.controller\n\nimport org.apache.predictionio.controller.{Params => PIOParams}\nimport org.apache.predictionio.core._\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.workflow.WorkflowParams\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nobject Engine0 {\n  @transient lazy val logger = Logger[this.type] \n\n  case class TrainingData(id: Int, error: Boolean = false) extends SanityCheck {\n    def sanityCheck(): Unit = {\n      Predef.assert(!error, \"Not Error\")\n    }\n  }\n\n  case class EvalInfo(id: Int)\n  case class ProcessedData(id: Int, td: TrainingData)\n\n  case class Query(id: Int, ex: Int = 0, qx: Int = 0, supp: Boolean = false)\n  case class Actual(id: Int, ex: Int = 0, qx: Int = 0)\n  case class Prediction(\n    id: Int, q: Query, models: Option[Any] = None, \n    ps: Seq[Prediction] = Seq[Prediction]())\n\n  class PDataSource0(id: Int = 0) \n  extends PDataSource[TrainingData, EvalInfo, Query, Actual] {\n    def readTraining(sc: SparkContext): TrainingData = {\n      TrainingData(id)\n    }\n  }\n  \n  class 
PDataSource1(id: Int = 0, en: Int = 0, qn: Int = 0)\n  extends PDataSource[TrainingData, EvalInfo, Query, Actual] {\n    def readTraining(sc: SparkContext): TrainingData = TrainingData(id)\n    \n    override\n    def readEval(sc: SparkContext)\n    : Seq[(TrainingData, EvalInfo, RDD[(Query, Actual)])] = {\n      (0 until en).map { ex => {\n        val qaSeq: Seq[(Query, Actual)] = (0 until qn).map { qx => {\n          (Query(id, ex=ex, qx=qx), Actual(id, ex, qx))\n        }}\n        (TrainingData(id), EvalInfo(id), sc.parallelize(qaSeq))\n      }}\n    }\n  }\n\n  object PDataSource2 {\n    case class Params(id: Int, en: Int = 0, qn: Int = 0) extends PIOParams\n  }\n  \n  class PDataSource2(params: PDataSource2.Params)\n  extends PDataSource[TrainingData, EvalInfo, Query, Actual] {\n    val id = params.id\n    def readTraining(sc: SparkContext): TrainingData = TrainingData(id)\n    \n    override\n    def readEval(sc: SparkContext)\n    : Seq[(TrainingData, EvalInfo, RDD[(Query, Actual)])] = {\n      (0 until params.en).map { ex => {\n        val qaSeq: Seq[(Query, Actual)] = (0 until params.qn).map { qx => {\n          (Query(id, ex=ex, qx=qx), Actual(id, ex, qx))\n        }}\n        (TrainingData(id), EvalInfo(id), sc.parallelize(qaSeq))\n      }}\n    }\n  }\n  \n  class PDataSource3(id: Int = 0, error: Boolean = false) \n  extends PDataSource[TrainingData, EvalInfo, Query, Actual] {\n    def readTraining(sc: SparkContext): TrainingData = {\n      TrainingData(id = id, error = error)\n    }\n  }\n  \n  object PDataSource4 {\n    class Params(val id: Int, val en: Int = 0, val qn: Int = 0) \n      extends PIOParams\n  }\n  \n  class PDataSource4(params: PDataSource4.Params)\n  extends PDataSource[TrainingData, EvalInfo, Query, Actual] {\n    val id = params.id\n    def readTraining(sc: SparkContext): TrainingData = TrainingData(id)\n    \n    override\n    def readEval(sc: SparkContext)\n    : Seq[(TrainingData, EvalInfo, RDD[(Query, Actual)])] = {\n      (0 
until params.en).map { ex => {\n        val qaSeq: Seq[(Query, Actual)] = (0 until params.qn).map { qx => {\n          (Query(id, ex=ex, qx=qx), Actual(id, ex, qx))\n        }}\n        (TrainingData(id), EvalInfo(id), sc.parallelize(qaSeq))\n      }}\n    }\n  }\n  \n  class LDataSource0(id: Int, en: Int = 0, qn: Int = 0) \n    extends LDataSource[TrainingData, EvalInfo, Query, Actual] {\n    def readTraining(): TrainingData = TrainingData(id)\n   \n    override\n    def readEval()\n    : Seq[(TrainingData, EvalInfo, Seq[(Query, Actual)])] = {\n      (0 until en).map { ex => {\n        val qaSeq: Seq[(Query, Actual)] = (0 until qn).map { qx => {\n          (Query(id, ex=ex, qx=qx), Actual(id, ex, qx))\n        }}\n        (TrainingData(id), EvalInfo(id), qaSeq)\n      }}\n    }\n  }\n  \n  object LDataSource1 {\n    case class Params(id: Int, en: Int = 0, qn: Int = 0) extends PIOParams\n  }\n  \n  class LDataSource1(params: LDataSource1.Params)\n  extends LDataSource[TrainingData, EvalInfo, Query, Actual] {\n    val id = params.id\n    def readTraining(): TrainingData = TrainingData(id)\n    \n    override\n    def readEval(): Seq[(TrainingData, EvalInfo, Seq[(Query, Actual)])] = {\n      (0 until params.en).map { ex => {\n        val qaSeq: Seq[(Query, Actual)] = (0 until params.qn).map { qx => {\n          (Query(id, ex=ex, qx=qx), Actual(id, ex, qx))\n        }}\n        (TrainingData(id), EvalInfo(id), qaSeq)\n      }}\n    }\n  }\n  \n  class PPreparator0(id: Int = 0)\n  extends PPreparator[TrainingData, ProcessedData] {\n    def prepare(sc: SparkContext, td: TrainingData): ProcessedData = {\n      ProcessedData(id, td)\n    }\n  }\n\n  object PPreparator1 {\n    case class Params(id: Int  = 0) extends PIOParams\n  }\n\n  class PPreparator1(params: PPreparator1.Params)\n  extends PPreparator[TrainingData, ProcessedData] {\n    def prepare(sc: SparkContext, td: TrainingData): ProcessedData = {\n      ProcessedData(params.id, td)\n    }\n  }\n\n  class 
LPreparator0(id: Int = 0) \n  extends LPreparator[TrainingData, ProcessedData] {\n    def prepare(td: TrainingData): ProcessedData = {\n      ProcessedData(id, td)\n    }\n  }\n  \n  object LPreparator1 {\n    case class Params(id: Int  = 0) extends PIOParams\n  }\n\n  class LPreparator1(params: LPreparator1.Params)\n  extends LPreparator[TrainingData, ProcessedData] {\n    def prepare(td: TrainingData): ProcessedData = {\n      ProcessedData(params.id, td)\n    }\n  }\n\n  object PAlgo0 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class PAlgo0(id: Int = 0)\n  extends PAlgorithm[ProcessedData, PAlgo0.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : PAlgo0.Model = PAlgo0.Model(id, pd)\n\n    override\n    def batchPredict(m: PAlgo0.Model, qs: RDD[(Long, Query)])\n    : RDD[(Long, Prediction)] = {\n      qs.mapValues(q => Prediction(id, q, Some(m)))\n    }\n    \n    def predict(m: PAlgo0.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n\n  object PAlgo1 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class PAlgo1(id: Int = 0)\n  extends PAlgorithm[ProcessedData, PAlgo1.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : PAlgo1.Model = PAlgo1.Model(id, pd)\n\n    override\n    def batchPredict(m: PAlgo1.Model, qs: RDD[(Long, Query)])\n    : RDD[(Long, Prediction)] = {\n      qs.mapValues(q => Prediction(id, q, Some(m)))\n    }\n\n    def predict(m: PAlgo1.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object PAlgo2 {\n    case class Model(id: Int, pd: ProcessedData)\n    case class Params(id: Int) extends PIOParams\n  }\n\n  class PAlgo2(params: PAlgo2.Params)\n  extends PAlgorithm[ProcessedData, PAlgo2.Model, Query, Prediction] {\n    val id = params.id\n\n    def train(sc: SparkContext, pd: ProcessedData)\n    : PAlgo2.Model = PAlgo2.Model(id, pd)\n\n    override\n    def batchPredict(m: 
PAlgo2.Model, qs: RDD[(Long, Query)])\n    : RDD[(Long, Prediction)] = {\n      qs.mapValues(q => Prediction(id, q, Some(m)))\n    }\n\n    def predict(m: PAlgo2.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object PAlgo3 {\n    case class Model(id: Int, pd: ProcessedData)\n    extends LocalFileSystemPersistentModel[Params]\n    \n    object Model extends LocalFileSystemPersistentModelLoader[Params, Model]\n\n    case class Params(id: Int) extends PIOParams\n  }\n\n  class PAlgo3(params: PAlgo3.Params)\n  extends PAlgorithm[ProcessedData, PAlgo3.Model, Query, Prediction] {\n    val id = params.id\n\n    def train(sc: SparkContext, pd: ProcessedData)\n    : PAlgo3.Model = PAlgo3.Model(id, pd)\n\n    override\n    def batchPredict(m: PAlgo3.Model, qs: RDD[(Long, Query)])\n    : RDD[(Long, Prediction)] = {\n      qs.mapValues(q => Prediction(id, q, Some(m)))\n    }\n\n    def predict(m: PAlgo3.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object LAlgo0 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class LAlgo0(id: Int = 0) \n  extends LAlgorithm[ProcessedData, LAlgo0.Model, Query, Prediction] {\n    def train(pd: ProcessedData): LAlgo0.Model = LAlgo0.Model(id, pd)\n\n    def predict(m: LAlgo0.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object LAlgo1 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class LAlgo1(id: Int = 0) \n  extends LAlgorithm[ProcessedData, LAlgo1.Model, Query, Prediction] {\n    def train(pd: ProcessedData): LAlgo1.Model = LAlgo1.Model(id, pd)\n    \n    def predict(m: LAlgo1.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object LAlgo2 {\n    case class Params(id: Int) extends PIOParams\n\n    case class Model(id: Int, pd: ProcessedData)\n    extends LocalFileSystemPersistentModel[EmptyParams]\n    \n    object Model extends 
LocalFileSystemPersistentModelLoader[EmptyParams, Model]\n  }\n\n  class LAlgo2(params: LAlgo2.Params) \n  extends LAlgorithm[ProcessedData, LAlgo2.Model, Query, Prediction] {\n    def train(pd: ProcessedData): LAlgo2.Model = LAlgo2.Model(params.id, pd)\n    \n    def predict(m: LAlgo2.Model, q: Query): Prediction = {\n      Prediction(params.id, q, Some(m))\n    }\n  }\n\n  object LAlgo3 {\n    case class Params(id: Int) extends PIOParams\n\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class LAlgo3(params: LAlgo3.Params) \n  extends LAlgorithm[ProcessedData, LAlgo3.Model, Query, Prediction] {\n    def train(pd: ProcessedData): LAlgo3.Model = LAlgo3.Model(params.id, pd)\n    \n    def predict(m: LAlgo3.Model, q: Query): Prediction = {\n      Prediction(params.id, q, Some(m))\n    }\n  }\n\n  // N : P2L. As N is in the middle of P and L.\n  object NAlgo0 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class NAlgo0 (id: Int = 0)\n  extends P2LAlgorithm[ProcessedData, NAlgo0.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : NAlgo0.Model = NAlgo0.Model(id, pd)\n  \n    def predict(m: NAlgo0.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n\n  object NAlgo1 {\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class NAlgo1 (id: Int = 0)\n  extends P2LAlgorithm[ProcessedData, NAlgo1.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : NAlgo1.Model = NAlgo1.Model(id, pd)\n   \n    def predict(m: NAlgo1.Model, q: Query): Prediction = {\n      Prediction(id, q, Some(m))\n    }\n  }\n  \n  object NAlgo2 {\n    case class Params(id: Int) extends PIOParams\n\n    case class Model(id: Int, pd: ProcessedData)\n    extends LocalFileSystemPersistentModel[EmptyParams]\n    \n    object Model extends LocalFileSystemPersistentModelLoader[EmptyParams, Model]\n  }\n\n  class NAlgo2(params: NAlgo2.Params) \n  extends P2LAlgorithm[ProcessedData, 
NAlgo2.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : NAlgo2.Model = NAlgo2.Model(params.id, pd)\n    \n    def predict(m: NAlgo2.Model, q: Query): Prediction = {\n      Prediction(params.id, q, Some(m))\n    }\n  }\n\n  object NAlgo3 {\n    case class Params(id: Int) extends PIOParams\n\n    case class Model(id: Int, pd: ProcessedData)\n  }\n\n  class NAlgo3(params: NAlgo3.Params) \n  extends P2LAlgorithm[ProcessedData, NAlgo3.Model, Query, Prediction] {\n    def train(sc: SparkContext, pd: ProcessedData)\n    : NAlgo3.Model = NAlgo3.Model(params.id, pd)\n    \n    def predict(m: NAlgo3.Model, q: Query): Prediction = {\n      Prediction(params.id, q, Some(m))\n    }\n  }\n\n  class LServing0(id: Int = 0) extends LServing[Query, Prediction] {\n    def serve(q: Query, ps: Seq[Prediction]): Prediction = {\n      Prediction(id, q, ps=ps)\n    }\n  }\n\n  object LServing1 {\n    case class Params(id: Int) extends PIOParams\n  }\n  \n  class LServing1(params: LServing1.Params) extends LServing[Query, Prediction] {\n    def serve(q: Query, ps: Seq[Prediction]): Prediction = {\n      Prediction(params.id, q, ps=ps)\n    }\n  }\n  \n  class LServing2(id: Int) extends LServing[Query, Prediction] {\n    override\n    def supplement(q: Query): Query = q.copy(supp = true)\n\n    def serve(q: Query, ps: Seq[Prediction]): Prediction = {\n      Prediction(id, q, ps=ps)\n    }\n  }\n}\n\nobject Engine1 {\n  case class EvalInfo(v: Double) extends Serializable\n  case class Query() extends Serializable\n  case class Prediction() extends Serializable\n  case class Actual() extends Serializable\n  case class DSP(v: Double) extends Params\n}\n\nclass Engine1 \nextends BaseEngine[\n  Engine1.EvalInfo, Engine1.Query, Engine1.Prediction, Engine1.Actual] {\n\n  def train(\n    sc: SparkContext, \n    engineParams: EngineParams,\n    engineInstanceId: String = \"\",\n    params: WorkflowParams = WorkflowParams()): Seq[Any] = Seq[Any]()\n\n  def 
eval(sc: SparkContext, engineParams: EngineParams, params: WorkflowParams)\n  : Seq[(Engine1.EvalInfo, \n      RDD[(Engine1.Query, Engine1.Prediction, Engine1.Actual)])] = {\n    val dsp = engineParams.dataSourceParams._2.asInstanceOf[Engine1.DSP]\n    Seq(\n      (Engine1.EvalInfo(dsp.v),\n        sc.emptyRDD[(Engine1.Query, Engine1.Prediction, Engine1.Actual)]))\n  }\n}\n\n\nclass Metric0\nextends Metric[Engine1.EvalInfo, Engine1.Query, Engine1.Prediction,\nEngine1.Actual, Double] {\n  override def header: String = \"Metric0\"\n\n  def calculate(\n    sc: SparkContext, \n    evalDataSet: Seq[(Engine1.EvalInfo, RDD[(Engine1.Query, Engine1.Prediction,\n    Engine1.Actual)])]): Double = {\n    evalDataSet.head._1.v\n  }\n}\n\nobject Metric1 {\n  case class Result(c: Int, v: Double) extends Serializable\n}\n\nclass Metric1\nextends Metric[Engine1.EvalInfo, Engine1.Query, Engine1.Prediction,\nEngine1.Actual, Metric1.Result]()(Ordering.by[Metric1.Result, Double](_.v)) {\n  override def header: String = \"Metric1\"\n\n  def calculate(\n    sc: SparkContext, \n    evalDataSet: Seq[(Engine1.EvalInfo, RDD[(Engine1.Query, Engine1.Prediction,\n    Engine1.Actual)])]): Metric1.Result = {\n    Metric1.Result(0, evalDataSet.head._1.v)\n  }\n}\n\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/core/SelfCleaningDataSourceTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.core\n\nimport org.apache.predictionio.core.SelfCleaningDataSource\nimport org.apache.predictionio.core.EventWindow\nimport org.apache.predictionio.workflow.SharedSparkContext\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.data.store._\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\n\nimport org.json4s._\nimport org.json4s.DefaultFormats\n\nimport org.apache.spark.rdd.RDD\nimport org.scalatest.Inspectors._\nimport org.scalatest.Matchers._\nimport org.scalatest.FunSuite\nimport org.scalatest.Inside\n\ncase class DataSourceParams(appName: String, eventWindow: Option[EventWindow], appId: Int) extends Params\n\nclass SelfCleaningPDataSource(anAppName: String) extends PDataSource[TrainingData,EmptyEvaluationInfo, Query, EmptyActualResult] with SelfCleaningDataSource 
{\n\n  val (appId, channelId) = org.apache.predictionio.data.store.Common.appNameToId(anAppName, None)\n\n\n  val dsp = DataSourceParams(anAppName, Some(EventWindow(Some(\"1825 days\"), true, true)), appId = appId)\n\n  override def appName = dsp.appName\n  override def eventWindow = dsp.eventWindow\n\n  override def readTraining(sc: SparkContext): TrainingData = new TrainingData()\n\n  def events = Storage.getPEvents().find(appId = dsp.appId)_\n\n  def itemEvents = Storage.getPEvents().find(appId = dsp.appId, entityType = Some(\"item\"), eventNames = Some(Seq(\"$set\")))_  \n \n  def eventsAgg = Storage.getPEvents().aggregateProperties(appId = dsp.appId, entityType = \"item\")_\n\n}\n\nclass SelfCleaningDataSourceTest extends FunSuite with Inside with SharedSparkContext {\n\n  //To run manually, requires app \"cleanedTest\" and test.json data imported to it\n  ignore(\"Test event cleanup\") {\n    val source = new SelfCleaningPDataSource(\"cleanedTest\")\n    val eventsBeforeCount = source.events(sc).count\n    val itemEventsBeforeCount = source.itemEvents(sc).count\n\n    source.cleanPersistedPEvents(sc)\n\n    val eventsAfterCount = source.events(sc).count\n    val eventsAfter = source.events(sc)\n    val itemEventsAfterCount = source.itemEvents(sc).count   \n    val distinctEventsAfterCount = eventsAfter.map(x => \n      CleanedDataSourceTest.stripIdAndCreationTimeFromEvents(x)).distinct.count\n\n    val nexusSet = eventsAfter.filter(x => x.event == \"$set\" && x.entityId == \"Nexus\").take(1)(0) \n\n    implicit val formats = DefaultFormats\n\n    nexusSet.properties.get[String](\"available\") should equal (\"2016-03-18T13:31:49.016770+00:00\")\n\n    nexusSet.properties.get[JArray](\"categories\").values should equal (\n                   JArray(\n                     List(JString(\"Tablets\"),\n                          JString(\"Electronics\"),\n                          JString(\"Google\"))).values)\n \n    distinctEventsAfterCount should equal 
(eventsAfterCount)\n    eventsBeforeCount should be > (eventsAfterCount) \n    itemEventsBeforeCount should be > (itemEventsAfterCount)\n    itemEventsAfterCount should be > 0l\n  }\n}\n\nobject CleanedDataSourceTest{\n  def stripIdAndCreationTimeFromEvents(x: Event): Event = {\n   Event(event = x.event, entityType = x.entityType, entityId = x.entityId, targetEntityType = x.targetEntityType, targetEntityId = x.targetEntityId, properties = x.properties, eventTime = x.eventTime, tags = x.tags, prId= x.prId, creationTime = x.eventTime)\n  }\n}\n\n\n\ncase class Query() extends Serializable\n\nclass TrainingData() extends Serializable\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/core/test.json",
    "content": "{\"eventId\":\"KpjNMVrQzY2s0TZhYB3vsAAAAVOFSkM1kLoZgQnOA1E\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"categories\":[\"Tablets\",\"Electronics\",\"Google\"]},\"eventTime\":\"2016-03-17T15:55:49.941Z\",\"creationTime\":\"2016-03-17T15:55:49.945Z\"}\n{\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"categories\":[\"Tablets\",\"Electronics\",\"Google\"]},\"eventTime\":\"2016-03-17T15:55:49.941Z\",\"creationTime\":\"2016-03-17T15:55:49.945Z\"}\n{\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"categories\":[\"Tablets\",\"Electronics\",\"Google2\"], \"test\": [\"testA\", \"testB\"]},\"eventTime\":\"2006-03-17T15:54:49.941Z\",\"creationTime\":\"2006-03-17T15:54:49.945Z\"}\n{\"eventId\":\"KpjNMVrQzY2s0TZhYB3vsAAAAVOFSkNogMMiTarDxQA\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"countries\":[\"United States\",\"Canada\"]},\"eventTime\":\"2016-03-17T15:55:49.992Z\",\"creationTime\":\"2016-03-17T15:55:49.997Z\"}\n{\"eventId\":\"KpjNMVrQzY2s0TZhYB3vsAAAAVOFSkOdrr3SJaHTlQQ\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"available\":\"2016-03-14T13:31:49.016770+00:00\",\"date\":\"2016-03-16T13:31:49.016770+00:00\",\"expires\":\"2016-03-18T13:31:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.045Z\",\"creationTime\":\"2016-03-17T15:55:50.049Z\"}\n{\"eventId\":\"KpjNMVrQzY2s0TZhYB3vsAAAAVOFSkOdrr3SJaHTlQQ\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Nexus\",\"properties\":{\"available\":\"2016-03-18T13:31:49.016770+00:00\",\"date\":\"2016-03-16T13:31:49.016770+00:00\",\"expires\":\"2016-03-18T13:31:49.016770+00:00\"},\"eventTime\":\"2016-03-18T15:55:50.045Z\",\"creationTime\":\"2016-03-18T15:55:50.049Z\"}\n{\"eventId\":\"MdgNfySNSsz0WVh1q6f3_gAAAVOFSkNKjmJz4kil3F0\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Surface\",\"properties\":{\"categor
ies\":[\"Tablets\",\"Electronics\",\"Microsoft\"]},\"eventTime\":\"2016-03-17T15:55:49.962Z\",\"creationTime\":\"2016-03-17T15:55:49.966Z\"}\n{\"eventId\":\"MdgNfySNSsz0WVh1q6f3_gAAAVOFSkN-lNLH6dbWhjI\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Surface\",\"properties\":{\"countries\":[\"United States\",\"Canada\"]},\"eventTime\":\"2016-03-17T15:55:50.014Z\",\"creationTime\":\"2016-03-17T15:55:50.018Z\"}\n{\"eventId\":\"MdgNfySNSsz0WVh1q6f3_gAAAVOFSkOmhp8HSvY0l2M\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Surface\",\"properties\":{\"available\":\"2016-03-15T08:43:49.016770+00:00\",\"date\":\"2016-03-17T08:43:49.016770+00:00\",\"expires\":\"2016-03-19T08:43:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.054Z\",\"creationTime\":\"2016-03-17T15:55:50.060Z\"}\n{\"eventId\":\"PxKvMIeTGaAvnzYFx0Il5AAAAVOFSkJTgLGAfdlk374\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"U 2\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Nexus\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.715Z\",\"creationTime\":\"2016-03-17T15:55:49.721Z\"}\n{\"eventId\":\"PxKvMIeTGaAvnzYFx0Il5AAAAVOFSkJflBwNuoxYZSk\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"U 2\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Galaxy\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.727Z\",\"creationTime\":\"2016-03-17T15:55:49.734Z\"}\n{\"eventId\":\"PxKvMIeTGaAvnzYFx0Il5AAAAVOFSkK0jq-hYskgTHQ\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"U 2\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Phones\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.812Z\",\"creationTime\":\"2016-03-17T15:55:49.816Z\"}\n{\"eventId\":\"PxKvMIeTGaAvnzYFx0Il5AAAAVOFSkLBme-oEd51kRc\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"U 
2\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Tablets\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.825Z\",\"creationTime\":\"2016-03-17T15:55:49.830Z\"}\n{\"eventId\":\"PxKvMIeTGaAvnzYFx0Il5AAAAVOFSkLMtz_jVwJkrMo\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"U 2\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Mobile-acc\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.836Z\",\"creationTime\":\"2016-03-17T15:55:49.841Z\"}\n{\"eventId\":\"P0xK5wvjfKzdMwVGPH_MzgAAAVOFSkMXp9rxoAwBXfs\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 5\",\"properties\":{\"categories\":[\"Phones\",\"Electronics\",\"Apple\"]},\"eventTime\":\"2016-03-17T15:55:49.911Z\",\"creationTime\":\"2016-03-17T15:55:49.915Z\"}\n{\"eventId\":\"P0xK5wvjfKzdMwVGPH_MzgAAAVOFSkPHpu0QYdCNeC4\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 5\",\"properties\":{\"available\":\"2016-03-17T18:19:49.016770+00:00\",\"date\":\"2016-03-19T18:19:49.016770+00:00\",\"expires\":\"2016-03-21T18:19:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.087Z\",\"creationTime\":\"2016-03-17T15:55:50.091Z\"}\n{\"eventId\":\"Rh2bjOtiPNen04BEL4hS1AAAAVOFSkJsqukj8XhK8UQ\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Surface\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.740Z\",\"creationTime\":\"2016-03-17T15:55:49.745Z\"}\n{\"eventId\":\"Rh2bjOtiPNen04BEL4hS1AAAAVOFSkLXofSwU6V_g8M\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u-3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Mobile-acc\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.847Z\",\"creationTime\":\"2016-03-17T15:55:49.851Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSj-Yq3R7qgg6_Vk\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 
6\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.016Z\",\"creationTime\":\"2016-03-17T15:55:49.401Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSkIlpz1FtdazY3s\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 5\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.669Z\",\"creationTime\":\"2016-03-17T15:55:49.678Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSkI0iPemXrYZZvo\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 4\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.684Z\",\"creationTime\":\"2016-03-17T15:55:49.693Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSkJDiVnOzeypN7I\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Ipad-retina\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.699Z\",\"creationTime\":\"2016-03-17T15:55:49.707Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSkKbpK-PGBIgfOI\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Phones\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.787Z\",\"creationTime\":\"2016-03-17T15:55:49.791Z\"}\n{\"eventId\":\"Z0813DMQIKz7N4VGxZhmngAAAVOFSkKptArC8-MR6bE\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u1\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Mobile-acc\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.801Z\",\"creationTime\":\"2016-03-17T15:55:49.806Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkJ2rb2DtAZ6Kc0\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 
5\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.750Z\",\"creationTime\":\"2016-03-17T15:55:49.754Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkJ2rb2DtAZ6Kc0\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 5\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.750Z\",\"creationTime\":\"2016-03-17T15:55:49.754Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkKBmIoMOHYdSNc\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 4\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.761Z\",\"creationTime\":\"2016-03-17T15:55:49.769Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkKPlZrbJdSAuNo\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Galaxy\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.775Z\",\"creationTime\":\"2016-03-17T15:55:49.781Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkLhgVH2nSiQUk8\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Phones\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.857Z\",\"creationTime\":\"2016-03-17T15:55:49.862Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkLsv05zv25rTp8\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Tablets\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.868Z\",\"creationTime\":\"2016-03-17T15:55:49.872Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkL2lG__U2kPe1Y\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Soap\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.878Z\",\"creationTime\":\"2016-03-17T15:55:49.882Z\"}\n{\"eventId\":\"gmvnQ953Qb_tMUAzxNqgtQAAAVOFSkMKgheCJU2SSYI\",\"event\":\"$set\",\"entityType\":\"item\",\"en
tityId\":\"Iphone 6\",\"properties\":{\"categories\":[\"Phones\",\"Electronics\",\"Apple\"]},\"eventTime\":\"2016-03-17T15:55:49.898Z\",\"creationTime\":\"2016-03-17T15:55:49.903Z\"}\n{\"eventId\":\"gmvnQ953Qb_tMUAzxNqgtQAAAVOFSkOIh2BhgBjtKYU\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 6\",\"properties\":{\"available\":\"2016-03-12T23:07:49.016770+00:00\",\"date\":\"2016-03-14T23:07:49.016770+00:00\",\"expires\":\"2016-03-16T23:07:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.024Z\",\"creationTime\":\"2016-03-17T15:55:50.028Z\"}\n{\"eventId\":\"pAabCfxStG8KscX91YcbQgAAAVOFSkMgnNUmSCOAk-k\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 4\",\"properties\":{\"categories\":[\"Phones\",\"Electronics\",\"Apple\"]},\"eventTime\":\"2016-03-17T15:55:49.920Z\",\"creationTime\":\"2016-03-17T15:55:49.925Z\"}\n{\"eventId\":\"pAabCfxStG8KscX91YcbQgAAAVOFSkNUieljPZ0N8Ks\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 4\",\"properties\":{\"countries\":[\"United States\",\"Canada\",\"Estados Unidos Mexicanos\"]},\"eventTime\":\"2016-03-17T15:55:49.972Z\",\"creationTime\":\"2016-03-17T15:55:49.976Z\"}\n{\"eventId\":\"pAabCfxStG8KscX91YcbQgAAAVOFSkOztX--kWmGKeg\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Iphone 
4\",\"properties\":{\"available\":\"2016-03-16T03:55:49.016770+00:00\",\"date\":\"2016-03-18T03:55:49.016770+00:00\",\"expires\":\"2016-03-20T03:55:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.067Z\",\"creationTime\":\"2016-03-17T15:55:50.071Z\"}\n{\"eventId\":\"7CvEfxvyU91u9adLcWdeDAAAAVOFSkMrhghB7z6eySU\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Ipad-retina\",\"properties\":{\"categories\":[\"Tablets\",\"Electronics\",\"Apple\"]},\"eventTime\":\"2016-03-17T15:55:49.931Z\",\"creationTime\":\"2016-03-17T15:55:49.935Z\"}\n{\"eventId\":\"7CvEfxvyU91u9adLcWdeDAAAAVOFSkNemo-5_T66338\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Ipad-retina\",\"properties\":{\"countries\":[\"United States\",\"Estados Unidos Mexicanos\"]},\"eventTime\":\"2016-03-17T15:55:49.982Z\",\"creationTime\":\"2016-03-17T15:55:49.986Z\"}\n{\"eventId\":\"7CvEfxvyU91u9adLcWdeDAAAAVOFSkOSqPalKUCDgQI\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Ipad-retina\",\"properties\":{\"available\":\"2016-03-13T18:19:49.016770+00:00\",\"date\":\"2016-03-15T18:19:49.016770+00:00\",\"expires\":\"2016-03-17T18:19:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.034Z\",\"creationTime\":\"2016-03-17T15:55:50.038Z\"}\n{\"eventId\":\"7QC1-7RtN0F-51rlq5irAgAAAVOFSkM_vOQlAmRTg3s\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Galaxy\",\"properties\":{\"categories\":[\"Phones\",\"Electronics\",\"Samsung\"]},\"eventTime\":\"2016-03-17T15:55:49.951Z\",\"creationTime\":\"2016-03-17T15:55:49.955Z\"}\n{\"eventId\":\"7QC1-7RtN0F-51rlq5irAgAAAVOFSkN0pUflT-SwR3w\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Galaxy\",\"properties\":{\"countries\":[\"United 
States\"]},\"eventTime\":\"2016-03-17T15:55:50.004Z\",\"creationTime\":\"2016-03-17T15:55:50.008Z\"}\n{\"eventId\":\"7QC1-7RtN0F-51rlq5irAgAAAVOFSkO9ko9YEaEplJs\",\"event\":\"$set\",\"entityType\":\"item\",\"entityId\":\"Galaxy\",\"properties\":{\"available\":\"2016-03-16T23:07:49.016770+00:00\",\"date\":\"2016-03-18T23:07:49.016770+00:00\",\"expires\":\"2016-03-20T23:07:49.016770+00:00\"},\"eventTime\":\"2016-03-17T15:55:50.077Z\",\"creationTime\":\"2016-03-17T15:55:50.081Z\"}\n{\"eventId\":\"-ea0Iys05y2nvrM9WUmnwwAAAVOFSkL_k9xrJWi41qM\",\"event\":\"view\",\"entityType\":\"user\",\"entityId\":\"u5\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Soap\",\"properties\":{},\"eventTime\":\"2016-03-17T15:55:49.887Z\",\"creationTime\":\"2016-03-17T15:55:49.892Z\"}\n{\"eventId\":\"dsw1LKGItnaOliG661FGeQAAAVOFSkJ2rb2DtAZ6Kc0\",\"event\":\"purchase\",\"entityType\":\"user\",\"entityId\":\"u-4\",\"targetEntityType\":\"item\",\"targetEntityId\":\"Iphone 5\",\"properties\":{},\"eventTime\":\"1970-03-17T15:55:49.750Z\",\"creationTime\":\"1970-03-17T15:55:49.754Z\"}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/workflow/BaseTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n// package org.apache.spark\npackage org.apache.predictionio.workflow\n\nimport _root_.io.netty.util.internal.logging.{InternalLoggerFactory, Slf4JLoggerFactory}\nimport org.apache.predictionio.data.storage.{EnvironmentFactory, EnvironmentService}\nimport org.scalatest.BeforeAndAfterAll\nimport org.scalatest.BeforeAndAfterEach\nimport org.scalatest.Suite\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkConf\nimport org.scalamock.scalatest.MockFactory\n\n\n/** Manages a local `sc` {@link SparkContext} variable, correctly stopping it\n  * after each test. 
*/\ntrait LocalSparkContext\nextends BeforeAndAfterEach with BeforeAndAfterAll { self: Suite =>\n\n  @transient var sc: SparkContext = _\n\n  override def beforeAll() {\n    InternalLoggerFactory.setDefaultFactory(new Slf4JLoggerFactory())\n    super.beforeAll()\n  }\n\n  override def afterEach() {\n    resetSparkContext()\n    super.afterEach()\n  }\n\n  def resetSparkContext() : Unit = {\n    LocalSparkContext.stop(sc)\n    sc = null\n  }\n\n}\n\nobject LocalSparkContext {\n  def stop(sc: SparkContext) {\n    if (sc != null) {\n      sc.stop()\n    }\n    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown\n    System.clearProperty(\"spark.driver.port\")\n  }\n\n  /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. */\n  def withSpark[T](sc: SparkContext)(f: SparkContext => T) : Unit = {\n    try {\n      f(sc)\n    } finally {\n      stop(sc)\n    }\n  }\n\n}\n/** Shares a local `SparkContext` between all tests in a suite and closes it at the end */\ntrait SharedSparkContext extends BeforeAndAfterAll { self: Suite =>\n\n  @transient private var _sc: SparkContext = _\n\n  def sc: SparkContext = _sc\n\n  var conf = new SparkConf(false)\n\n  override def beforeAll() {\n    _sc = new SparkContext(\"local[4]\", \"test\", conf)\n    super.beforeAll()\n  }\n\n  override def afterAll() {\n    LocalSparkContext.stop(_sc)\n    _sc = null\n    super.afterAll()\n  }\n}\n\ntrait SharedStorageContext extends BeforeAndAfterAll { self: Suite =>\n\n  override def beforeAll(): Unit ={\n    ConfigurationMockUtil.createJDBCMockedConfig\n    super.beforeAll()\n  }\n\n  override def afterAll(): Unit = {\n    super.afterAll()\n  }\n\n}\n\nobject ConfigurationMockUtil extends MockFactory {\n\n  def createJDBCMockedConfig: Unit = {\n    val mockedEnvService = mock[EnvironmentService]\n    (mockedEnvService.envKeys _)\n      .expects\n      .returning(List(\"PIO_STORAGE_REPOSITORIES_METADATA_NAME\",\n        
\"PIO_STORAGE_SOURCES_MYSQL_TYPE\"))\n      .twice\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_METADATA_NAME\")\n      .returning(\"test_metadata\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\")\n      .returning(\"MYSQL\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_SOURCES_MYSQL_TYPE\")\n      .returning(\"jdbc\")\n\n    (mockedEnvService.filter _)\n      .expects(*)\n      .returning(Map(\n        \"URL\" -> \"jdbc:h2:~/test;MODE=MySQL;AUTO_SERVER=TRUE\",\n        \"USERNAME\" -> \"sa\",\n        \"PASSWORD\" -> \"\")\n      )\n\n    EnvironmentFactory.environmentService = new Some(mockedEnvService)\n  }\n}\n\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/workflow/EngineWorkflowTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/workflow/EvaluationWorkflowTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.controller._\nimport org.scalamock.scalatest.MockFactory\nimport org.scalatest.FunSuite\nimport org.scalatest.Matchers._\n\nclass EvaluationWorkflowSuite extends FunSuite with SharedStorageContext\n  with SharedSparkContext with MockFactory {\n\n  test(\"Evaluation return best engine params, simple result type: Double\") {\n    val engine = new Engine1()\n    val ep0 = EngineParams(dataSourceParams = Engine1.DSP(0.2))\n    val ep1 = EngineParams(dataSourceParams = Engine1.DSP(0.3))\n    val ep2 = EngineParams(dataSourceParams = Engine1.DSP(0.3))\n    val ep3 = EngineParams(dataSourceParams = Engine1.DSP(-0.2))\n    val engineParamsList = Seq(ep0, ep1, ep2, ep3)\n\n    val evaluator = MetricEvaluator(new Metric0())\n  \n    object Eval extends Evaluation {\n      engineEvaluator = (new Engine1(), MetricEvaluator(new Metric0()))\n    }\n\n    val result = EvaluationWorkflow.runEvaluation(\n      sc,\n      Eval,\n      engine,\n      engineParamsList,\n      evaluator,\n      WorkflowParams())\n\n    result.bestScore.score shouldBe 0.3\n    result.bestEngineParams 
shouldBe ep1\n  }\n\n  test(\"Evaluation return best engine params, complex result type\") {\n    val engine = new Engine1()\n    val ep0 = EngineParams(dataSourceParams = Engine1.DSP(0.2))\n    val ep1 = EngineParams(dataSourceParams = Engine1.DSP(0.3))\n    val ep2 = EngineParams(dataSourceParams = Engine1.DSP(0.3))\n    val ep3 = EngineParams(dataSourceParams = Engine1.DSP(-0.2))\n    val engineParamsList = Seq(ep0, ep1, ep2, ep3)\n\n    val evaluator = MetricEvaluator(new Metric1())\n    \n    object Eval extends Evaluation {\n      engineEvaluator = (new Engine1(), MetricEvaluator(new Metric1()))\n    }\n\n    val result = EvaluationWorkflow.runEvaluation(\n      sc,\n      Eval,\n      engine,\n      engineParamsList,\n      evaluator,\n      WorkflowParams())\n  \n    result.bestScore.score shouldBe Metric1.Result(0, 0.3)\n    result.bestEngineParams shouldBe ep1\n  }\n}\n"
  },
  {
    "path": "core/src/test/scala/org/apache/predictionio/workflow/JsonExtractorSuite.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.workflow\n\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.controller.Utils\nimport org.json4s.CustomSerializer\nimport org.json4s.JsonAST.JField\nimport org.json4s.JsonAST.JObject\nimport org.json4s.JsonAST.JString\nimport org.json4s.MappingException\nimport org.json4s.native.JsonMethods.compact\nimport org.json4s.native.JsonMethods.render\nimport org.scalatest.FunSuite\nimport org.scalatest.Matchers\n\nclass JsonExtractorSuite extends FunSuite with Matchers {\n\n  test(\"Extract Scala object using option Json4sNative works with optional and default value \" +\n    \"provided\") {\n\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": \"optional string\", \"default\": \"d\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", Some(\"optional string\"), \"d\"))\n  }\n\n  test(\"Extract Scala object using option Json4sNative works with no optional and no default \" +\n    \"value 
provided\") {\n\n    val json = \"\"\"{\"string\": \"query string\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", None, \"default\"))\n  }\n\n  test(\"Extract Scala object using option Json4sNative works with null optional and null default\" +\n    \" value\") {\n\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": null, \"default\": null}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", None, \"default\"))\n  }\n\n  test(\"Extract Scala object using option Both works with optional and default value provided\") {\n\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": \"optional string\", \"default\": \"d\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", Some(\"optional string\"), \"d\"))\n  }\n\n  test(\"Extract Scala object using option Both works with no optional and no default value \" +\n    \"provided\") {\n\n    val json = \"\"\"{\"string\": \"query string\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", None, \"default\"))\n  }\n\n  test(\"Extract Scala object using option Both works with null optional and null default value\") {\n\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": null, \"default\": null}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", None, \"default\"))\n  }\n\n  test(\"Extract Scala object using option Gson should not get 
default value and optional none\" +\n    \" value\") {\n\n    val json = \"\"\"{\"string\": \"query string\"}\"\"\"\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Gson,\n      json,\n      classOf[ScalaQuery])\n\n    query should be (ScalaQuery(\"query string\", null, null))\n  }\n\n  test(\"Extract Scala object using option Gson should throw an exception with optional \" +\n    \"value provided\") {\n\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": \"o\", \"default\": \"d\"}\"\"\"\n    intercept[RuntimeException] {\n      JsonExtractor.extract(\n        JsonExtractorOption.Gson,\n        json,\n        classOf[ScalaQuery])\n    }\n  }\n\n  test(\"Extract Java object using option Gson works\") {\n\n    val json = \"\"\"{\"q\": \"query string\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Gson,\n      json,\n      classOf[JavaQuery])\n\n    query should be (new JavaQuery(\"query string\"))\n  }\n\n  test(\"Extract Java object using option Both works\") {\n\n    val json = \"\"\"{\"q\": \"query string\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Both,\n      json,\n      classOf[JavaQuery])\n\n    query should be (new JavaQuery(\"query string\"))\n  }\n\n  test(\"Extract Java object using option Json4sNative should throw an exception\") {\n\n    val json = \"\"\"{\"q\": \"query string\"}\"\"\"\n\n    intercept[MappingException] {\n      JsonExtractor.extract(\n        JsonExtractorOption.Json4sNative,\n        json,\n        classOf[JavaQuery])\n    }\n  }\n\n  test(\"Extract Scala object using option Json4sNative with custom deserializer\") {\n    val json = \"\"\"{\"string\": \"query string\", \"optional\": \"o\", \"default\": \"d\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      JsonExtractorOption.Json4sNative,\n      json,\n      classOf[ScalaQuery],\n      Utils.json4sDefaultFormats + new UpperCaseFormat\n    )\n\n    query should be(ScalaQuery(\"QUERY 
STRING\", Some(\"O\"), \"D\"))\n  }\n\n  test(\"Extract Java object usingoption Gson with custom deserializer\") {\n    val json = \"\"\"{\"q\": \"query string\"}\"\"\"\n\n    val query = JsonExtractor.extract(\n      extractorOption = JsonExtractorOption.Gson,\n      json = json,\n      clazz = classOf[JavaQuery],\n      gsonTypeAdapterFactories = Seq(new JavaQueryTypeAdapterFactory)\n    )\n\n    query should be(new JavaQuery(\"QUERY STRING\"))\n  }\n\n  test(\"Java object to JValue using option Both works\") {\n    val query = new JavaQuery(\"query string\")\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Both, query)\n\n    compact(render(jValue)) should be (\"\"\"{\"q\":\"query string\"}\"\"\")\n  }\n\n  test(\"Java object to JValue using option Gson works\") {\n    val query = new JavaQuery(\"query string\")\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Gson, query)\n\n    compact(render(jValue)) should be (\"\"\"{\"q\":\"query string\"}\"\"\")\n  }\n\n  test(\"Java object to JValue using option Json4sNative results in empty Json\") {\n    val query = new JavaQuery(\"query string\")\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Json4sNative, query)\n\n    compact(render(jValue)) should be (\"\"\"{}\"\"\")\n  }\n\n  test(\"Scala object to JValue using option Both works\") {\n    val query = new ScalaQuery(\"query string\", Some(\"option\"))\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Both, query)\n\n    compact(render(jValue)) should\n      be (\"\"\"{\"string\":\"query string\",\"optional\":\"option\",\"default\":\"default\"}\"\"\")\n  }\n\n  test(\"Scala object to JValue using option Gson does not serialize optional\") {\n    val query = new ScalaQuery(\"query string\", Some(\"option\"))\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Gson, query)\n\n    compact(render(jValue)) should\n      be (\"\"\"{\"string\":\"query string\",\"optional\":{},\"default\":\"default\"}\"\"\")\n  }\n\n  
test(\"Scala object to JValue using option Json4sNative works\") {\n    val query = new ScalaQuery(\"query string\", Some(\"option\"))\n    val jValue = JsonExtractor.toJValue(JsonExtractorOption.Json4sNative, query)\n\n    compact(render(jValue)) should\n      be (\"\"\"{\"string\":\"query string\",\"optional\":\"option\",\"default\":\"default\"}\"\"\")\n  }\n\n  test(\"Scala object to JValue using option Json4sNative with custom serializer\") {\n    val query = new ScalaQuery(\"query string\", Some(\"option\"))\n    val jValue = JsonExtractor.toJValue(\n      JsonExtractorOption.Json4sNative,\n      query,\n      Utils.json4sDefaultFormats + new UpperCaseFormat\n    )\n\n    compact(render(jValue)) should\n      be (\"\"\"{\"string\":\"QUERY STRING\",\"optional\":\"OPTION\",\"default\":\"DEFAULT\"}\"\"\")\n  }\n\n  test(\"Java object to JValue using option Gson with custom serializer\") {\n    val query = new JavaQuery(\"query string\")\n    val jValue = JsonExtractor.toJValue(\n      extractorOption = JsonExtractorOption.Gson,\n      o = query,\n      gsonTypeAdapterFactories = Seq(new JavaQueryTypeAdapterFactory)\n    )\n\n    compact(render(jValue)) should be (\"\"\"{\"q\":\"QUERY STRING\"}\"\"\")\n  }\n\n  test(\"Java Param to Json using option Both\") {\n    val param = (\"algo\", new JavaParams(\"parameter\"))\n    val json = JsonExtractor.paramToJson(JsonExtractorOption.Both, param)\n\n    json should be (\"\"\"{\"algo\":{\"p\":\"parameter\"}}\"\"\")\n  }\n\n  test(\"Java Param to Json using option Gson\") {\n    val param = (\"algo\", new JavaParams(\"parameter\"))\n    val json = JsonExtractor.paramToJson(JsonExtractorOption.Gson, param)\n\n    json should be (\"\"\"{\"algo\":{\"p\":\"parameter\"}}\"\"\")\n  }\n\n  test(\"Scala Param to Json using option Both\") {\n    val param = (\"algo\", AlgorithmParams(\"parameter\"))\n    val json = JsonExtractor.paramToJson(JsonExtractorOption.Both, param)\n\n    json should be 
(\"\"\"{\"algo\":{\"a\":\"parameter\"}}\"\"\")\n  }\n\n  test(\"Scala Param to Json using option Json4sNative\") {\n    val param = (\"algo\", AlgorithmParams(\"parameter\"))\n    val json = JsonExtractor.paramToJson(JsonExtractorOption.Json4sNative, param)\n\n    json should be (\"\"\"{\"algo\":{\"a\":\"parameter\"}}\"\"\")\n  }\n\n  test(\"Java Params to Json using option Both\") {\n    val params = Seq((\"algo\", new JavaParams(\"parameter\")), (\"algo2\", new JavaParams(\"parameter2\")))\n    val json = JsonExtractor.paramsToJson(JsonExtractorOption.Both, params)\n\n    json should be (\"\"\"[{\"algo\":{\"p\":\"parameter\"}},{\"algo2\":{\"p\":\"parameter2\"}}]\"\"\")\n  }\n\n  test(\"Java Params to Json using option Gson\") {\n    val params = Seq((\"algo\", new JavaParams(\"parameter\")), (\"algo2\", new JavaParams(\"parameter2\")))\n    val json = JsonExtractor.paramsToJson(JsonExtractorOption.Gson, params)\n\n    json should be (\"\"\"[{\"algo\":{\"p\":\"parameter\"}},{\"algo2\":{\"p\":\"parameter2\"}}]\"\"\")\n  }\n\n  test(\"Scala Params to Json using option Both\") {\n    val params =\n      Seq((\"algo\", AlgorithmParams(\"parameter\")), (\"algo2\", AlgorithmParams(\"parameter2\")))\n    val json = JsonExtractor.paramsToJson(JsonExtractorOption.Both, params)\n\n    json should be (org.json4s.native.Serialization.write(params)(Utils.json4sDefaultFormats))\n  }\n\n  test(\"Scala Params to Json using option Json4sNative\") {\n    val params =\n      Seq((\"algo\", AlgorithmParams(\"parameter\")), (\"algo2\", AlgorithmParams(\"parameter2\")))\n    val json = JsonExtractor.paramsToJson(JsonExtractorOption.Json4sNative, params)\n\n    json should be (org.json4s.native.Serialization.write(params)(Utils.json4sDefaultFormats))\n  }\n\n  test(\"Mixed Java and Scala Params to Json using option Both\") {\n    val params =\n      Seq((\"scala\", AlgorithmParams(\"parameter\")), (\"java\", new JavaParams(\"parameter2\")))\n    val json = 
JsonExtractor.paramsToJson(JsonExtractorOption.Both, params)\n\n    json should be (\"\"\"[{\"scala\":{\"a\":\"parameter\"}},{\"java\":{\"p\":\"parameter2\"}}]\"\"\")\n  }\n\n  test(\"Serializing Scala EngineParams works using option Json4sNative\") {\n    val ep = new EngineParams(\n      dataSourceParams = (\"ds\", DataSourceParams(\"dsp\")),\n      algorithmParamsList = Seq((\"a0\", AlgorithmParams(\"ap\"))))\n\n    val json = JsonExtractor.engineParamsToJson(JsonExtractorOption.Json4sNative, ep)\n\n    json should be (\n      \"\"\"{\"dataSourceParams\":{\"ds\":{\"a\":\"dsp\"}},\"preparatorParams\":{\"\":{}},\"\"\" +\n        \"\"\"\"algorithmParamsList\":[{\"a0\":{\"a\":\"ap\"}}],\"servingParams\":{\"\":{}}}\"\"\")\n  }\n\n  test(\"Serializing Java EngineParams works using option Gson\") {\n    val ep = new EngineParams(\n      dataSourceParams = (\"ds\", new JavaParams(\"dsp\")),\n      algorithmParamsList = Seq((\"a0\", new JavaParams(\"ap\")), (\"a1\", new JavaParams(\"ap2\"))))\n\n    val json = JsonExtractor.engineParamsToJson(JsonExtractorOption.Gson, ep)\n\n    json should be (\n      \"\"\"{\"dataSourceParams\":{\"ds\":{\"p\":\"dsp\"}},\"preparatorParams\":{\"\":{}},\"\"\" +\n        \"\"\"\"algorithmParamsList\":[{\"a0\":{\"p\":\"ap\"}},{\"a1\":{\"p\":\"ap2\"}}],\"servingParams\":{\"\":{}}}\"\"\")\n  }\n\n  test(\"Serializing Java EngineParams works using option Both\") {\n    val ep = new EngineParams(\n      dataSourceParams = (\"ds\", new JavaParams(\"dsp\")),\n      algorithmParamsList = Seq((\"a0\", new JavaParams(\"ap\")), (\"a1\", new JavaParams(\"ap2\"))))\n\n    val json = JsonExtractor.engineParamsToJson(JsonExtractorOption.Both, ep)\n\n    json should be (\n      \"\"\"{\"dataSourceParams\":{\"ds\":{\"p\":\"dsp\"}},\"preparatorParams\":{\"\":{}},\"\"\" +\n        \"\"\"\"algorithmParamsList\":[{\"a0\":{\"p\":\"ap\"}},{\"a1\":{\"p\":\"ap2\"}}],\"servingParams\":{\"\":{}}}\"\"\")\n  }\n}\n\nprivate case class AlgorithmParams(a: String) 
extends Params\n\nprivate case class DataSourceParams(a: String) extends Params\n\nprivate case class ScalaQuery(string: String, optional: Option[String], default: String = \"default\")\n\nprivate class UpperCaseFormat extends CustomSerializer[ScalaQuery](format => ( {\n  case JObject(JField(\"string\", JString(string)) ::\n    JField(\"optional\", JString(optional)) ::\n    JField(\"default\", JString(default)) ::\n    Nil) => ScalaQuery(string.toUpperCase, Some(optional.toUpperCase), default.toUpperCase)\n}, {\n  case x: ScalaQuery =>\n    JObject(\n      JField(\"string\", JString(x.string.toUpperCase)),\n      JField(\"optional\", JString(x.optional.get.toUpperCase)),\n      JField(\"default\", JString(x.default.toUpperCase)))\n}))\n"
  },
  {
    "path": "data/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Data Collection API\n\nPlease refer to the documentation site - [Collecting Data through REST/SDKs](http://predictionio.apache.org/datacollection/eventapi/).\n\n## For Development Use only:\n\n### Start Data API without bin/pio\n\n```\n$ sbt/sbt \"data/compile\"\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.api.Run\"\n```\n\n### Very simple test\n\n```\n$ data/test.sh <appAccessKey>\n```\n\n### Unit test (Very minimal)\n\n```\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"data/test\"\n```\n\n- test for EventService\n\n```\n$ sbt/sbt \"data/test-only org.apache.predictionio.data.api.EventServiceSpec\"\n```\n\n- test for LEvents\n\n```\n$ sbt/sbt \"data/test-only org.apache.predictionio.data.storage.LEventsSpec\"\n```\n\n- test for ExampleJson and ExampleForm webhooks\n\n```\n$ sbt/sbt \"data/test-only org.apache.predictionio.data.webhooks.examplejson.ExampleJsonConnectorSpec\"\n$ sbt/sbt \"data/test-only org.apache.predictionio.data.webhooks.exampleform.ExampleFormConnectorSpec\"\n```\n\n### Upgrade from 0.8.0/0.8.1 to 0.8.2\n\nExperimental upgrade tool (Upgrade HBase schema from 0.8.0/0.8.1 to 0.8.2)\nCreate 
an app to store the data\n```\n$ bin/pio app new <my app>\n```\n\nReplace <to app ID> by the returned app ID:\n(<from app ID> is the original app ID used in 0.8.0/0.8.1.)\n\n```\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.storage.hbase.upgrade.Upgrade <from app ID>\" \"<to app ID>\"\n```\n\n### Upgrade from 0.8.2 to 0.8.3\n\n0.8.3 disallows entity types `pio_user` and `pio_item`. These types are used by\ndefault for most SDKs. We deprecate the use in 0.8.3, and SDK helper functions\nuse `user` and `item` instead respectively.\n\nThis script performs the migration by copying one appId to another. Users can\neither point the engine to the new appId, or can migrate the data back to the\nold one using hbase import / export tool.\n\nSuppose we are migrating `<old_app_id>`.\n\n#### 1. First create a new app:\n\n```\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ bin/pio app new NewApp\n... you will see <new_app_id>\n```\n\nThe App with `<new_app_id>` must be empty before you upgrade. You can check the status of this newly created app using:\n\n```\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.storage.hbase.upgrade.CheckDistribution <new_app_id>\"\n```\n\nIf it shows that it is non-empty, you can clean it with\n\n```\n$ bin/pio app data-delete <new_app_name>\n```\n\n#### 2. Run the following to migrate from <old_app_id> to <new_app_id>\n\n```\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.storage.hbase.upgrade.Upgrade_0_8_3 <old_app_id> <new_app_id>\"\n... Done.\n```\n\nYou can use the following to check the <new_app_id> again. It should display the amount of data migrated:\n\n```\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.storage.hbase.upgrade.CheckDistribution <new_app_id>\"\n```\n"
  },
  {
    "path": "data/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data\"\n\nlibraryDependencies ++= Seq(\n  \"org.scala-lang\"          % \"scala-reflect\"  % scalaVersion.value,\n  \"com.github.nscala-time\" %% \"nscala-time\"    % \"2.6.0\",\n  \"com.google.guava\"        % \"guava\"          % \"14.0.1\",\n  \"com.typesafe.akka\"      %% \"akka-http-testkit\" % \"10.1.5\" % \"test\",\n  \"org.apache.spark\"       %% \"spark-sql\"      % sparkVersion.value % \"provided\",\n  \"org.clapper\"            %% \"grizzled-slf4j\" % \"1.0.2\",\n  \"org.scalatest\"          %% \"scalatest\"      % \"2.1.7\" % \"test\",\n  \"org.specs2\"             %% \"specs2\"         % \"3.3.1\" % \"test\"\n    exclude(\"org.scalaz.stream\", s\"scalaz-stream_${scalaBinaryVersion.value}\"),\n  \"org.scalamock\"          %% \"scalamock-specs2-support\" % \"3.5.0\" % \"test\",\n  \"com.h2database\"           % \"h2\"             % \"1.4.196\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/Utils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data\n\nimport org.joda.time.DateTime\nimport org.joda.time.format.ISODateTimeFormat\n\nimport java.lang.IllegalArgumentException\n\nprivate[predictionio] object Utils {\n\n  // use dateTime() for strict ISO8601 format\n  val dateTimeFormatter = ISODateTimeFormat.dateTime().withOffsetParsed()\n\n  val dateTimeNoMillisFormatter =\n    ISODateTimeFormat.dateTimeNoMillis().withOffsetParsed()\n\n  def stringToDateTime(dt: String): DateTime = {\n    // We accept two formats.\n    // 1. \"yyyy-MM-dd'T'HH:mm:ss.SSSZZ\"\n    // 2. \"yyyy-MM-dd'T'HH:mm:ssZZ\"\n    // The first one also takes milliseconds into account.\n    try {\n      // formatting for \"yyyy-MM-dd'T'HH:mm:ss.SSSZZ\"\n      dateTimeFormatter.parseDateTime(dt)\n    } catch {\n      case e: IllegalArgumentException => {\n        // handle when the datetime string doesn't specify milliseconds.\n        dateTimeNoMillisFormatter.parseDateTime(dt)\n      }\n    }\n  }\n\n  def dateTimeToString(dt: DateTime): String = dateTimeFormatter.print(dt)\n    // dt.toString\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/Common.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.api\n\nimport akka.http.scaladsl.server._\nimport org.apache.predictionio.data.storage.StorageException\nimport org.apache.predictionio.data.webhooks.ConnectorException\nimport org.json4s.{DefaultFormats, Formats}\nimport akka.http.scaladsl.model._\nimport akka.http.scaladsl.server.Directives._\nimport org.apache.predictionio.akkahttpjson4s.Json4sSupport._\n\nobject Common {\n\n  object Json4sProtocol {\n    implicit val serialization = org.json4s.native.Serialization\n    implicit def json4sFormats: Formats = DefaultFormats\n  }\n\n  import Json4sProtocol._\n\n  val exceptionHandler = ExceptionHandler {\n    case e: ConnectorException => {\n      complete(StatusCodes.BadRequest, Map(\"message\" -> s\"${e.getMessage()}\"))\n    }\n    case e: StorageException => {\n      complete(StatusCodes.InternalServerError, Map(\"message\" -> s\"${e.getMessage()}\"))\n    }\n    case e: Exception => {\n      complete(StatusCodes.InternalServerError, Map(\"message\" -> s\"${e.getMessage()}\"))\n    }\n  }\n\n  val rejectionHandler = RejectionHandler.newBuilder().handle {\n    case MalformedRequestContentRejection(msg, 
_) =>\n      complete(StatusCodes.BadRequest, Map(\"message\" -> msg))\n\n    case MissingQueryParamRejection(msg) =>\n      complete(StatusCodes.NotFound,\n        Map(\"message\" -> s\"missing required query parameter ${msg}.\"))\n\n    case AuthenticationFailedRejection(cause, challengeHeaders) => {\n      val msg = cause match {\n        case AuthenticationFailedRejection.CredentialsRejected =>\n          \"Invalid accessKey.\"\n        case AuthenticationFailedRejection.CredentialsMissing =>\n          \"Missing accessKey.\"\n      }\n      complete(StatusCodes.Unauthorized, Map(\"message\" -> msg))\n    }\n    case ChannelRejection(msg) =>\n      complete(StatusCodes.Unauthorized, Map(\"message\" -> msg))\n  }.result()\n}\n\n/** invalid channel */\ncase class ChannelRejection(msg: String) extends Rejection\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/EventInfo.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport org.apache.predictionio.data.storage.Event\n\ncase class EventInfo(\n  appId: Int,\n  channelId: Option[Int],\n  event: Event)\n\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.event.{Logging, LoggingAdapter}\nimport sun.misc.BASE64Decoder\nimport java.util.concurrent.TimeUnit\n\nimport akka.actor._\nimport akka.http.scaladsl.Http\nimport akka.http.scaladsl.model.{FormData, HttpEntity, HttpResponse, StatusCodes}\nimport akka.http.scaladsl.model.ContentTypes._\nimport akka.http.scaladsl.model.headers.HttpChallenge\nimport akka.http.scaladsl.server.Directives.complete\nimport akka.http.scaladsl.server.directives._\nimport akka.http.scaladsl.server._\nimport akka.pattern.ask\nimport akka.util.Timeout\nimport akka.http.scaladsl.server.Directives._\nimport akka.stream.ActorMaterializer\nimport org.apache.predictionio.data.storage._\nimport org.apache.predictionio.akkahttpjson4s.Json4sSupport._\nimport org.json4s.{DefaultFormats, Formats, JObject}\n\nimport scala.concurrent._\nimport scala.concurrent.duration.Duration\nimport scala.util.{Failure, Success, Try}\n\nobject Json4sProtocol {\n  implicit val serialization = org.json4s.native.Serialization\n  implicit def json4sFormats: Formats = DefaultFormats +\n    new EventJson4sSupport.APISerializer +\n    new 
BatchEventsJson4sSupport.APISerializer +\n    // NOTE: don't use Json4s JodaTimeSerializers since it has issues,\n    // some format not converted, or timezone not correct\n    new DateTimeJson4sSupport.Serializer\n}\n\ncase class EventServerConfig(\n  ip: String = \"localhost\",\n  port: Int = 7070,\n  plugins: String = \"plugins\",\n  stats: Boolean = false)\n\nobject EventServer {\n  import Json4sProtocol._\n  import FutureDirectives._\n  import Common._\n\n  private val MaxNumberOfEventsPerBatchRequest = 50\n  private lazy val base64Decoder = new BASE64Decoder\n  private implicit val timeout = Timeout(5, TimeUnit.SECONDS)\n  private case class AuthData(appId: Int, channelId: Option[Int], events: Seq[String])\n\n  private def FailedAuth[T]: Either[Rejection, T] = Left(\n    AuthenticationFailedRejection(\n      AuthenticationFailedRejection.CredentialsRejected, HttpChallenge(\"eventserver\", None)\n    )\n  )\n\n  private def MissedAuth[T]: Either[Rejection, T] = Left(\n    AuthenticationFailedRejection(\n      AuthenticationFailedRejection.CredentialsMissing, HttpChallenge(\"eventserver\", None)\n    )\n  )\n\n  def createRoute(eventClient: LEvents,\n                  accessKeysClient: AccessKeys,\n                  channelsClient: Channels,\n                  logger: LoggingAdapter,\n                  statsActorRef: ActorSelection,\n                  pluginsActorRef: ActorSelection,\n                  config: EventServerConfig)(implicit executionContext: ExecutionContext): Route = {\n\n    /* with accessKey in query/header, return appId if succeed */\n    def withAccessKey: RequestContext => Future[Either[Rejection, AuthData]] = {\n      ctx: RequestContext =>\n        val accessKeyParamOpt = ctx.request.uri.query().get(\"accessKey\")\n        val channelParamOpt = ctx.request.uri.query().get(\"channel\")\n        Future {\n          // with accessKey in query, return appId if succeed\n          accessKeyParamOpt.map { accessKeyParam =>\n            
accessKeysClient.get(accessKeyParam).map { k =>\n              channelParamOpt.map { ch =>\n                val channelMap =\n                  channelsClient.getByAppid(k.appid)\n                    .map(c => (c.name, c.id)).toMap\n                if (channelMap.contains(ch)) {\n                  Right(AuthData(k.appid, Some(channelMap(ch)), k.events))\n                } else {\n                  Left(ChannelRejection(s\"Invalid channel '$ch'.\"))\n                }\n              }.getOrElse{\n                Right(AuthData(k.appid, None, k.events))\n              }\n            }.getOrElse(FailedAuth)\n          }.getOrElse {\n            // with accessKey in header, return appId if succeed\n            ctx.request.headers.find(_.name == \"Authorization\").map { authHeader =>\n              authHeader.value.split(\"Basic \") match {\n                case Array(_, value) =>\n                  val appAccessKey =\n                    new String(base64Decoder.decodeBuffer(value)).trim.split(\":\")(0)\n                  accessKeysClient.get(appAccessKey) match {\n                    case Some(k) => Right(AuthData(k.appid, None, k.events))\n                    case None => FailedAuth\n                  }\n\n                case _ => FailedAuth\n              }\n            }.getOrElse(MissedAuth)\n          }\n        }\n    }\n\n    def authenticate[T](authenticator: RequestContext => Future[Either[Rejection, T]]):\n        AuthenticationDirective[T] = {\n      handleRejections(rejectionHandler).tflatMap { _ =>\n        extractRequestContext.flatMap { requestContext =>\n          onSuccess(authenticator(requestContext)).flatMap {\n            case Right(x) => provide(x)\n            case Left(x)  => reject(x): Directive1[T]\n          }\n        }\n      }\n    }\n\n    val pluginContext = EventServerPluginContext(logger)\n    val jsonPath = \"\"\"(.+)\\.json$\"\"\".r\n    val formPath = \"\"\"(.+)\\.form$\"\"\".r\n\n    val route: Route =\n      pathSingleSlash {\n  
      get {\n          complete(Map(\"status\" -> \"alive\"))\n        }\n      } ~\n      path(\"plugins.json\") {\n        get {\n          complete(\n            Map(\"plugins\" -> Map(\n              \"inputblockers\" -> pluginContext.inputBlockers.map { case (n, p) =>\n                n -> Map(\n                  \"name\"        -> p.pluginName,\n                  \"description\" -> p.pluginDescription,\n                  \"class\"       -> p.getClass.getName)\n              },\n              \"inputsniffers\" -> pluginContext.inputSniffers.map { case (n, p) =>\n                n -> Map(\n                  \"name\"        -> p.pluginName,\n                  \"description\" -> p.pluginDescription,\n                  \"class\"       -> p.getClass.getName)\n              }\n            ))\n          )\n        }\n      } ~\n      path(\"plugins\" / Segments) { segments =>\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val pluginArgs = segments.drop(2)\n              val pluginType = segments(0)\n              val pluginName = segments(1)\n              pluginType match {\n                case EventServerPlugin.inputBlocker =>\n                  complete(HttpResponse(entity = HttpEntity(\n                    `application/json`,\n                    pluginContext.inputBlockers(pluginName).handleREST(\n                      authData.appId,\n                      authData.channelId,\n                      pluginArgs)\n                  )))\n\n                case EventServerPlugin.inputSniffer =>\n                  complete(pluginsActorRef ? 
PluginsActor.HandleREST(\n                    appId = authData.appId,\n                    channelId = authData.channelId,\n                    pluginName = pluginName,\n                    pluginArgs = pluginArgs) map { json =>\n                      HttpResponse(entity = HttpEntity(\n                        `application/json`,\n                        json.asInstanceOf[String]\n                      ))\n                    })\n              }\n            }\n          }\n        }\n      } ~\n      path(\"events\" / jsonPath ) { eventId =>\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              logger.debug(s\"GET event ${eventId}.\")\n              onSuccess(eventClient.futureGet(eventId, appId, channelId)){ eventOpt =>\n                  eventOpt.map { event =>\n                    complete(StatusCodes.OK, event)\n                  }.getOrElse(\n                    complete(StatusCodes.NotFound, Map(\"message\" -> \"Not Found\"))\n                  )\n              }\n            }\n          }\n        } ~\n        delete {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              logger.debug(s\"DELETE event ${eventId}.\")\n              onSuccess(eventClient.futureDelete(eventId, appId, channelId)){ found =>\n                if (found) {\n                  complete(StatusCodes.OK, Map(\"message\" -> \"Found\"))\n                } else {\n                  complete(StatusCodes.NotFound, Map(\"message\" -> \"Not Found\"))\n                }\n              }\n            }\n          }\n        }\n      } ~\n      path(\"events.json\") {\n        post {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { 
authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              val events = authData.events\n              entity(as[Event]) { event =>\n                if (events.isEmpty || authData.events.contains(event.event)) {\n                  pluginContext.inputBlockers.values.foreach(\n                    _.process(EventInfo(\n                      appId = appId,\n                      channelId = channelId,\n                      event = event), pluginContext))\n                  onSuccess(eventClient.futureInsert(event, appId, channelId)){ id =>\n                    pluginsActorRef ! EventInfo(\n                      appId = appId,\n                      channelId = channelId,\n                      event = event)\n                    val result = (StatusCodes.Created, Map(\"eventId\" -> s\"${id}\"))\n                    if (config.stats) {\n                      statsActorRef ! Bookkeeping(appId, result._1, event)\n                    }\n                    complete(result)\n                  }\n                } else {\n                  complete(StatusCodes.Forbidden,\n                    Map(\"message\" -> s\"${event.event} events are not allowed\"))\n                }\n              }\n            }\n          }\n        } ~\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              parameters(\n                'startTime.?,\n                'untilTime.?,\n                'entityType.?,\n                'entityId.?,\n                'event.?,\n                'targetEntityType.?,\n                'targetEntityId.?,\n                'limit.as[Int].?,\n                'reversed.as[Boolean].?) 
{\n                (startTimeStr, untilTimeStr, entityType, entityId,\n                eventName,  // only support one event name\n                targetEntityType, targetEntityId,\n                limit, reversed) =>\n                  logger.debug(\n                    s\"GET events of appId=${appId} \" +\n                    s\"st=${startTimeStr} ut=${untilTimeStr} \" +\n                    s\"et=${entityType} eid=${entityId} \" +\n                    s\"li=${limit} rev=${reversed} \")\n\n                  require(!((reversed == Some(true))\n                    && (entityType.isEmpty || entityId.isEmpty)),\n                    \"the parameter reversed can only be used with\" +\n                      \" both entityType and entityId specified.\")\n\n                  val parseTime = Future {\n                    val startTime = startTimeStr.map(Utils.stringToDateTime(_))\n                    val untilTime = untilTimeStr.map(Utils.stringToDateTime(_))\n                    (startTime, untilTime)\n                  }\n\n\n                  val f = parseTime.flatMap { case (startTime, untilTime) =>\n                    val data = eventClient.futureFind(\n                      appId = appId,\n                      channelId = channelId,\n                      startTime = startTime,\n                      untilTime = untilTime,\n                      entityType = entityType,\n                      entityId = entityId,\n                      eventNames = eventName.map(List(_)),\n                      targetEntityType = targetEntityType.map(Some(_)),\n                      targetEntityId = targetEntityId.map(Some(_)),\n                      limit = limit.orElse(Some(20)),\n                      reversed = reversed)\n                      .map { eventIter =>\n                        if (eventIter.hasNext) {\n                          (StatusCodes.OK, eventIter.toArray)\n                        } else {\n                          (StatusCodes.NotFound, Map(\"message\" -> 
\"Not Found\"))\n                        }\n                      }\n                    data\n                  }\n\n                  onSuccess(f){ (status, body) => complete(status, body) }\n                }\n            }\n          }\n        }\n      } ~\n      path(\"batch\" / \"events.json\") {\n        post {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              val allowedEvents = authData.events\n\n              entity(as[Seq[Try[Event]]]) { events =>\n                if (events.length <= MaxNumberOfEventsPerBatchRequest) {\n                  val eventWithIndex = events.zipWithIndex\n\n                  val taggedEvents = eventWithIndex.collect { case (Success(event), i) =>\n                    if(allowedEvents.isEmpty || allowedEvents.contains(event.event)){\n                      (Right(event), i)\n                    } else {\n                      (Left(event), i)\n                    }\n                  }\n\n                  val insertEvents = taggedEvents.collect { case (Right(event), i) =>\n                    (event, i)\n                  }\n\n                  insertEvents.foreach { case (event, i) =>\n                    pluginContext.inputBlockers.values.foreach(\n                      _.process(EventInfo(\n                        appId = appId,\n                        channelId = channelId,\n                        event = event), pluginContext))\n                  }\n\n                  val f: Future[Seq[Map[String, Any]]] = eventClient.futureInsertBatch(\n                    insertEvents.map(_._1), appId, channelId).map { insertResults =>\n                    val results = insertResults.zip(insertEvents).map { case (id, (event, i)) =>\n                      pluginsActorRef ! 
EventInfo(\n                        appId = appId,\n                        channelId = channelId,\n                        event = event)\n                      val status = StatusCodes.Created\n                      if (config.stats) {\n                        statsActorRef ! Bookkeeping(appId, status, event)\n                      }\n                      (Map(\n                        \"status\"  -> status.intValue,\n                        \"eventId\" -> s\"${id}\"), i)\n                    } ++\n                      // Results of denied events\n                      taggedEvents.collect { case (Left(event), i) =>\n                        (Map(\n                          \"status\"  -> StatusCodes.Forbidden.intValue,\n                          \"message\" -> s\"${event.event} events are not allowed\"), i)\n                      } ++\n                      // Results of failed to deserialze events\n                      eventWithIndex.collect { case (Failure(exception), i) =>\n                        (Map(\n                          \"status\"  -> StatusCodes.BadRequest.intValue,\n                          \"message\" -> s\"${exception.getMessage()}\"), i)\n                      }\n\n                    // Restore original order\n                    results.sortBy { case (_, i) => i }.map { case (data, _) => data }\n                  }\n\n                  onSuccess(f.recover { case exception =>\n                    Map(\n                      \"status\" -> StatusCodes.InternalServerError.intValue,\n                      \"message\" -> s\"${exception.getMessage()}\"\n                    )\n                  }){ res => complete(res) }\n\n                } else {\n                  complete(StatusCodes.BadRequest,\n                    Map(\"message\" -> (s\"Batch request must have less than or equal to \" +\n                      s\"${MaxNumberOfEventsPerBatchRequest} events\")))\n                }\n              }\n            }\n          }\n        }\n      } 
~\n      path(\"stats.json\") {\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              if (config.stats) {\n                complete {\n                  statsActorRef ? GetStats(appId) map {\n                    _.asInstanceOf[Map[String, StatsSnapshot]]\n                  }\n                }\n              } else {\n                complete(\n                  StatusCodes.NotFound,\n                  Map(\"message\" -> \"To see stats, launch Event Server with --stats argument.\")\n                )\n              }\n            }\n          }\n        }  // stats.json get\n      } ~\n      path(\"webhooks\" / jsonPath ) { web =>\n        post {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              entity(as[JObject]) { jObj =>\n                onSuccess(Webhooks.postJson(\n                  appId = appId,\n                  channelId = channelId,\n                  web = web,\n                  data = jObj,\n                  eventClient = eventClient,\n                  log = logger,\n                  stats = config.stats,\n                  statsActorRef = statsActorRef\n                )){\n                  (status, body) => complete(status, body)\n                }\n              }\n            }\n          }\n        } ~\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              onSuccess(\n                Webhooks.getJson(\n                appId = appId,\n                channelId = channelId,\n                web = web,\n                log = logger)\n              ){\n                (status, body) => complete(status, 
body)\n              }\n            }\n          }\n        }\n      } ~\n      path(\"webhooks\" / formPath ) { web =>\n        post {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              entity(as[FormData]){ formData =>\n                logger.debug(formData.toString)\n                onSuccess(Webhooks.postForm(\n                  appId = appId,\n                  channelId = channelId,\n                  web = web,\n                  data = formData,\n                  eventClient = eventClient,\n                  log = logger,\n                  stats = config.stats,\n                  statsActorRef = statsActorRef\n                )){\n                  (status, body) => complete(status, body)\n                }\n              }\n            }\n          }\n        } ~\n        get {\n          handleExceptions(exceptionHandler) {\n            authenticate(withAccessKey) { authData =>\n              val appId = authData.appId\n              val channelId = authData.channelId\n              onSuccess(Webhooks.getForm(\n                appId = appId,\n                channelId = channelId,\n                web = web,\n                log = logger\n              )){\n                (status, body) => complete(status, body)\n              }\n            }\n          }\n        }\n      }\n\n    route\n  }\n\n  def createEventServer(config: EventServerConfig): ActorSystem = {\n    implicit val system = ActorSystem(\"EventServerSystem\")\n    implicit val materializer = ActorMaterializer()\n    implicit val executionContext = system.dispatcher\n\n    val eventClient = Storage.getLEvents()\n    val accessKeysClient = Storage.getMetaDataAccessKeys()\n    val channelsClient = Storage.getMetaDataChannels()\n\n    val statsActorRef = system.actorSelection(\"/user/StatsActor\")\n    val pluginsActorRef = 
system.actorSelection(\"/user/PluginsActor\")\n\n    val logger = Logging(system, getClass)\n\n    val route = createRoute(eventClient, accessKeysClient, channelsClient,\n      logger, statsActorRef, pluginsActorRef, config)\n\n    Http().bindAndHandle(route, config.ip, config.port)\n\n    system\n  }\n}\n\nobject Run {\n  def main(args: Array[String]): Unit = {\n    val f = EventServer.createEventServer(EventServerConfig(\n      ip = \"0.0.0.0\",\n      port = 7070))\n    .whenTerminated\n\n    Await.ready(f, Duration.Inf)\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/EventServerPlugin.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\ntrait EventServerPlugin {\n  val pluginName: String\n  val pluginDescription: String\n  val pluginType: String\n\n  def process(eventInfo: EventInfo, context: EventServerPluginContext)\n\n  def handleREST(appId: Int, channelId: Option[Int], arguments: Seq[String]): String\n}\n\nobject EventServerPlugin {\n  val inputBlocker = \"inputblocker\"\n  val inputSniffer = \"inputsniffer\"\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/EventServerPluginContext.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport java.util.ServiceLoader\n\nimport akka.event.LoggingAdapter\nimport grizzled.slf4j.Logging\n\nimport scala.collection.JavaConversions._\nimport scala.collection.mutable\n\nclass EventServerPluginContext(\n    val plugins: mutable.Map[String, mutable.Map[String, EventServerPlugin]],\n    val log: LoggingAdapter) {\n  def inputBlockers: Map[String, EventServerPlugin] =\n    plugins.getOrElse(EventServerPlugin.inputBlocker, Map.empty).toMap\n\n  def inputSniffers: Map[String, EventServerPlugin] =\n    plugins.getOrElse(EventServerPlugin.inputSniffer, Map.empty).toMap\n}\n\nobject EventServerPluginContext extends Logging {\n  def apply(log: LoggingAdapter): EventServerPluginContext = {\n    val plugins = mutable.Map[String, mutable.Map[String, EventServerPlugin]](\n      EventServerPlugin.inputBlocker -> mutable.Map(),\n      EventServerPlugin.inputSniffer -> mutable.Map())\n    val serviceLoader = ServiceLoader.load(classOf[EventServerPlugin])\n    serviceLoader foreach { service =>\n      plugins(service.pluginType) += service.pluginName -> service\n    }\n    new EventServerPluginContext(\n  
    plugins,\n      log)\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/PluginsActor.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.actor.Actor\nimport akka.event.Logging\n\nclass PluginsActor() extends Actor {\n  implicit val system = context.system\n  val log = Logging(system, this)\n\n  val pluginContext = EventServerPluginContext(log)\n\n  def receive: PartialFunction[Any, Unit] = {\n    case e: EventInfo =>\n      pluginContext.inputSniffers.values.foreach(_.process(e, pluginContext))\n    case h: PluginsActor.HandleREST =>\n      try {\n        sender() ! pluginContext.inputSniffers(h.pluginName).handleREST(\n          h.appId,\n          h.channelId,\n          h.pluginArgs)\n      } catch {\n        case e: Exception =>\n          sender() ! s\"\"\"{\"message\":\"${e.getMessage}\"}\"\"\"\n      }\n    case _ =>\n      log.error(\"Unknown message sent to Event Server input sniffer plugin host.\")\n  }\n}\n\nobject PluginsActor {\n  case class HandleREST(\n    pluginName: String,\n    appId: Int,\n    channelId: Option[Int],\n    pluginArgs: Seq[String])\n}\n\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/Stats.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.http.scaladsl.model.StatusCode\nimport org.apache.predictionio.data.storage.Event\n\nimport scala.collection.mutable.{HashMap => MHashMap}\nimport scala.collection.mutable\nimport com.github.nscala_time.time.Imports.DateTime\n\ncase class EntityTypesEvent(\n  val entityType: String,\n  val targetEntityType: Option[String],\n  val event: String) {\n\n  def this(e: Event) = this(\n    e.entityType,\n    e.targetEntityType,\n    e.event)\n}\n\ncase class KV[K, V](key: K, value: V)\n\ncase class StatsSnapshot(\n  val startTime: DateTime,\n  val endTime: Option[DateTime],\n  val basic: Seq[KV[EntityTypesEvent, Long]],\n  val statusCode: Seq[KV[StatusCode, Long]]\n)\n\n\nclass Stats(val startTime: DateTime) {\n  private[this] var _endTime: Option[DateTime] = None\n  var statusCodeCount = MHashMap[(Int, StatusCode), Long]().withDefaultValue(0L)\n  var eteCount = MHashMap[(Int, EntityTypesEvent), Long]().withDefaultValue(0L)\n\n  def cutoff(endTime: DateTime) {\n    _endTime = Some(endTime)\n  }\n\n  def update(appId: Int, statusCode: StatusCode, event: Event) {\n    statusCodeCount((appId, 
statusCode)) += 1\n    eteCount((appId, new EntityTypesEvent(event))) += 1\n  }\n\n  def extractByAppId[K, V](appId: Int, m: mutable.Map[(Int, K), V])\n  : Seq[KV[K, V]] = {\n    m\n    .toSeq\n    .flatMap { case (k, v) =>\n      if (k._1 == appId) { Seq(KV(k._2, v)) } else { Nil }\n    }\n  }\n\n  def get(appId: Int): StatsSnapshot = {\n    StatsSnapshot(\n      startTime,\n      _endTime,\n      extractByAppId(appId, eteCount),\n      extractByAppId(appId, statusCodeCount)\n    )\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/StatsActor.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.http.scaladsl.model.StatusCode\nimport org.apache.predictionio.data.storage.Event\n\nimport akka.actor.Actor\nimport akka.event.Logging\n\nimport com.github.nscala_time.time.Imports.DateTime\n\n/* message to StatsActor */\ncase class Bookkeeping(val appId: Int, statusCode: StatusCode, event: Event)\n\n/* message to StatsActor */\ncase class GetStats(val appId: Int)\n\nclass StatsActor extends Actor {\n  implicit val system = context.system\n  val log = Logging(system, this)\n\n  def getCurrent: DateTime = {\n    DateTime.now.\n      withMinuteOfHour(0).\n      withSecondOfMinute(0).\n      withMillisOfSecond(0)\n  }\n\n  var longLiveStats = new Stats(DateTime.now)\n  var hourlyStats = new Stats(getCurrent)\n\n  var prevHourlyStats = new Stats(getCurrent.minusHours(1))\n  prevHourlyStats.cutoff(hourlyStats.startTime)\n\n  def bookkeeping(appId: Int, statusCode: StatusCode, event: Event) {\n    val current = getCurrent\n    // If the current hour is different from the stats start time, we create\n    // another stats instance, and move the current to prev.\n    if (current != 
hourlyStats.startTime) {\n      prevHourlyStats = hourlyStats\n      prevHourlyStats.cutoff(current)\n      hourlyStats = new Stats(current)\n    }\n\n    hourlyStats.update(appId, statusCode, event)\n    longLiveStats.update(appId, statusCode, event)\n  }\n\n  def receive: Actor.Receive = {\n    case Bookkeeping(appId, statusCode, event) =>\n      bookkeeping(appId, statusCode, event)\n    case GetStats(appId) => sender() ! Map(\n      \"time\" -> DateTime.now,\n      \"currentHour\" -> hourlyStats.get(appId),\n      \"prevHour\" -> prevHourlyStats.get(appId),\n      \"longLive\" -> longLiveStats.get(appId))\n    case _ => log.error(\"Unknown message.\")\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/Webhooks.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.http.scaladsl.model.{FormData, StatusCode, StatusCodes}\nimport org.apache.predictionio.data.webhooks.ConnectorUtil\nimport org.apache.predictionio.data.storage.LEvents\n\nimport org.json4s.JObject\n\nimport akka.event.LoggingAdapter\nimport akka.actor.ActorSelection\n\nimport scala.concurrent.{ExecutionContext, Future}\n\n\nprivate[predictionio] object Webhooks {\n\n  def postJson(\n    appId: Int,\n    channelId: Option[Int],\n    web: String,\n    data: JObject,\n    eventClient: LEvents,\n    log: LoggingAdapter,\n    stats: Boolean,\n    statsActorRef: ActorSelection\n  )(implicit ec: ExecutionContext): Future[(StatusCode, Map[String, String])] = {\n\n    val eventFuture = Future {\n      WebhooksConnectors.json.get(web).map { connector =>\n        ConnectorUtil.toEvent(connector, data)\n      }\n    }\n\n    eventFuture.flatMap {\n      case None =>\n        Future successful {\n          val message = s\"webhooks connection for ${web} is not supported.\"\n          (StatusCodes.NotFound, Map(\"message\" -> message))\n        }\n      case Some(event) =>\n        val data = 
eventClient.futureInsert(event, appId, channelId).map { id =>\n          val result = (StatusCodes.Created, Map(\"eventId\" -> s\"${id}\"))\n\n          if (stats) {\n            statsActorRef ! Bookkeeping(appId, result._1, event)\n          }\n          result\n        }\n        data\n    }\n  }\n\n  def getJson(\n    appId: Int,\n    channelId: Option[Int],\n    web: String,\n    log: LoggingAdapter\n  )(implicit ec: ExecutionContext): Future[(StatusCode, Map[String, String])] = {\n    Future {\n      WebhooksConnectors.json.get(web).map { connector =>\n        (StatusCodes.OK, Map(\"message\" -> \"Ok\"))\n      }.getOrElse {\n        val message = s\"webhooks connection for ${web} is not supported.\"\n        (StatusCodes.NotFound, Map(\"message\" -> message))\n      }\n    }\n  }\n\n  def postForm(\n    appId: Int,\n    channelId: Option[Int],\n    web: String,\n    data: FormData,\n    eventClient: LEvents,\n    log: LoggingAdapter,\n    stats: Boolean,\n    statsActorRef: ActorSelection\n  )(implicit ec: ExecutionContext): Future[(StatusCode, Map[String, String])] = {\n    val eventFuture = Future {\n      WebhooksConnectors.form.get(web).map { connector =>\n        ConnectorUtil.toEvent(connector, data.fields.toMap)\n      }\n    }\n\n    eventFuture.flatMap {\n      case None =>\n        Future successful {\n          val message = s\"webhooks connection for ${web} is not supported.\"\n          (StatusCodes.NotFound, Map(\"message\" -> message))\n        }\n      case Some(event) =>\n        val data = eventClient.futureInsert(event, appId, channelId).map { id =>\n          val result = (StatusCodes.Created, Map(\"eventId\" -> s\"${id}\"))\n\n          if (stats) {\n            statsActorRef ! 
Bookkeeping(appId, result._1, event)\n          }\n          result\n        }\n        data\n    }\n  }\n\n  def getForm(\n    appId: Int,\n    channelId: Option[Int],\n    web: String,\n    log: LoggingAdapter\n  )(implicit ec: ExecutionContext): Future[(StatusCode, Map[String, String])] = {\n    Future {\n      WebhooksConnectors.form.get(web).map { connector =>\n        (StatusCodes.OK, Map(\"message\" -> \"Ok\"))\n      }.getOrElse {\n        val message = s\"webhooks connection for ${web} is not supported.\"\n        (StatusCodes.NotFound, Map(\"message\" -> message))\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/api/WebhooksConnectors.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport org.apache.predictionio.data.webhooks.JsonConnector\nimport org.apache.predictionio.data.webhooks.FormConnector\n\nimport org.apache.predictionio.data.webhooks.segmentio.SegmentIOConnector\nimport org.apache.predictionio.data.webhooks.mailchimp.MailChimpConnector\n\nprivate[predictionio] object WebhooksConnectors {\n\n  val json: Map[String, JsonConnector] = Map(\n    \"segmentio\" -> SegmentIOConnector\n  )\n\n  val form: Map[String, FormConnector] = Map(\n    \"mailchimp\" -> MailChimpConnector\n  )\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio\n\n/** Provides data access for PredictionIO and any engines running on top of\n  * PredictionIO\n  */\npackage object data {}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/AccessKeys.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport java.security.SecureRandom\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.commons.codec.binary.Base64\n\n/** :: DeveloperApi ::\n  * Stores mapping of access keys, app IDs, and lists of allowed event names\n  *\n  * @param key Access key\n  * @param appid App ID\n  * @param events List of allowed events for this particular app key\n  * @group Meta Data\n  */\n@DeveloperApi\ncase class AccessKey(\n  key: String,\n  appid: Int,\n  events: Seq[String])\n\n/** :: DeveloperApi ::\n  * Base trait of the [[AccessKey]] data access object\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\ntrait AccessKeys {\n  /** Insert a new [[AccessKey]]. 
If the key field is empty, a key will be\n    * generated.\n    */\n  def insert(k: AccessKey): Option[String]\n\n  /** Get an [[AccessKey]] by key */\n  def get(k: String): Option[AccessKey]\n\n  /** Get all [[AccessKey]]s */\n  def getAll(): Seq[AccessKey]\n\n  /** Get all [[AccessKey]]s for a particular app ID */\n  def getByAppid(appid: Int): Seq[AccessKey]\n\n  /** Update an [[AccessKey]] */\n  def update(k: AccessKey): Unit\n\n  /** Delete an [[AccessKey]] */\n  def delete(k: String): Unit\n\n  /** Default implementation of key generation */\n  def generateKey: String = {\n    val sr = new SecureRandom\n    val srBytes = Array.fill(48)(0.toByte)\n    sr.nextBytes(srBytes)\n    Base64.encodeBase64URLSafeString(srBytes) match {\n      case x if x startsWith \"-\" => generateKey\n      case x => x\n    }\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Apps.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\n\n/** :: DeveloperApi ::\n  * Stores mapping of app IDs and names\n  *\n  * @param id ID of the app.\n  * @param name Name of the app.\n  * @param description Long description of the app.\n  * @group Meta Data\n  */\n@DeveloperApi\ncase class App(\n  id: Int,\n  name: String,\n  description: Option[String])\n\n/** :: DeveloperApi ::\n  * Base trait of the [[App]] data access object\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\ntrait Apps {\n  /** Insert a new [[App]]. Returns a generated app ID if the supplied app ID is 0. */\n  def insert(app: App): Option[Int]\n\n  /** Get an [[App]] by app ID */\n  def get(id: Int): Option[App]\n\n  /** Get an [[App]] by app name */\n  def getByName(name: String): Option[App]\n\n  /** Get all [[App]]s */\n  def getAll(): Seq[App]\n\n  /** Update an [[App]] */\n  def update(app: App): Unit\n\n  /** Delete an [[App]] */\n  def delete(id: Int): Unit\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/BiMap.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport scala.collection.immutable.HashMap\n\nimport org.apache.spark.rdd.RDD\n\n/** Immutable Bi-directional Map\n  *\n  */\nclass BiMap[K, V] private[predictionio] (\n  private val m: Map[K, V],\n  private val i: Option[BiMap[V, K]] = None\n  ) extends Serializable {\n\n  // NOTE: make inverse's inverse point back to current BiMap\n  val inverse: BiMap[V, K] = i.getOrElse {\n    val rev = m.map(_.swap)\n    require((rev.size == m.size),\n      s\"Failed to create reversed map. 
Cannot have duplicated values.\")\n    new BiMap(rev, Some(this))\n  }\n\n  def get(k: K): Option[V] = m.get(k)\n\n  def getOrElse(k: K, default: => V): V = m.getOrElse(k, default)\n\n  def contains(k: K): Boolean = m.contains(k)\n\n  def apply(k: K): V = m.apply(k)\n\n  /** Converts to a map.\n    * @return a map of type immutable.Map[K, V]\n    */\n  def toMap: Map[K, V] = m\n\n  /** Converts to a sequence.\n    * @return a sequence containing all elements of this map\n    */\n  def toSeq: Seq[(K, V)] = m.toSeq\n\n  def size: Int = m.size\n\n  def take(n: Int): BiMap[K, V] = BiMap(m.take(n))\n\n  override def toString: String = m.toString\n}\n\nobject BiMap {\n\n  def apply[K, V](x: Map[K, V]): BiMap[K, V] = new BiMap(x)\n\n  /** Create a BiMap[String, Long] from a set of String. The Long index starts\n    * from 0.\n    * @param keys a set of String\n    * @return a String to Long BiMap\n    */\n  def stringLong(keys: Set[String]): BiMap[String, Long] = {\n    val hm = HashMap(keys.toSeq.zipWithIndex.map(t => (t._1, t._2.toLong)) : _*)\n    new BiMap(hm)\n  }\n\n  /** Create a BiMap[String, Long] from an array of String.\n    * NOTE: the array cannot have duplicated elements.\n    * The Long index starts from 0.\n    * @param keys an array of String\n    * @return a String to Long BiMap\n    */\n  def stringLong(keys: Array[String]): BiMap[String, Long] = {\n    val hm = HashMap(keys.zipWithIndex.map(t => (t._1, t._2.toLong)) : _*)\n    new BiMap(hm)\n  }\n\n  /** Create a BiMap[String, Long] from RDD[String]. The Long index starts\n    * from 0.\n    * @param keys RDD of String\n    * @return a String to Long BiMap\n    */\n  def stringLong(keys: RDD[String]): BiMap[String, Long] = {\n    stringLong(keys.distinct.collect)\n  }\n\n  /** Create a BiMap[String, Int] from a set of String. The Int index starts\n    * from 0.\n    * @param keys a set of String\n    * @return a String to Int BiMap\n    */\n  def stringInt(keys: Set[String]): BiMap[String, Int] = {\n    val hm = HashMap(keys.toSeq.zipWithIndex : _*)\n    new BiMap(hm)\n  }\n\n  /** Create a BiMap[String, Int] from an array of String.\n    * NOTE: the array cannot have duplicated elements.\n    * The Int index starts from 0.\n    * @param keys an array of String\n    * @return a String to Int BiMap\n    */\n  def stringInt(keys: Array[String]): BiMap[String, Int] = {\n    val hm = HashMap(keys.zipWithIndex : _*)\n    new BiMap(hm)\n  }\n\n  /** Create a BiMap[String, Int] from RDD[String]. The Int index starts\n    * from 0.\n    * @param keys RDD of String\n    * @return a String to Int BiMap\n    */\n  def stringInt(keys: RDD[String]): BiMap[String, Int] = {\n    stringInt(keys.distinct.collect)\n  }\n\n  private[this] def stringDoubleImpl(keys: Seq[String])\n  : BiMap[String, Double] = {\n    val ki = keys.zipWithIndex.map(e => (e._1, e._2.toDouble))\n    new BiMap(HashMap(ki : _*))\n  }\n\n  /** Create a BiMap[String, Double] from a set of String. The Double index\n    * starts from 0.\n    * @param keys a set of String\n    * @return a String to Double BiMap\n    */\n  def stringDouble(keys: Set[String]): BiMap[String, Double] = {\n    // val hm = HashMap(keys.toSeq.zipWithIndex.map(_.toDouble) : _*)\n    // new BiMap(hm)\n    stringDoubleImpl(keys.toSeq)\n  }\n\n  /** Create a BiMap[String, Double] from an array of String.\n    * NOTE: the array cannot have duplicated elements.\n    * The Double index starts from 0.\n    * @param keys an array of String\n    * @return a String to Double BiMap\n    */\n  def stringDouble(keys: Array[String]): BiMap[String, Double] = {\n    // val hm = HashMap(keys.zipWithIndex.mapValues(_.toDouble) : _*)\n    // new BiMap(hm)\n    stringDoubleImpl(keys.toSeq)\n  }\n\n  /** Create a BiMap[String, Double] from RDD[String]. The Double index starts\n    * from 0.\n    * @param keys RDD of String\n    * @return a String to Double BiMap\n    */\n  def stringDouble(keys: RDD[String]): BiMap[String, Double] = {\n    stringDoubleImpl(keys.distinct.collect)\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Channels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\n\n/** :: DeveloperApi ::\n  * Stores mapping of channel IDs, names and app ID\n  *\n  * @param id ID of the channel\n  * @param name Name of the channel (must be unique within the same app)\n  * @param appid ID of the app which this channel belongs to\n  * @group Meta Data\n  */\n@DeveloperApi\ncase class Channel(\n  id: Int,\n  name: String, // must be unique within the same app\n  appid: Int\n) {\n  require(Channel.isValidName(name),\n    \"Invalid channel name: ${name}. ${Channel.nameConstraint}\")\n}\n\n/** :: DeveloperApi ::\n  * Companion object of [[Channel]]\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\nobject Channel {\n  /** Examine whether the supplied channel name is valid. 
A valid channel name\n    * must consist of 1 to 16 alphanumeric and '-' characters.\n    *\n    * @param s Channel name to examine\n    * @return true if channel name is valid, false otherwise\n    */\n  def isValidName(s: String): Boolean = {\n    // note: update nameConstraint if this rule is changed\n    s.matches(\"^[a-zA-Z0-9-]{1,16}$\")\n  }\n\n  /** For consistent error message display */\n  val nameConstraint: String =\n    \"Only alphanumeric and - characters are allowed and max length is 16.\"\n}\n\n/** :: DeveloperApi ::\n  * Base trait of the [[Channel]] data access object\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\ntrait Channels {\n  /** Insert a new [[Channel]]. Returns a generated channel ID if original ID is 0. */\n  def insert(channel: Channel): Option[Int]\n\n  /** Get a [[Channel]] by channel ID */\n  def get(id: Int): Option[Channel]\n\n  /** Get all [[Channel]]s by app ID */\n  def getByAppid(appid: Int): Seq[Channel]\n\n  /** Delete a [[Channel]] */\n  def delete(id: Int): Unit\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/DataMap.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.json4s._\nimport org.json4s.native.JsonMethods.parse\n\nimport scala.collection.GenTraversableOnce\nimport scala.collection.JavaConversions\n\n/** Exception class for [[DataMap]]\n  *\n  * @group Event Data\n  */\ncase class DataMapException(msg: String, cause: Exception)\n  extends Exception(msg, cause) {\n  def this(msg: String) = this(msg, null)\n}\n\n/** A DataMap stores properties of the event or entity. Internally it is a Map\n  * whose keys are property names and values are corresponding JSON values\n  * respectively. Use the [[get[T](name: String,clazz: Class[T])]] method to\n  * retrieve the value of a mandatory property or use [[getOpt]] to retrieve the\n  * value of an optional property.\n  *\n  * @param fields Map of property name to JValue\n  * @group Event Data\n  */\nclass DataMap (\n  val fields: Map[String, JValue]\n) extends Serializable {\n  @transient lazy implicit private val formats = DefaultFormats +\n    new DateTimeJson4sSupport.Serializer\n\n  /** Check the existence of a required property name. 
Throw an exception if\n    * it does not exist.\n    *\n    * @param name The property name\n    */\n  def require(name: String): Unit = {\n    if (!fields.contains(name)) {\n      throw new DataMapException(s\"The field $name is required.\")\n    }\n  }\n\n  /** Check if this DataMap contains a specific property.\n    *\n    * @param name The property name\n    * @return Return true if the property exists, else false.\n    */\n  def contains(name: String): Boolean = {\n    fields.contains(name)\n  }\n\n  /** Get the value of a mandatory property. Exception is thrown if the property\n    * does not exist.\n    *\n    * @tparam T The type of the property value\n    * @param name The property name\n    * @return Return the property value of type T\n    */\n  def get[T: Manifest](name: String): T = {\n    require(name)\n    fields(name) match {\n      case JNull => throw new DataMapException(\n        s\"The required field $name cannot be null.\")\n      case x: JValue => x.extract[T]\n    }\n  }\n\n  /** Get the value of an optional property. Return None if the property does\n    * not exist.\n    *\n    * @tparam T The type of the property value\n    * @param name The property name\n    * @return Return the property value of type Option[T]\n    */\n  def getOpt[T: Manifest](name: String): Option[T] = {\n    // either the field doesn't exist or its value is null\n    fields.get(name).flatMap(_.extract[Option[T]])\n  }\n\n  /** Get the value of an optional property. Return default value if the\n    * property does not exist.\n    *\n    * @tparam T The type of the property value\n    * @param name The property name\n    * @param default The default property value of type T\n    * @return Return the property value of type T\n    */\n  def getOrElse[T: Manifest](name: String, default: T): T = {\n    getOpt[T](name).getOrElse(default)\n  }\n\n  /** Java-friendly method for getting the value of a property. 
Return null if the\n    * property does not exist.\n    *\n    * @tparam T The type of the property value\n    * @param name The property name\n    * @param clazz The class of the type of the property value\n    * @return Return the property value of type T\n    */\n  def get[T](name: String, clazz: java.lang.Class[T]): T = {\n    val manifest =  new Manifest[T] {\n      override def erasure: Class[_] = clazz\n      override def runtimeClass: Class[_] = clazz\n    }\n\n    fields.get(name) match {\n      case None => null.asInstanceOf[T]\n      case Some(JNull) => null.asInstanceOf[T]\n      case Some(x) => x.extract[T](formats, manifest)\n    }\n  }\n\n  /** Java-friendly method for getting a list of values of a property. Return null if the\n    * property does not exist.\n    *\n    * @param name The property name\n    * @return Return the list of property values\n    */\n  def getStringList(name: String): java.util.List[String] = {\n    fields.get(name) match {\n      case None => null\n      case Some(JNull) => null\n      case Some(x) =>\n        JavaConversions.seqAsJavaList(x.extract[List[String]](formats, manifest[List[String]]))\n    }\n  }\n\n  /** Return a new DataMap with elements containing elements from the left hand\n    * side operand followed by elements from the right hand side operand.\n    *\n    * @param that Right hand side DataMap\n    * @return A new DataMap\n    */\n  def ++ (that: DataMap): DataMap = DataMap(this.fields ++ that.fields)\n\n  /** Creates a new DataMap from this DataMap by removing all elements of\n    * another collection.\n    *\n    * @param that A collection containing the removed property names\n    * @return A new DataMap\n    */\n  def -- (that: GenTraversableOnce[String]): DataMap =\n    DataMap(this.fields -- that)\n\n  /** Tests whether the DataMap is empty.\n    *\n    * @return true if the DataMap is empty, false otherwise.\n    */\n  def isEmpty: Boolean = fields.isEmpty\n\n  /** Collects all property names of 
this DataMap in a set.\n    *\n    * @return a set containing all property names of this DataMap.\n    */\n  def keySet: Set[String] = this.fields.keySet\n\n  /** Converts this DataMap to a List.\n    *\n    * @return a list of (property name, JSON value) tuples.\n    */\n  def toList(): List[(String, JValue)] = fields.toList\n\n  /** Converts this DataMap to a JObject.\n    *\n    * @return the JObject initialized by this DataMap.\n    */\n  def toJObject(): JObject = JObject(toList())\n\n  /** Converts this DataMap to case class of type T.\n    *\n    * @return the object of type T.\n    */\n  def extract[T: Manifest]: T = {\n    toJObject().extract[T]\n  }\n\n  override\n  def toString: String = s\"DataMap($fields)\"\n\n  override\n  def hashCode: Int = 41 + fields.hashCode\n\n  override\n  def equals(other: Any): Boolean = other match {\n    case that: DataMap => that.canEqual(this) && this.fields.equals(that.fields)\n    case _ => false\n  }\n\n  def canEqual(other: Any): Boolean = other.isInstanceOf[DataMap]\n}\n\n/** Companion object of the [[DataMap]] class\n  *\n  * @group Event Data\n  */\nobject DataMap {\n  /** Create an empty DataMap\n    * @return an empty DataMap\n    */\n  def apply(): DataMap = new DataMap(Map[String, JValue]())\n\n  /** Create an DataMap from a Map of String to JValue\n    * @param fields a Map of String to JValue\n    * @return a new DataMap initialized by fields\n    */\n  def apply(fields: Map[String, JValue]): DataMap = new DataMap(fields)\n\n  /** Create an DataMap from a JObject\n    * @param jObj JObject\n    * @return a new DataMap initialized by a JObject\n    */\n  def apply(jObj: JObject): DataMap = {\n    if (jObj == null) {\n      apply()\n    } else {\n      new DataMap(jObj.obj.toMap)\n    }\n  }\n\n  /** Create an DataMap from a JSON String\n    * @param js JSON String. 
e.g. \"\"\"{ \"a\": 1, \"b\": \"foo\" }\"\"\"\n    * @return a new DataMap initialized by a JSON string\n    */\n  def apply(js: String): DataMap = apply(parse(js).asInstanceOf[JObject])\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/DateTimeJson4sSupport.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.data.{Utils => DataUtils}\nimport org.joda.time.DateTime\nimport org.json4s._\n\n/** :: DeveloperApi ::\n  * JSON4S serializer for Joda-Time\n  *\n  * @group Common\n  */\n@DeveloperApi\nobject DateTimeJson4sSupport {\n\n  @transient lazy implicit val formats = DefaultFormats\n\n  /** Serialize DateTime to JValue */\n  def serializeToJValue: PartialFunction[Any, JValue] = {\n    case d: DateTime => JString(DataUtils.dateTimeToString(d))\n  }\n\n  /** Deserialize JValue to DateTime */\n  def deserializeFromJValue: PartialFunction[JValue, DateTime] = {\n    case jv: JValue => DataUtils.stringToDateTime(jv.extract[String])\n  }\n\n  /** Custom JSON4S serializer for Joda-Time */\n  class Serializer extends CustomSerializer[DateTime](format => (\n    deserializeFromJValue, serializeToJValue))\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/EngineInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport com.github.nscala_time.time.Imports._\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.json4s._\n\n/** :: DeveloperApi ::\n  * Stores parameters, model, and other information for each engine instance\n  *\n  * @param id Engine instance ID.\n  * @param status Status of the engine instance.\n  * @param startTime Start time of the training/evaluation.\n  * @param endTime End time of the training/evaluation.\n  * @param engineId Engine ID of the instance.\n  * @param engineVersion Engine version of the instance.\n  * @param engineVariant Engine variant ID of the instance.\n  * @param engineFactory Engine factory class for the instance.\n  * @param batch A batch label of the engine instance.\n  * @param env The environment in which the instance was created.\n  * @param sparkConf Custom Spark configuration of the instance.\n  * @param dataSourceParams Data source parameters of the instance.\n  * @param preparatorParams Preparator parameters of the instance.\n  * @param algorithmsParams Algorithms parameters of the instance.\n  * @param servingParams Serving parameters of 
the instance.\n  * @group Meta Data\n  */\n@DeveloperApi\ncase class EngineInstance(\n  id: String,\n  status: String,\n  startTime: DateTime,\n  endTime: DateTime,\n  engineId: String,\n  engineVersion: String,\n  engineVariant: String,\n  engineFactory: String,\n  batch: String,\n  env: Map[String, String],\n  sparkConf: Map[String, String],\n  dataSourceParams: String,\n  preparatorParams: String,\n  algorithmsParams: String,\n  servingParams: String)\n\n/** :: DeveloperApi ::\n  * Base trait of the [[EngineInstance]] data access object\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\ntrait EngineInstances {\n  /** Insert a new [[EngineInstance]] */\n  def insert(i: EngineInstance): String\n\n  /** Get an [[EngineInstance]] by ID */\n  def get(id: String): Option[EngineInstance]\n\n  /** Get all [[EngineInstance]]s */\n  def getAll(): Seq[EngineInstance]\n\n  /** Get the instance that started training most recently and has trained to\n    * completion\n    */\n  def getLatestCompleted(\n      engineId: String,\n      engineVersion: String,\n      engineVariant: String): Option[EngineInstance]\n\n  /** Get all instances that have trained to completion */\n  def getCompleted(\n    engineId: String,\n    engineVersion: String,\n    engineVariant: String): Seq[EngineInstance]\n\n  /** Update an [[EngineInstance]] */\n  def update(i: EngineInstance): Unit\n\n  /** Delete an [[EngineInstance]] */\n  def delete(id: String): Unit\n}\n\n/** :: DeveloperApi ::\n  * JSON4S serializer for [[EngineInstance]]\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\nclass EngineInstanceSerializer\n    extends CustomSerializer[EngineInstance](\n  format => ({\n    case JObject(fields) =>\n      implicit val formats = DefaultFormats\n      val seed = EngineInstance(\n          id = \"\",\n          status = \"\",\n          startTime = DateTime.now,\n          endTime = DateTime.now,\n          engineId = \"\",\n          engineVersion = \"\",\n          engineVariant = \"\",\n          
engineFactory = \"\",\n          batch = \"\",\n          env = Map.empty,\n          sparkConf = Map.empty,\n          dataSourceParams = \"\",\n          preparatorParams = \"\",\n          algorithmsParams = \"\",\n          servingParams = \"\")\n      fields.foldLeft(seed) { case (i, field) =>\n        field match {\n          case JField(\"id\", JString(id)) => i.copy(id = id)\n          case JField(\"status\", JString(status)) => i.copy(status = status)\n          case JField(\"startTime\", JString(startTime)) =>\n            i.copy(startTime = Utils.stringToDateTime(startTime))\n          case JField(\"endTime\", JString(endTime)) =>\n            i.copy(endTime = Utils.stringToDateTime(endTime))\n          case JField(\"engineId\", JString(engineId)) =>\n            i.copy(engineId = engineId)\n          case JField(\"engineVersion\", JString(engineVersion)) =>\n            i.copy(engineVersion = engineVersion)\n          case JField(\"engineVariant\", JString(engineVariant)) =>\n            i.copy(engineVariant = engineVariant)\n          case JField(\"engineFactory\", JString(engineFactory)) =>\n            i.copy(engineFactory = engineFactory)\n          case JField(\"batch\", JString(batch)) => i.copy(batch = batch)\n          case JField(\"env\", env) =>\n            i.copy(env = Extraction.extract[Map[String, String]](env))\n          case JField(\"sparkConf\", sparkConf) =>\n            i.copy(sparkConf = Extraction.extract[Map[String, String]](sparkConf))\n          case JField(\"dataSourceParams\", JString(dataSourceParams)) =>\n            i.copy(dataSourceParams = dataSourceParams)\n          case JField(\"preparatorParams\", JString(preparatorParams)) =>\n            i.copy(preparatorParams = preparatorParams)\n          case JField(\"algorithmsParams\", JString(algorithmsParams)) =>\n            i.copy(algorithmsParams = algorithmsParams)\n          case JField(\"servingParams\", JString(servingParams)) =>\n            i.copy(servingParams = 
servingParams)\n          case _ => i\n        }\n      }\n  },\n  {\n    case i: EngineInstance =>\n      JObject(\n        JField(\"id\", JString(i.id)) ::\n        JField(\"status\", JString(i.status)) ::\n        JField(\"startTime\", JString(i.startTime.toString)) ::\n        JField(\"endTime\", JString(i.endTime.toString)) ::\n        JField(\"engineId\", JString(i.engineId)) ::\n        JField(\"engineVersion\", JString(i.engineVersion)) ::\n        JField(\"engineVariant\", JString(i.engineVariant)) ::\n        JField(\"engineFactory\", JString(i.engineFactory)) ::\n        JField(\"batch\", JString(i.batch)) ::\n        JField(\"env\", Extraction.decompose(i.env)(DefaultFormats)) ::\n        JField(\"sparkConf\", Extraction.decompose(i.sparkConf)(DefaultFormats)) ::\n        JField(\"dataSourceParams\", JString(i.dataSourceParams)) ::\n        JField(\"preparatorParams\", JString(i.preparatorParams)) ::\n        JField(\"algorithmsParams\", JString(i.algorithmsParams)) ::\n        JField(\"servingParams\", JString(i.servingParams)) ::\n        Nil)\n  }\n))\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/EntityMap.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.Experimental\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\n/** :: Experimental :: */\n@Experimental\nclass EntityIdIxMap(val idToIx: BiMap[String, Long]) extends Serializable {\n\n  val ixToId: BiMap[Long, String] = idToIx.inverse\n\n  def apply(id: String): Long = idToIx(id)\n\n  def apply(ix: Long): String = ixToId(ix)\n\n  def contains(id: String): Boolean = idToIx.contains(id)\n\n  def contains(ix: Long): Boolean = ixToId.contains(ix)\n\n  def get(id: String): Option[Long] = idToIx.get(id)\n\n  def get(ix: Long): Option[String] = ixToId.get(ix)\n\n  def getOrElse(id: String, default: => Long): Long =\n    idToIx.getOrElse(id, default)\n\n  def getOrElse(ix: Long, default: => String): String =\n    ixToId.getOrElse(ix, default)\n\n  def toMap: Map[String, Long] = idToIx.toMap\n\n  def size: Long = idToIx.size\n\n  def take(n: Int): EntityIdIxMap = new EntityIdIxMap(idToIx.take(n))\n\n  override def toString: String = idToIx.toString\n}\n\n/** :: Experimental :: 
*/\n@Experimental\nobject EntityIdIxMap {\n  def apply(keys: RDD[String]): EntityIdIxMap = {\n    new EntityIdIxMap(BiMap.stringLong(keys))\n  }\n}\n\n/** :: Experimental :: */\n@Experimental\nclass EntityMap[A](val idToData: Map[String, A],\n  override val idToIx: BiMap[String, Long]) extends EntityIdIxMap(idToIx) {\n\n  def this(idToData: Map[String, A]) = this(\n    idToData,\n    BiMap.stringLong(idToData.keySet)\n  )\n\n  def data(id: String): A = idToData(id)\n\n  def data(ix: Long): A = idToData(ixToId(ix))\n\n  def getData(id: String): Option[A] = idToData.get(id)\n\n  def getData(ix: Long): Option[A] = idToData.get(ixToId(ix))\n\n  def getOrElseData(id: String, default: => A): A =\n    getData(id).getOrElse(default)\n\n  def getOrElseData(ix: Long, default: => A): A =\n    getData(ix).getOrElse(default)\n\n  override def take(n: Int): EntityMap[A] = {\n    val newIdToIx = idToIx.take(n)\n    new EntityMap[A](idToData.filterKeys(newIdToIx.contains(_)), newIdToIx)\n  }\n\n  override def toString: String = {\n    s\"idToData: ${idToData.toString} \" + s\"idToix: ${idToIx.toString}\"\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/EvaluationInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport com.github.nscala_time.time.Imports._\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.json4s._\n\n/** :: DeveloperApi ::\n  * Stores meta information for each evaluation instance.\n  *\n  * @param id Instance ID.\n  * @param status Status of this instance.\n  * @param startTime Start time of this instance.\n  * @param endTime End time of this instance.\n  * @param evaluationClass Evaluation class name of this instance.\n  * @param engineParamsGeneratorClass Engine parameters generator class name of this instance.\n  * @param batch Batch label of this instance.\n  * @param env The environment in which this instance was created.\n  * @param evaluatorResults Results of the evaluator.\n  * @param evaluatorResultsHTML HTML results of the evaluator.\n  * @param evaluatorResultsJSON JSON results of the evaluator.\n  * @group Meta Data\n  */\n@DeveloperApi\ncase class EvaluationInstance(\n  id: String = \"\",\n  status: String = \"\",\n  startTime: DateTime = DateTime.now,\n  endTime: DateTime = DateTime.now,\n  evaluationClass: String = \"\",\n  engineParamsGeneratorClass: 
String = \"\",\n  batch: String = \"\",\n  env: Map[String, String] = Map.empty,\n  sparkConf: Map[String, String] = Map.empty,\n  evaluatorResults: String = \"\",\n  evaluatorResultsHTML: String = \"\",\n  evaluatorResultsJSON: String = \"\")\n\n/** :: DeveloperApi ::\n  * Base trait of the [[EvaluationInstance]] data access object\n  *\n  * @group Meta Data\n  */\n@DeveloperApi\ntrait EvaluationInstances {\n  /** Insert a new [[EvaluationInstance]] */\n  def insert(i: EvaluationInstance): String\n\n  /** Get an [[EvaluationInstance]] by ID */\n  def get(id: String): Option[EvaluationInstance]\n\n  /** Get all [[EvaluationInstance]]s */\n  def getAll: Seq[EvaluationInstance]\n\n  /** Get instances that are produced by evaluation and have run to completion,\n    * reverse sorted by the start time\n    */\n  def getCompleted: Seq[EvaluationInstance]\n\n  /** Update an [[EvaluationInstance]] */\n  def update(i: EvaluationInstance): Unit\n\n  /** Delete an [[EvaluationInstance]] */\n  def delete(id: String): Unit\n}\n\n/** :: DeveloperApi ::\n  * JSON4S serializer for [[EvaluationInstance]]\n  *\n  * @group Meta Data\n  */\nclass EvaluationInstanceSerializer extends CustomSerializer[EvaluationInstance](\n  format => ({\n    case JObject(fields) =>\n      implicit val formats = DefaultFormats\n      fields.foldLeft(EvaluationInstance()) { case (i, field) =>\n        field match {\n          case JField(\"id\", JString(id)) => i.copy(id = id)\n          case JField(\"status\", JString(status)) => i.copy(status = status)\n          case JField(\"startTime\", JString(startTime)) =>\n            i.copy(startTime = Utils.stringToDateTime(startTime))\n          case JField(\"endTime\", JString(endTime)) =>\n            i.copy(endTime = Utils.stringToDateTime(endTime))\n          case JField(\"evaluationClass\", JString(evaluationClass)) =>\n            i.copy(evaluationClass = evaluationClass)\n          case JField(\"engineParamsGeneratorClass\", 
JString(engineParamsGeneratorClass)) =>\n            i.copy(engineParamsGeneratorClass = engineParamsGeneratorClass)\n          case JField(\"batch\", JString(batch)) => i.copy(batch = batch)\n          case JField(\"env\", env) =>\n            i.copy(env = Extraction.extract[Map[String, String]](env))\n          case JField(\"sparkConf\", sparkConf) =>\n            i.copy(sparkConf = Extraction.extract[Map[String, String]](sparkConf))\n          case JField(\"evaluatorResults\", JString(evaluatorResults)) =>\n            i.copy(evaluatorResults = evaluatorResults)\n          case JField(\"evaluatorResultsHTML\", JString(evaluatorResultsHTML)) =>\n            i.copy(evaluatorResultsHTML = evaluatorResultsHTML)\n          case JField(\"evaluatorResultsJSON\", JString(evaluatorResultsJSON)) =>\n            i.copy(evaluatorResultsJSON = evaluatorResultsJSON)\n          case _ => i\n        }\n      }\n  }, {\n    case i: EvaluationInstance =>\n      JObject(\n        JField(\"id\", JString(i.id)) ::\n          JField(\"status\", JString(i.status)) ::\n          JField(\"startTime\", JString(i.startTime.toString)) ::\n          JField(\"endTime\", JString(i.endTime.toString)) ::\n          JField(\"evaluationClass\", JString(i.evaluationClass)) ::\n          JField(\"engineParamsGeneratorClass\", JString(i.engineParamsGeneratorClass)) ::\n          JField(\"batch\", JString(i.batch)) ::\n          JField(\"env\", Extraction.decompose(i.env)(DefaultFormats)) ::\n          JField(\"sparkConf\", Extraction.decompose(i.sparkConf)(DefaultFormats)) ::\n          JField(\"evaluatorResults\", JString(i.evaluatorResults)) ::\n          JField(\"evaluatorResultsHTML\", JString(i.evaluatorResultsHTML)) ::\n          JField(\"evaluatorResultsJSON\", JString(i.evaluatorResultsJSON)) ::\n          Nil\n      )\n  }\n  )\n)\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Event.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.joda.time.DateTime\nimport org.joda.time.DateTimeZone\n\n/** Each event in the Event Store can be represented by fields in this case\n  * class.\n  *\n  * @param eventId Unique ID of this event.\n  * @param event Name of this event.\n  * @param entityType Type of the entity associated with this event.\n  * @param entityId ID of the entity associated with this event.\n  * @param targetEntityType Type of the target entity associated with this\n  *                         event.\n  * @param targetEntityId ID of the target entity associated with this event.\n  * @param properties Properties associated with this event.\n  * @param eventTime Time of the happening of this event.\n  * @param tags Tags of this event.\n  * @param prId PredictedResultId of this event.\n  * @param creationTime Time of creation in the system of this event.\n  * @group Event Data\n  */\ncase class Event(\n  val eventId: Option[String] = None,\n  val event: String,\n  val entityType: String,\n  val entityId: String,\n  val targetEntityType: Option[String] = 
None,\n  val targetEntityId: Option[String] = None,\n  val properties: DataMap = DataMap(), // default empty\n  val eventTime: DateTime = DateTime.now,\n  val tags: Seq[String] = Nil,\n  val prId: Option[String] = None,\n  val creationTime: DateTime = DateTime.now\n) {\n  override def toString(): String = {\n    s\"Event(id=$eventId,event=$event,eType=$entityType,eId=$entityId,\" +\n    s\"tType=$targetEntityType,tId=$targetEntityId,p=$properties,t=$eventTime,\" +\n    s\"tags=$tags,pKey=$prId,ct=$creationTime)\"\n  }\n}\n\n/** :: DeveloperApi ::\n  * Utilities for validating [[Event]]s\n  *\n  * @group Event Data\n  */\n@DeveloperApi\nobject EventValidation {\n  /** Default time zone is set to UTC */\n  val defaultTimeZone = DateTimeZone.UTC\n\n  /** Checks whether an event name contains a reserved prefix\n    *\n    * @param name Event name\n    * @return true if event name starts with \\$ or pio_, false otherwise\n    */\n  def isReservedPrefix(name: String): Boolean = name.startsWith(\"$\") ||\n    name.startsWith(\"pio_\")\n\n  /** PredictionIO reserves some single entity event names. 
They are currently\n    * \\$set, \\$unset, and \\$delete.\n    */\n  val specialEvents = Set(\"$set\", \"$unset\", \"$delete\")\n\n  /** Checks whether an event name is a special PredictionIO event name\n    *\n    * @param name Event name\n    * @return true if the name is a special event, false otherwise\n    */\n  def isSpecialEvents(name: String): Boolean = specialEvents.contains(name)\n\n  /** Validate an [[Event]], throwing exceptions when the candidate violates any\n    * of the following:\n    *\n    *  - event name must not be empty\n    *  - entityType must not be empty\n    *  - entityId must not be empty\n    *  - targetEntityType must not be Some of empty\n    *  - targetEntityId must not be Some of empty\n    *  - targetEntityType and targetEntityId must be both Some or None\n    *  - properties must not be empty when event is \\$unset\n    *  - event name must be a special event if it has a reserved prefix\n    *  - targetEntityType and targetEntityId must be None if the event name has\n    *    a reserved prefix\n    *  - entityType must be a built-in entity type if entityType has a\n    *    reserved prefix\n    *  - targetEntityType must be a built-in entity type if targetEntityType is\n    *    Some and has a reserved prefix\n    *\n    * @param e Event to be validated\n    */\n  def validate(e: Event): Unit = {\n\n    require(!e.event.isEmpty, \"event must not be empty.\")\n    require(!e.entityType.isEmpty, \"entityType must not be empty string.\")\n    require(!e.entityId.isEmpty, \"entityId must not be empty string.\")\n    require(e.targetEntityType.map(!_.isEmpty).getOrElse(true),\n      \"targetEntityType must not be empty string\")\n    require(e.targetEntityId.map(!_.isEmpty).getOrElse(true),\n      \"targetEntityId must not be empty string.\")\n    require(!((e.targetEntityType != None) && (e.targetEntityId == None)),\n      \"targetEntityType and targetEntityId must be specified together.\")\n    require(!((e.targetEntityType == None) 
&& (e.targetEntityId != None)),\n      \"targetEntityType and targetEntityId must be specified together.\")\n    require(!((e.event == \"$unset\") && e.properties.isEmpty),\n      \"properties cannot be empty for $unset event\")\n    require(!isReservedPrefix(e.event) || isSpecialEvents(e.event),\n      s\"${e.event} is not a supported reserved event name.\")\n    require(!isSpecialEvents(e.event) ||\n      ((e.targetEntityType == None) && (e.targetEntityId == None)),\n      s\"Reserved event ${e.event} cannot have targetEntity\")\n    require(!isReservedPrefix(e.entityType) ||\n      isBuiltinEntityTypes(e.entityType),\n      s\"The entityType ${e.entityType} is not allowed. \" +\n        s\"'pio_' is a reserved name prefix.\")\n    require(e.targetEntityType.map{ t =>\n      (!isReservedPrefix(t) || isBuiltinEntityTypes(t))}.getOrElse(true),\n      s\"The targetEntityType ${e.targetEntityType.get} is not allowed. \" +\n        s\"'pio_' is a reserved name prefix.\")\n    validateProperties(e)\n  }\n\n  /** Defines built-in entity types. The current built-in type is pio_pr. */\n  val builtinEntityTypes: Set[String] = Set(\"pio_pr\")\n\n  /** Defines built-in properties. This is currently empty. */\n  val builtinProperties: Set[String] = Set()\n\n  /** Checks whether an entity type is a built-in entity type */\n  def isBuiltinEntityTypes(name: String): Boolean = builtinEntityTypes.contains(name)\n\n  /** Validate event properties, throwing exceptions when the candidate violates\n    * any of the following:\n    *\n    *  - property name must not contain a reserved prefix\n    *\n    * @param e Event to be validated\n    */\n  def validateProperties(e: Event): Unit = {\n    e.properties.keySet.foreach { k =>\n      require(!isReservedPrefix(k) || builtinProperties.contains(k),\n        s\"The property ${k} is not allowed. \" +\n          s\"'pio_' is a reserved name prefix.\")\n    }\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/EventJson4sSupport.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.data.{Utils => DataUtils}\nimport org.joda.time.DateTime\nimport org.json4s._\nimport scala.util.{Try, Success, Failure}\n\n/** :: DeveloperApi ::\n  * Support library for dealing with [[Event]] and JSON4S\n  *\n  * @group Event Data\n  */\n@DeveloperApi\nobject EventJson4sSupport {\n  /** This is set to org.json4s.DefaultFormats. 
Do not use JSON4S to serialize\n    * or deserialize Joda-Time DateTime because it has some issues with timezone\n    * (as of version 3.2.10)\n    */\n  implicit val formats = DefaultFormats\n\n  /** :: DeveloperApi ::\n    * Convert JSON from Event Server to [[Event]]\n    *\n    * @return deserialization routine used by [[APISerializer]]\n    */\n  @DeveloperApi\n  def readJson: PartialFunction[JValue, Event] = {\n    case JObject(x) => {\n      val fields = new DataMap(x.toMap)\n      // use get() if required in json\n      // use getOpt() if not required in json\n      try {\n        val event = fields.get[String](\"event\")\n        val eventId = fields.getOpt[String](\"eventId\")\n        val entityType = fields.get[String](\"entityType\")\n        val entityId = fields.get[String](\"entityId\")\n        val targetEntityType = fields.getOpt[String](\"targetEntityType\")\n        val targetEntityId = fields.getOpt[String](\"targetEntityId\")\n        val properties = fields.getOrElse[Map[String, JValue]](\n          \"properties\", Map.empty)\n        // default currentTime expressed as UTC timezone\n        lazy val currentTime = DateTime.now(EventValidation.defaultTimeZone)\n        val eventTime = fields.getOpt[String](\"eventTime\")\n          .map{ s =>\n            try {\n              DataUtils.stringToDateTime(s)\n            } catch {\n              case _: Exception =>\n                throw new MappingException(s\"Fail to extract eventTime ${s}\")\n            }\n          }.getOrElse(currentTime)\n\n        // disable tags from API for now.\n      // val tags = fields.getOpt[Seq[String]](\"tags\").getOrElse(List())\n\n        val prId = fields.getOpt[String](\"prId\")\n\n        // don't allow user set creationTime from API for now.\n        val creationTime = currentTime\n      // val creationTime = fields.getOpt[String](\"creationTime\")\n      //   .map{ s =>\n      //     try {\n      //       DataUtils.stringToDateTime(s)\n      //     } 
catch {\n      //       case _: Exception =>\n      //         throw new MappingException(s\"Fail to extract creationTime ${s}\")\n      //     }\n      //   }.getOrElse(currentTime)\n\n\n        val newEvent = Event(\n          eventId=eventId,\n          event = event,\n          entityType = entityType,\n          entityId = entityId,\n          targetEntityType = targetEntityType,\n          targetEntityId = targetEntityId,\n          properties = DataMap(properties),\n          eventTime = eventTime,\n          prId = prId,\n          creationTime = creationTime\n        )\n        EventValidation.validate(newEvent)\n        newEvent\n      } catch {\n        case e: Exception => throw new MappingException(e.toString, e)\n      }\n    }\n  }\n\n  /** :: DeveloperApi ::\n    * Convert [[Event]] to JSON for use by the Event Server\n    *\n    * @return serialization routine used by [[APISerializer]]\n    */\n  @DeveloperApi\n  def writeJson: PartialFunction[Any, JValue] = {\n    case d: Event => {\n      JObject(\n        JField(\"eventId\",\n          d.eventId.map( eid => JString(eid)).getOrElse(JNothing)) ::\n        JField(\"event\", JString(d.event)) ::\n        JField(\"entityType\", JString(d.entityType)) ::\n        JField(\"entityId\", JString(d.entityId)) ::\n        JField(\"targetEntityType\",\n          d.targetEntityType.map(JString(_)).getOrElse(JNothing)) ::\n        JField(\"targetEntityId\",\n          d.targetEntityId.map(JString(_)).getOrElse(JNothing)) ::\n        JField(\"properties\", d.properties.toJObject) ::\n        JField(\"eventTime\", JString(DataUtils.dateTimeToString(d.eventTime))) ::\n        // disable tags from API for now\n        // JField(\"tags\", JArray(d.tags.toList.map(JString(_)))) ::\n        JField(\"prId\",\n          d.prId.map(JString(_)).getOrElse(JNothing)) ::\n        // creationTime is included in the output; readJson never accepts it from the client\n        JField(\"creationTime\",\n          
JString(DataUtils.dateTimeToString(d.creationTime))) ::\n        Nil)\n    }\n  }\n\n  /** :: DeveloperApi ::\n    * Convert JSON4S JValue to [[Event]]\n    *\n    * @return deserialization routine used by [[DBSerializer]]\n    */\n  @DeveloperApi\n  def deserializeFromJValue: PartialFunction[JValue, Event] = {\n    case jv: JValue => {\n      val event = (jv \\ \"event\").extract[String]\n      val entityType = (jv \\ \"entityType\").extract[String]\n      val entityId = (jv \\ \"entityId\").extract[String]\n      val targetEntityType = (jv \\ \"targetEntityType\").extract[Option[String]]\n      val targetEntityId = (jv \\ \"targetEntityId\").extract[Option[String]]\n      val properties = (jv \\ \"properties\").extract[JObject]\n      val eventTime = DataUtils.stringToDateTime(\n        (jv \\ \"eventTime\").extract[String])\n      val tags = (jv \\ \"tags\").extract[Seq[String]]\n      val prId = (jv \\ \"prId\").extract[Option[String]]\n      val creationTime = DataUtils.stringToDateTime(\n        (jv \\ \"creationTime\").extract[String])\n      Event(\n        event = event,\n        entityType = entityType,\n        entityId = entityId,\n        targetEntityType = targetEntityType,\n        targetEntityId = targetEntityId,\n        properties = DataMap(properties),\n        eventTime = eventTime,\n        tags = tags,\n        prId = prId,\n        creationTime = creationTime)\n    }\n  }\n\n  /** :: DeveloperApi ::\n    * Convert [[Event]] to JSON4S JValue\n    *\n    * @return serialization routine used by [[DBSerializer]]\n    */\n  @DeveloperApi\n  def serializeToJValue: PartialFunction[Any, JValue] = {\n    case d: Event => {\n      JObject(\n        JField(\"event\", JString(d.event)) ::\n        JField(\"entityType\", JString(d.entityType)) ::\n        JField(\"entityId\", JString(d.entityId)) ::\n        JField(\"targetEntityType\",\n          d.targetEntityType.map(JString(_)).getOrElse(JNothing)) ::\n        JField(\"targetEntityId\",\n          
d.targetEntityId.map(JString(_)).getOrElse(JNothing)) ::\n        JField(\"properties\", d.properties.toJObject) ::\n        JField(\"eventTime\", JString(DataUtils.dateTimeToString(d.eventTime))) ::\n        JField(\"tags\", JArray(d.tags.toList.map(JString(_)))) ::\n        JField(\"prId\",\n          d.prId.map(JString(_)).getOrElse(JNothing)) ::\n        JField(\"creationTime\",\n          JString(DataUtils.dateTimeToString(d.creationTime))) ::\n        Nil)\n    }\n  }\n\n  /** :: DeveloperApi ::\n    * Custom JSON4S serializer for [[Event]] intended to be used by database\n    * access, or anywhere that demands serdes of [[Event]] to/from JSON4S JValue\n    */\n  @DeveloperApi\n  class DBSerializer extends CustomSerializer[Event](format => (\n    deserializeFromJValue, serializeToJValue))\n\n  /** :: DeveloperApi ::\n    * Custom JSON4S serializer for [[Event]] intended to be used by the Event\n    * Server, or anywhere that demands serdes of [[Event]] to/from JSON\n    */\n  @DeveloperApi\n  class APISerializer extends CustomSerializer[Event](format => (\n    readJson, writeJson))\n}\n\n\n@DeveloperApi\nobject BatchEventsJson4sSupport {\n  implicit val formats = DefaultFormats\n\n  @DeveloperApi\n  def readJson: PartialFunction[JValue, Seq[Try[Event]]] = {\n    case JArray(events) => {\n      events.map { event =>\n        try {\n          Success(EventJson4sSupport.readJson(event))\n        } catch {\n          case e: Exception => Failure(e)\n        }\n      }\n    }\n  }\n\n  @DeveloperApi\n  class APISerializer extends CustomSerializer[Seq[Try[Event]]](format => (readJson, Map.empty))\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/LEventAggregator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.joda.time.DateTime\n\n/** :: DeveloperApi ::\n  * Provides aggregation support of [[Event]]s to [[LEvents]]. 
Engine developers\n  * should use [[org.apache.predictionio.data.store.LEventStore]] instead of using this\n  * directly.\n  *\n  * @group Event Data\n  */\n@DeveloperApi\nobject LEventAggregator {\n  /** :: DeveloperApi ::\n    * Aggregate all properties grouped by entity ID given an iterator of\n    * [[Event]]s with the latest property values from all [[Event]]s, and their\n    * first and last updated time\n    *\n    * @param events An iterator of [[Event]]s whose properties will be aggregated\n    * @return A map of entity ID to [[PropertyMap]]\n    */\n  @DeveloperApi\n  def aggregateProperties(events: Iterator[Event]): Map[String, PropertyMap] = {\n    events.toList\n      .groupBy(_.entityId)\n      .mapValues(_.sortBy(_.eventTime.getMillis)\n        .foldLeft[Prop](Prop())(propAggregator))\n      .filter{ case (k, v) => v.dm.isDefined }\n      .mapValues{ v =>\n        require(v.firstUpdated.isDefined,\n          \"Unexpected Error: firstUpdated cannot be None.\")\n        require(v.lastUpdated.isDefined,\n          \"Unexpected Error: lastUpdated cannot be None.\")\n\n        PropertyMap(\n          fields = v.dm.get.fields,\n          firstUpdated = v.firstUpdated.get,\n          lastUpdated = v.lastUpdated.get\n        )\n      }\n  }\n\n  /** :: DeveloperApi ::\n    * Aggregate all properties given an iterator of [[Event]]s with the latest\n    * property values from all [[Event]]s, and their first and last updated time\n    *\n    * @param events An iterator of [[Event]]s whose properties will be aggregated\n    * @return An optional [[PropertyMap]]\n    */\n  @DeveloperApi\n  def aggregatePropertiesSingle(events: Iterator[Event])\n  : Option[PropertyMap] = {\n    val prop = events.toList\n      .sortBy(_.eventTime.getMillis)\n      .foldLeft[Prop](Prop())(propAggregator)\n\n    prop.dm.map{ d =>\n      require(prop.firstUpdated.isDefined,\n        \"Unexpected Error: firstUpdated cannot be None.\")\n      require(prop.lastUpdated.isDefined,\n    
    \"Unexpected Error: lastUpdated cannot be None.\")\n\n      PropertyMap(\n        fields = d.fields,\n        firstUpdated = prop.firstUpdated.get,\n        lastUpdated = prop.lastUpdated.get\n      )\n    }\n  }\n\n  /** Event names that control aggregation: \\$set, \\$unset, and \\$delete */\n  val eventNames = List(\"$set\", \"$unset\", \"$delete\")\n\n  private\n  def dataMapAggregator: ((Option[DataMap], Event) => Option[DataMap]) = {\n    (p, e) => {\n      e.event match {\n        case \"$set\" => {\n          if (p == None) {\n            Some(e.properties)\n          } else {\n            p.map(_ ++ e.properties)\n          }\n        }\n        case \"$unset\" => {\n          if (p == None) {\n            None\n          } else {\n            p.map(_ -- e.properties.keySet)\n          }\n        }\n        case \"$delete\" => None\n        case _ => p // do nothing for others\n      }\n    }\n  }\n\n  private\n  def propAggregator: ((Prop, Event) => Prop) = {\n    (p, e) => {\n      e.event match {\n        case \"$set\" | \"$unset\" | \"$delete\" => {\n          Prop(\n            dm = dataMapAggregator(p.dm, e),\n            firstUpdated = p.firstUpdated.map { t =>\n              first(t, e.eventTime)\n            }.orElse(Some(e.eventTime)),\n            lastUpdated = p.lastUpdated.map { t =>\n              last(t, e.eventTime)\n            }.orElse(Some(e.eventTime))\n          )\n        }\n        case _ => p // do nothing for others\n      }\n    }\n  }\n\n  private\n  def first(a: DateTime, b: DateTime): DateTime = if (b.isBefore(a)) b else a\n\n  private\n  def last(a: DateTime, b: DateTime): DateTime = if (b.isAfter(a)) b else a\n\n  private case class Prop(\n    dm: Option[DataMap] = None,\n    firstUpdated: Option[DateTime] = None,\n    lastUpdated: Option[DateTime] = None\n  )\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/LEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.annotation.Experimental\n\nimport scala.concurrent.Future\nimport scala.concurrent.Await\nimport scala.concurrent.duration.Duration\nimport scala.concurrent.ExecutionContext\nimport scala.concurrent.TimeoutException\n\nimport org.joda.time.DateTime\n\n/** :: DeveloperApi ::\n  * Base trait of a data access object that directly returns [[Event]] without\n  * going through Spark's parallelization. 
Engine developers should use\n  * [[org.apache.predictionio.data.store.LEventStore]] instead of using this directly.\n  *\n  * @group Event Data\n  */\n@DeveloperApi\ntrait LEvents {\n  /** Default timeout for asynchronous operations that is set to 1 minute */\n  val defaultTimeout = Duration(60, \"seconds\")\n\n  /** :: DeveloperApi ::\n    * Initialize Event Store for an app ID and optionally a channel ID.\n    * This routine is to be called when an app is first created.\n    *\n    * @param appId App ID\n    * @param channelId Optional channel ID\n    * @return true if initialization was successful; false otherwise.\n    */\n  @DeveloperApi\n  def init(appId: Int, channelId: Option[Int] = None): Boolean\n\n  /** :: DeveloperApi ::\n    * Remove Event Store for an app ID and optional channel ID.\n    *\n    * @param appId App ID\n    * @param channelId Optional channel ID\n    * @return true if removal was successful; false otherwise.\n    */\n  @DeveloperApi\n  def remove(appId: Int, channelId: Option[Int] = None): Boolean\n\n  /** :: DeveloperApi ::\n    * Close this Event Store interface object, e.g. 
close connection, release\n    * resources, etc.\n    */\n  @DeveloperApi\n  def close(): Unit\n\n  /** :: DeveloperApi ::\n    * Insert an [[Event]] in a non-blocking fashion.\n    *\n    * @param event An [[Event]] to be inserted\n    * @param appId App ID for the [[Event]] to be inserted to\n    */\n  @DeveloperApi\n  def futureInsert(event: Event, appId: Int)(implicit ec: ExecutionContext):\n    Future[String] = futureInsert(event, appId, None)\n\n  /** :: DeveloperApi ::\n    * Insert an [[Event]] in a non-blocking fashion.\n    *\n    * @param event An [[Event]] to be inserted\n    * @param appId App ID for the [[Event]] to be inserted to\n    * @param channelId Optional channel ID for the [[Event]] to be inserted to\n    */\n  @DeveloperApi\n  def futureInsert(\n    event: Event, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[String]\n\n  /** :: DeveloperApi ::\n    * Insert [[Event]]s in a non-blocking fashion.\n    *\n    * Default implementation of this method is calling\n    * [[LEvents.futureInsert(Event, Int, Option[Int])]] per event.\n    * Override in the storage implementation if the storage has\n    * a better way to insert multiple data at once.\n    *\n    * @param events [[Event]]s to be inserted\n    * @param appId App ID for the [[Event]]s to be inserted to\n    * @param channelId Optional channel ID for the [[Event]]s to be inserted to\n    */\n  @DeveloperApi\n  def futureInsertBatch(events: Seq[Event], appId: Int, channelId: Option[Int])\n    (implicit ec: ExecutionContext): Future[Seq[String]] = {\n    val seq = events.map { event =>\n      futureInsert(event, appId, channelId)\n    }\n    Future.sequence(seq)\n  }\n\n  /** :: DeveloperApi ::\n    * Get an [[Event]] in a non-blocking fashion.\n    *\n    * @param eventId ID of the [[Event]]\n    * @param appId ID of the app that contains the [[Event]]\n    */\n  @DeveloperApi\n  def futureGet(eventId: String, appId: Int)(implicit ec: ExecutionContext):\n    
Future[Option[Event]] = futureGet(eventId, appId, None)\n\n  /** :: DeveloperApi ::\n    * Get an [[Event]] in a non-blocking fashion.\n    *\n    * @param eventId ID of the [[Event]]\n    * @param appId ID of the app that contains the [[Event]]\n    * @param channelId Optional channel ID that contains the [[Event]]\n    */\n  @DeveloperApi\n  def futureGet(\n      eventId: String,\n      appId: Int,\n      channelId: Option[Int]\n    )(implicit ec: ExecutionContext): Future[Option[Event]]\n\n  /** :: DeveloperApi ::\n    * Delete an [[Event]] in a non-blocking fashion.\n    *\n    * @param eventId ID of the [[Event]]\n    * @param appId ID of the app that contains the [[Event]]\n    */\n  @DeveloperApi\n  def futureDelete(eventId: String, appId: Int)(implicit ec: ExecutionContext):\n    Future[Boolean] = futureDelete(eventId, appId, None)\n\n  /** :: DeveloperApi ::\n    * Delete an [[Event]] in a non-blocking fashion.\n    *\n    * @param eventId ID of the [[Event]]\n    * @param appId ID of the app that contains the [[Event]]\n    * @param channelId Optional channel ID that contains the [[Event]]\n    */\n  @DeveloperApi\n  def futureDelete(\n      eventId: String,\n      appId: Int,\n      channelId: Option[Int]\n    )(implicit ec: ExecutionContext): Future[Boolean]\n\n  /** :: DeveloperApi ::\n    * Reads from database and returns a Future of Iterator of [[Event]]s.\n    *\n    * @param appId return events of this app ID\n    * @param channelId return events of this channel ID (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - 
Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param reversed Reverse the order.\n    *   - return oldest events first if None or Some(false) (default)\n    *   - return latest events first if Some(true)\n    * @param ec ExecutionContext\n    * @return Future[Iterator[Event]]\n    */\n  @DeveloperApi\n  def futureFind(\n      appId: Int,\n      channelId: Option[Int] = None,\n      startTime: Option[DateTime] = None,\n      untilTime: Option[DateTime] = None,\n      entityType: Option[String] = None,\n      entityId: Option[String] = None,\n      eventNames: Option[Seq[String]] = None,\n      targetEntityType: Option[Option[String]] = None,\n      targetEntityId: Option[Option[String]] = None,\n      limit: Option[Int] = None,\n      reversed: Option[Boolean] = None\n    )(implicit ec: ExecutionContext): Future[Iterator[Event]]\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    * and returns a Future of Map of entityId to properties.\n    *\n    * @param appId use events of this app ID\n    * @param channelId use events of this channel ID (default channel if it's None)\n    * @param entityType aggregate properties of the entities of this entityType\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param required only keep entities with these required properties defined\n    * @param ec ExecutionContext\n    * @return Future[Map[String, PropertyMap]]\n    */\n  private[predictionio] def futureAggregateProperties(\n    
appId: Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    required: Option[Seq[String]] = None)(implicit ec: ExecutionContext):\n    Future[Map[String, PropertyMap]] = {\n      futureFind(\n        appId = appId,\n        channelId = channelId,\n        startTime = startTime,\n        untilTime = untilTime,\n        entityType = Some(entityType),\n        eventNames = Some(LEventAggregator.eventNames)\n      ).map{ eventIt =>\n        val dm = LEventAggregator.aggregateProperties(eventIt)\n        if (required.isDefined) {\n          dm.filter { case (k, v) =>\n            required.get.map(v.contains(_)).reduce(_ && _)\n          }\n        } else dm\n      }\n    }\n\n  /**\n    * :: Experimental ::\n    *\n    * Aggregate properties of the specified entity (entityType + entityId)\n    * based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    * and returns a Future of Option[PropertyMap]\n    *\n    * @param appId use events of this app ID\n    * @param channelId use events of this channel ID (default channel if it's None)\n    * @param entityType the entityType\n    * @param entityId the entityId\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param ec ExecutionContext\n    * @return Future[Option[PropertyMap]]\n    */\n  @Experimental\n  private[predictionio] def futureAggregatePropertiesOfEntity(\n    appId: Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    entityId: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None)(implicit ec: ExecutionContext):\n    Future[Option[PropertyMap]] = {\n      futureFind(\n        appId = appId,\n        channelId = channelId,\n        startTime = startTime,\n        untilTime = untilTime,\n        entityType = Some(entityType),\n        entityId = 
Some(entityId),\n        eventNames = Some(LEventAggregator.eventNames)\n      ).map{ eventIt =>\n        LEventAggregator.aggregatePropertiesSingle(eventIt)\n      }\n    }\n\n  // following is blocking\n  private[predictionio] def insert(event: Event, appId: Int,\n    channelId: Option[Int] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    String = {\n    Await.result(futureInsert(event, appId, channelId), timeout)\n  }\n\n  private[predictionio] def get(eventId: String, appId: Int,\n    channelId: Option[Int] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Option[Event] = {\n    Await.result(futureGet(eventId, appId, channelId), timeout)\n  }\n\n  private[predictionio] def delete(eventId: String, appId: Int,\n    channelId: Option[Int] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Boolean = {\n    Await.result(futureDelete(eventId, appId, channelId), timeout)\n  }\n\n  /** reads from database and returns events iterator.\n    *\n    * @param appId return events of this app ID\n    * @param channelId return events of this channel ID (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should 
match x.\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param reversed Reverse the order (should be used with both\n    *   targetEntityType and targetEntityId specified)\n    *   - return oldest events first if None or Some(false) (default)\n    *   - return latest events first if Some(true)\n    * @param ec ExecutionContext\n    * @return Iterator[Event]\n    */\n  private[predictionio] def find(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    limit: Option[Int] = None,\n    reversed: Option[Boolean] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Iterator[Event] = {\n      Await.result(futureFind(\n        appId = appId,\n        channelId = channelId,\n        startTime = startTime,\n        untilTime = untilTime,\n        entityType = entityType,\n        entityId = entityId,\n        eventNames = eventNames,\n        targetEntityType = targetEntityType,\n        targetEntityId = targetEntityId,\n        limit = limit,\n        reversed = reversed), timeout)\n  }\n\n  // NOTE: remove in next release\n  @deprecated(\"Use find() instead.\", \"0.9.2\")\n  private[predictionio] def findLegacy(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    limit: Option[Int] = None,\n    reversed: Option[Boolean] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: 
ExecutionContext):\n    Either[StorageError, Iterator[Event]] = {\n      try {\n        // return Either for legacy usage\n        Right(Await.result(futureFind(\n          appId = appId,\n          channelId = channelId,\n          startTime = startTime,\n          untilTime = untilTime,\n          entityType = entityType,\n          entityId = entityId,\n          eventNames = eventNames,\n          targetEntityType = targetEntityType,\n          targetEntityId = targetEntityId,\n          limit = limit,\n          reversed = reversed), timeout))\n      } catch {\n        case e: TimeoutException => Left(StorageError(s\"${e}\"))\n        case e: Exception => Left(StorageError(s\"${e}\"))\n      }\n  }\n\n  /** reads events of the specified entity.\n    *\n    * @param appId return events of this app ID\n    * @param channelId return events of this channel ID (default channel if it's None)\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. 
Get all events if None or Some(-1)\n    * @param latest Return latest event first (default true)\n    * @param ec ExecutionContext\n    * @return Either[StorageError, Iterator[Event]]\n    */\n  // NOTE: remove this function in next release\n  @deprecated(\"Use LEventStore.findByEntity() instead.\", \"0.9.2\")\n  def findSingleEntity(\n    appId: Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    entityId: String,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    limit: Option[Int] = None,\n    latest: Boolean = true,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Either[StorageError, Iterator[Event]] = {\n\n    findLegacy(\n      appId = appId,\n      channelId = channelId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = Some(entityType),\n      entityId = Some(entityId),\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      limit = limit,\n      reversed = Some(latest),\n      timeout = timeout)\n\n  }\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    * and returns a Map of entityId to properties.\n    *\n    * @param appId use events of this app ID\n    * @param channelId use events of this channel ID (default channel if it's None)\n    * @param entityType aggregate properties of the entities of this entityType\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param required only keep entities with these required properties defined\n    * @param ec ExecutionContext\n    * @return Map[String, PropertyMap]\n    */\n  private[predictionio] def aggregateProperties(\n    appId: 
Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    required: Option[Seq[String]] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Map[String, PropertyMap] = {\n    Await.result(futureAggregateProperties(\n      appId = appId,\n      channelId = channelId,\n      entityType = entityType,\n      startTime = startTime,\n      untilTime = untilTime,\n      required = required), timeout)\n  }\n\n  /**\n    * :: Experimental ::\n    *\n    * Aggregate properties of the specified entity (entityType + entityId)\n    * based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    * and returns Option[PropertyMap]\n    *\n    * @param appId use events of this app ID\n    * @param channelId use events of this channel ID\n    * @param entityType the entityType\n    * @param entityId the entityId\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param ec ExecutionContext\n    * @return Option[PropertyMap]\n    */\n  @Experimental\n  private[predictionio] def aggregatePropertiesOfEntity(\n    appId: Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    entityId: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    timeout: Duration = defaultTimeout)(implicit ec: ExecutionContext):\n    Option[PropertyMap] = {\n\n    Await.result(futureAggregatePropertiesOfEntity(\n      appId = appId,\n      channelId = channelId,\n      entityType = entityType,\n      entityId = entityId,\n      startTime = startTime,\n      untilTime = untilTime), timeout)\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Models.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport com.google.common.io.BaseEncoding\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.json4s._\n\n/** :: DeveloperApi ::\n  * Stores model for each engine instance\n  *\n  * @param id ID of the model, which should be the same as engine instance ID\n  * @param models Trained models of all algorithms\n  * @group Model Data\n  */\n@DeveloperApi\ncase class Model(\n  id: String,\n  models: Array[Byte])\n\n/** :: DeveloperApi ::\n  * Base trait for of the [[Model]] data access object\n  *\n  * @group Model Data\n  */\n@DeveloperApi\ntrait Models {\n  /** Insert a new [[Model]] */\n  def insert(i: Model): Unit\n\n  /** Get a [[Model]] by ID */\n  def get(id: String): Option[Model]\n\n  /** Delete a [[Model]] */\n  def delete(id: String): Unit\n}\n\n/** :: DeveloperApi ::\n  * JSON4S serializer for [[Model]]\n  *\n  * @group Model Data\n  */\n@DeveloperApi\nclass ModelSerializer extends CustomSerializer[Model](\n  format => ({\n    case JObject(fields) =>\n      implicit val formats = DefaultFormats\n      val seed = Model(\n          id = \"\",\n          models = 
Array[Byte]())\n      fields.foldLeft(seed) { case (i, field) =>\n        field match {\n          case JField(\"id\", JString(id)) => i.copy(id = id)\n          case JField(\"models\", JString(models)) =>\n            i.copy(models = BaseEncoding.base64.decode(models))\n          case _ => i\n        }\n      }\n  },\n  {\n    case i: Model =>\n      JObject(\n        JField(\"id\", JString(i.id)) ::\n        JField(\"models\", JString(BaseEncoding.base64.encode(i.models))) ::\n        Nil)\n  }\n))\n\n// Use where models are saved outside the usual methods in pio\ncase class NullModel()\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/PEventAggregator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.joda.time.DateTime\n\nimport org.json4s.JValue\n\nimport org.apache.spark.rdd.RDD\n\n// each JValue data associated with the time it is set\nprivate[predictionio] case class PropTime(val d: JValue, val t: Long)\n    extends Serializable\n\nprivate[predictionio] case class SetProp (\n  val fields: Map[String, PropTime],\n  // last set time. 
Note: fields could be empty with valid set time\n  val t: Long) extends Serializable {\n\n  def ++ (that: SetProp): SetProp = {\n    val commonKeys = fields.keySet.intersect(that.fields.keySet)\n\n    val common: Map[String, PropTime] = commonKeys.map { k =>\n      val thisData = this.fields(k)\n      val thatData = that.fields(k)\n      // only keep the value with latest time\n      val v = if (thisData.t > thatData.t) thisData else thatData\n      (k, v)\n    }.toMap\n\n    val combinedFields = common ++\n      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)\n\n    // keep the latest set time\n    val combinedT = if (this.t > that.t) this.t else that.t\n\n    SetProp(\n      fields = combinedFields,\n      t = combinedT\n    )\n  }\n}\n\nprivate[predictionio] case class UnsetProp (fields: Map[String, Long])\n    extends Serializable {\n  def ++ (that: UnsetProp): UnsetProp = {\n    val commonKeys = fields.keySet.intersect(that.fields.keySet)\n\n    val common: Map[String, Long] = commonKeys.map { k =>\n      val thisData = this.fields(k)\n      val thatData = that.fields(k)\n      // only keep the value with latest time\n      val v = if (thisData > thatData) thisData else thatData\n      (k, v)\n    }.toMap\n\n    val combinedFields = common ++\n      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)\n\n    UnsetProp(\n      fields = combinedFields\n    )\n  }\n}\n\nprivate[predictionio] case class DeleteEntity (t: Long) extends Serializable {\n  def ++ (that: DeleteEntity): DeleteEntity = {\n    if (this.t > that.t) this else that\n  }\n}\n\nprivate[predictionio] case class EventOp (\n  val setProp: Option[SetProp] = None,\n  val unsetProp: Option[UnsetProp] = None,\n  val deleteEntity: Option[DeleteEntity] = None,\n  val firstUpdated: Option[DateTime] = None,\n  val lastUpdated: Option[DateTime] = None\n) extends Serializable {\n\n  def ++ (that: EventOp): EventOp = {\n    val firstUp = (this.firstUpdated ++ 
that.firstUpdated).reduceOption{\n      (a, b) => if (b.getMillis < a.getMillis) b else a\n    }\n    val lastUp = (this.lastUpdated ++ that.lastUpdated).reduceOption {\n      (a, b) => if (b.getMillis > a.getMillis) b else a\n    }\n\n    EventOp(\n      setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),\n      unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),\n      deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _),\n      firstUpdated = firstUp,\n      lastUpdated = lastUp\n    )\n  }\n\n  def toPropertyMap(): Option[PropertyMap] = {\n    setProp.flatMap { set =>\n\n      val unsetKeys: Set[String] = unsetProp.map( unset =>\n        unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet\n      ).getOrElse(Set())\n\n      val combinedFields = deleteEntity.map { delete =>\n        if (delete.t >= set.t) {\n          None\n        } else {\n          val deleteKeys: Set[String] = set.fields\n            .filter { case (k, PropTime(kv, t)) =>\n              (delete.t >= t)\n            }.keySet\n          Some(set.fields -- unsetKeys -- deleteKeys)\n        }\n      }.getOrElse{\n        Some(set.fields -- unsetKeys)\n      }\n\n      // Note: mapValues() doesn't return concrete Map and causes\n      // NotSerializableException issue. 
Use map(identity) to work around this.\n      // see https://issues.scala-lang.org/browse/SI-7005\n      combinedFields.map{ f =>\n        require(firstUpdated.isDefined,\n          \"Unexpected Error: firstUpdated cannot be None.\")\n        require(lastUpdated.isDefined,\n          \"Unexpected Error: lastUpdated cannot be None.\")\n        PropertyMap(\n          fields = f.mapValues(_.d).map(identity),\n          firstUpdated = firstUpdated.get,\n          lastUpdated = lastUpdated.get\n        )\n      }\n    }\n  }\n\n}\n\nprivate[predictionio] object EventOp {\n  // create EventOp from Event object\n  def apply(e: Event): EventOp = {\n    val t = e.eventTime.getMillis\n    e.event match {\n      case \"$set\" => {\n        val fields = e.properties.fields.mapValues(jv =>\n          PropTime(jv, t)\n        ).map(identity)\n\n        EventOp(\n          setProp = Some(SetProp(fields = fields, t = t)),\n          firstUpdated = Some(e.eventTime),\n          lastUpdated = Some(e.eventTime)\n        )\n      }\n      case \"$unset\" => {\n        val fields = e.properties.fields.mapValues(jv => t).map(identity)\n        EventOp(\n          unsetProp = Some(UnsetProp(fields = fields)),\n          firstUpdated = Some(e.eventTime),\n          lastUpdated = Some(e.eventTime)\n        )\n      }\n      case \"$delete\" => {\n        EventOp(\n          deleteEntity = Some(DeleteEntity(t)),\n          firstUpdated = Some(e.eventTime),\n          lastUpdated = Some(e.eventTime)\n        )\n      }\n      case _ => {\n        EventOp()\n      }\n    }\n  }\n}\n\n\nprivate[predictionio] object PEventAggregator {\n\n  val eventNames = List(\"$set\", \"$unset\", \"$delete\")\n\n  def aggregateProperties(eventsRDD: RDD[Event]): RDD[(String, PropertyMap)] = {\n    eventsRDD\n      .map( e => (e.entityId, EventOp(e) ))\n      .aggregateByKey[EventOp](EventOp())(\n        // within same partition\n        seqOp = { case (u, v) => u ++ v },\n        // across partition\n        
combOp = { case (accu, u) => accu ++ u }\n      )\n      .mapValues(_.toPropertyMap)\n      .filter{ case (k, v) => v.isDefined }\n      .map{ case (k, v) => (k, v.get) }\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/PEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.annotation.DeveloperApi\nimport org.apache.predictionio.annotation.Experimental\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.joda.time.DateTime\n\nimport scala.reflect.ClassTag\n\n/** :: DeveloperApi ::\n  * Base trait of a data access object that returns [[Event]] related RDD data\n  * structure. 
Engine developers should use\n  * [[org.apache.predictionio.data.store.PEventStore]] instead of using this directly.\n  *\n  * @group Event Data\n  */\n@DeveloperApi\ntrait PEvents extends Serializable {\n  @transient protected lazy val logger = Logger[this.type]\n  @deprecated(\"Use PEventStore.find() instead.\", \"0.9.2\")\n  def getByAppIdAndTimeAndEntity(appId: Int,\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    entityType: Option[String],\n    entityId: Option[String])(sc: SparkContext): RDD[Event] = {\n      find(\n        appId = appId,\n        startTime = startTime,\n        untilTime = untilTime,\n        entityType = entityType,\n        entityId = entityId,\n        eventNames = None\n      )(sc)\n    }\n\n  /** :: DeveloperApi ::\n    * Read from database and return the events. The deprecation here is intended\n    * to engine developers only.\n    *\n    * @param appId return events of this app ID\n    * @param channelId return events of this channel ID (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param sc Spark context\n    * @return RDD[Event]\n    */\n  @deprecated(\"Use PEventStore.find() instead.\", \"0.9.2\")\n  @DeveloperApi\n  
def find(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event]\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events. The deprecation here is intended to\n    * engine developers only.\n    *\n    * @param appId use events of this app ID\n    * @param channelId use events of this channel ID (default channel if it's None)\n    * @param entityType aggregate properties of the entities of this entityType\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param required only keep entities with these required properties defined\n    * @param sc Spark context\n    * @return RDD[(String, PropertyMap)] RDD of entityId and PropertyMap pair\n    */\n  @deprecated(\"Use PEventStore.aggregateProperties() instead.\", \"0.9.2\")\n  def aggregateProperties(\n    appId: Int,\n    channelId: Option[Int] = None,\n    entityType: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    required: Option[Seq[String]] = None)\n    (sc: SparkContext): RDD[(String, PropertyMap)] = {\n    val eventRDD = find(\n      appId = appId,\n      channelId = channelId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = Some(entityType),\n      eventNames = Some(PEventAggregator.eventNames))(sc)\n\n    val dmRDD = PEventAggregator.aggregateProperties(eventRDD)\n\n    required map { r =>\n      dmRDD.filter { case (k, v) =>\n        r.map(v.contains(_)).reduce(_ && _)\n      }\n    } getOrElse dmRDD\n  }\n\n  /** :: Experimental ::\n    * Extract EntityMap[A] 
from events for the entityType\n    * NOTE: it is local EntityMap[A]\n    */\n  @deprecated(\"Use PEventStore.aggregateProperties() instead.\", \"0.9.2\")\n  @Experimental\n  def extractEntityMap[A: ClassTag](\n    appId: Int,\n    entityType: String,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    required: Option[Seq[String]] = None)\n    (sc: SparkContext)(extract: DataMap => A): EntityMap[A] = {\n    val idToData: Map[String, A] = aggregateProperties(\n      appId = appId,\n      entityType = entityType,\n      startTime = startTime,\n      untilTime = untilTime,\n      required = required\n    )(sc).map{ case (id, dm) =>\n      try {\n        (id, extract(dm))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get extract entity from DataMap $dm of \" +\n            s\"entityId $id.\", e)\n          throw e\n        }\n      }\n    }.collectAsMap.toMap\n\n    new EntityMap(idToData)\n  }\n\n  /** :: DeveloperApi ::\n    * Write events to database\n    *\n    * @param events RDD of Event\n    * @param appId the app ID\n    * @param sc Spark Context\n    */\n  @DeveloperApi\n  def write(events: RDD[Event], appId: Int)(sc: SparkContext): Unit =\n    write(events, appId, None)(sc)\n\n  /** :: DeveloperApi ::\n    * Write events to database\n    *\n    * @param events RDD of Event\n    * @param appId the app ID\n    * @param channelId  channel ID (default channel if it's None)\n    * @param sc Spark Context\n    */\n  @DeveloperApi\n  def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit\n\n  @DeveloperApi\n  def delete(eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/PropertyMap.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage\n\nimport org.joda.time.DateTime\n\nimport org.json4s.JValue\nimport org.json4s.JObject\nimport org.json4s.native.JsonMethods.parse\n\n/** A PropertyMap stores aggregated properties of the entity.\n  * Internally it is a Map\n  * whose keys are property names and values are corresponding JSON values\n  * respectively. 
Use the get() method to retrieve the value of mandatory\n  * property or use getOpt() to retrieve the value of the optional property.\n  *\n  * @param fields Map of property name to JValue\n  * @param firstUpdated first updated time of this PropertyMap\n  * @param lastUpdated last updated time of this PropertyMap\n  */\nclass PropertyMap(\n  fields: Map[String, JValue],\n  val firstUpdated: DateTime,\n  val lastUpdated: DateTime\n) extends DataMap(fields) {\n\n  override\n  def toString: String = s\"PropertyMap(${fields}, ${firstUpdated}, ${lastUpdated})\"\n\n  override\n  def hashCode: Int =\n    41 * (\n      41 * (\n        41 + fields.hashCode\n      ) + firstUpdated.hashCode\n    ) + lastUpdated.hashCode\n\n  override\n  def equals(other: Any): Boolean = other match {\n    case that: PropertyMap => {\n      (that.canEqual(this)) &&\n      (super.equals(that)) &&\n      (this.firstUpdated.equals(that.firstUpdated)) &&\n      (this.lastUpdated.equals(that.lastUpdated))\n    }\n    case that: DataMap => { // for testing purpose\n      super.equals(that)\n    }\n    case _ => false\n  }\n\n  override\n  def canEqual(other: Any): Boolean = other.isInstanceOf[PropertyMap]\n}\n\n/** Companion object of the [[PropertyMap]] class. */\nobject PropertyMap {\n\n  /** Create a PropertyMap from a Map of String to JValue,\n    * firstUpdated and lastUpdated time.\n    *\n    * @param fields a Map of String to JValue\n    * @param firstUpdated First updated time\n    * @param lastUpdated Last updated time\n    * @return a new PropertyMap\n    */\n  def apply(fields: Map[String, JValue],\n    firstUpdated: DateTime, lastUpdated: DateTime): PropertyMap =\n    new PropertyMap(fields, firstUpdated, lastUpdated)\n\n  /** Create a PropertyMap from a JSON String and firstUpdated and lastUpdated\n    * time.\n    * @param js JSON String. 
e.g. \"\"\"{ \"a\": 1, \"b\": \"foo\" }\"\"\"\n    * @param firstUpdated First updated time\n    * @param lastUpdated Last updated time\n    * @return a new PropertyMap\n    */\n  def apply(js: String, firstUpdated: DateTime, lastUpdated: DateTime)\n  : PropertyMap = apply(\n      fields = parse(js).asInstanceOf[JObject].obj.toMap,\n      firstUpdated = firstUpdated,\n      lastUpdated = lastUpdated\n    )\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Storage.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport grizzled.slf4j.Logging\nimport org.apache.commons.lang3.exception.ExceptionUtils\nimport org.apache.predictionio.annotation.DeveloperApi\n\nimport scala.concurrent.ExecutionContext.Implicits.global\nimport scala.language.existentials\nimport scala.reflect.runtime.universe._\n\n/** :: DeveloperApi ::\n  * Any storage backend drivers will need to implement this trait with exactly\n  * '''StorageClient''' as the class name. PredictionIO storage layer will look\n  * for this class when it instantiates the actual backend for use by higher\n  * level storage access APIs.\n  *\n  * @group Storage System\n  */\n@DeveloperApi\ntrait BaseStorageClient {\n  /** Configuration of the '''StorageClient''' */\n  val config: StorageClientConfig\n\n  /** The actual client object. This could be a database connection or any kind\n    * of database access object.\n    */\n  val client: AnyRef\n\n  /** Set a prefix for storage class discovery. 
As an example, if this prefix\n    * is set as ''JDBC'', when the storage layer instantiates an implementation\n    * of [[Apps]], it will try to look for a class named ''JDBCApps''.\n    */\n  val prefix: String = \"\"\n}\n\n/** :: DeveloperApi ::\n  * A wrapper of storage client configuration that will be populated by\n  * PredictionIO automatically, and passed to the StorageClient during\n  * instantiation.\n  *\n  * @param parallel This is set to true by PredictionIO when the storage client\n  *                 is instantiated in a parallel data source.\n  * @param test This is set to true by PredictionIO when tests are being run.\n  * @param properties This is populated by PredictionIO automatically from\n  *                   environmental configuration variables. If you have these\n  *                   variables,\n  *                   - PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc\n  *                   - PIO_STORAGE_SOURCES_PGSQL_USERNAME=abc\n  *                   - PIO_STORAGE_SOURCES_PGSQL_PASSWORD=xyz\n  *\n  *                   this field will be filled as a map of string to string:\n  *                   - TYPE -> jdbc\n  *                   - USERNAME -> abc\n  *                   - PASSWORD -> xyz\n  *\n  * @group Storage System\n  */\n@DeveloperApi\ncase class StorageClientConfig(\n  parallel: Boolean = false, // parallelized access (RDD)?\n  test: Boolean = false, // test mode config\n  properties: Map[String, String] = Map.empty)\n\n/** :: DeveloperApi ::\n  * Thrown when a StorageClient runs into an exceptional condition\n  *\n  * @param message Exception error message\n  * @param cause The underlying exception that caused the exception\n  * @group Storage System\n  */\n@DeveloperApi\nclass StorageClientException(message: String, cause: Throwable)\n  extends RuntimeException(message, cause)\n\n/** DEPRECATED. 
Use [[StorageException]].\n  *\n  * @deprecated Use [[StorageException]]\n  */\nprivate[predictionio] case class StorageError(message: String)\n\n/** :: DeveloperApi ::\n  * Thrown by data access objects when they run into exceptional conditions\n  *\n  * @param message Exception error message\n  * @param cause The underlying exception that caused the exception\n  *\n  * @group Storage System\n  */\n@DeveloperApi\nclass StorageException(message: String, cause: Throwable)\n  extends Exception(message, cause) {\n\n  def this(message: String) = this(message, null)\n}\n\nclass EnvironmentService{\n\n  def envKeys(): Iterable[String] = {\n    sys.env.keys\n  }\n\n  def getByKey(key: String): String = {\n    sys.env(key)\n  }\n\n  def filter(filterExpression: ((String, String)) => Boolean): Map[String, String] = {\n    sys.env.filter(filterExpression)\n  }\n}\n\nobject EnvironmentFactory{\n\n  var environmentService: Option[EnvironmentService] = None\n\n  def create(): EnvironmentService = {\n    if(environmentService.isEmpty){\n      environmentService = new Some[EnvironmentService](new EnvironmentService)\n    }\n    environmentService.get\n  }\n}\n\n/** Backend-agnostic data storage layer with lazy initialization. 
Use this\n  * object when you need to interface with Event Store in your engine.\n  *\n  * @group Storage System\n  */\nobject Storage extends Logging {\n  private case class ClientMeta(\n    sourceType: String,\n    client: BaseStorageClient,\n    config: StorageClientConfig)\n\n  var environmentService: EnvironmentService = EnvironmentFactory.create\n\n  private case class DataObjectMeta(sourceName: String, namespace: String)\n\n  private var errors = 0\n\n  private val sourcesPrefix = \"PIO_STORAGE_SOURCES\"\n\n  private val sourceTypesRegex = \"\"\"PIO_STORAGE_SOURCES_([^_]+)_TYPE\"\"\".r\n\n  private val sourceKeys: Seq[String] = environmentService.envKeys.toSeq.flatMap { k =>\n    sourceTypesRegex findFirstIn k match {\n      case Some(sourceTypesRegex(sourceType)) => Seq(sourceType)\n      case None => Nil\n    }\n  }\n\n  if (sourceKeys.size == 0) warn(\"There is no properly configured data source.\")\n\n  private val s2cm = scala.collection.mutable.Map[String, Option[ClientMeta]]()\n\n  /** Reference to the app data repository. 
*/\n  private val EventDataRepository = \"EVENTDATA\"\n  private val ModelDataRepository = \"MODELDATA\"\n  private val MetaDataRepository = \"METADATA\"\n\n  private val repositoriesPrefix = \"PIO_STORAGE_REPOSITORIES\"\n\n  private val repositoryNamesRegex =\n    \"\"\"PIO_STORAGE_REPOSITORIES_([^_]+)_NAME\"\"\".r\n\n  private val repositoryKeys: Seq[String] = environmentService.envKeys.toSeq.flatMap { k =>\n    repositoryNamesRegex findFirstIn k match {\n      case Some(repositoryNamesRegex(repositoryName)) => Seq(repositoryName)\n      case None => Nil\n    }\n  }\n\n  if (repositoryKeys.size == 0) {\n    warn(\"There is no properly configured repository.\")\n  }\n\n  private val requiredRepositories = Seq(MetaDataRepository)\n\n  requiredRepositories foreach { r =>\n    if (!repositoryKeys.contains(r)) {\n      error(s\"Required repository (${r}) configuration is missing.\")\n      errors += 1\n    }\n  }\n  private val repositoriesToDataObjectMeta: Map[String, DataObjectMeta] =\n    repositoryKeys.map(r =>\n      try {\n        val keyedPath = repositoriesPrefixPath(r)\n        val name = environmentService.getByKey(prefixPath(keyedPath, \"NAME\"))\n        val sourceName = environmentService.getByKey(prefixPath(keyedPath, \"SOURCE\"))\n        if (sourceKeys.contains(sourceName)) {\n          r -> DataObjectMeta(\n            sourceName = sourceName,\n            namespace = name)\n        } else {\n          error(s\"$sourceName is not a configured storage source.\")\n          r -> DataObjectMeta(\"\", \"\")\n        }\n      } catch {\n        case e: Throwable =>\n          val stackTrace = ExceptionUtils.getStackTrace(e)\n          error(s\"${e.getMessage}\\n${stackTrace}\\n\\n\")\n          errors += 1\n          r -> DataObjectMeta(\"\", \"\")\n      }\n    ).toMap\n\n  if (errors > 0) {\n    error(s\"There were $errors configuration errors. 
Exiting.\")\n    sys.exit(errors)\n  }\n\n  // End of constructor and field definitions and begin method definitions\n\n  private def prefixPath(prefix: String, body: String) = s\"${prefix}_$body\"\n\n  private def sourcesPrefixPath(body: String) = prefixPath(sourcesPrefix, body)\n\n  private def repositoriesPrefixPath(body: String) =\n    prefixPath(repositoriesPrefix, body)\n\n  private def sourcesToClientMeta(\n      source: String,\n      parallel: Boolean,\n      test: Boolean): Option[ClientMeta] = {\n    val sourceName = if (parallel) s\"parallel-$source\" else source\n    s2cm.getOrElseUpdate(sourceName, updateS2CM(source, parallel, test))\n  }\n\n  private def getClient(\n    clientConfig: StorageClientConfig,\n    pkg: String): BaseStorageClient = {\n    val className = \"org.apache.predictionio.data.storage.\" + pkg + \".StorageClient\"\n    try {\n      Class.forName(className).getConstructors()(0).newInstance(clientConfig).\n        asInstanceOf[BaseStorageClient]\n    } catch {\n      case e: ClassNotFoundException =>\n        val originalClassName = pkg + \".StorageClient\"\n        Class.forName(originalClassName).getConstructors()(0).\n          newInstance(clientConfig).asInstanceOf[BaseStorageClient]\n      case e: java.lang.reflect.InvocationTargetException =>\n        throw e.getCause\n    }\n  }\n\n  /** Get the StorageClient config data from PIO Framework's environment variables */\n  def getConfig(sourceName: String): Option[StorageClientConfig] = {\n    if (s2cm.contains(sourceName) && s2cm.get(sourceName).nonEmpty\n      && s2cm.get(sourceName).get.nonEmpty) {\n      Some(s2cm.get(sourceName).get.get.config)\n    } else None\n  }\n\n  private def updateS2CM(k: String, parallel: Boolean, test: Boolean):\n  Option[ClientMeta] = {\n    try {\n      val keyedPath = sourcesPrefixPath(k)\n      val sourceType = environmentService.getByKey(prefixPath(keyedPath, \"TYPE\"))\n      val props = environmentService.filter(t => 
t._1.startsWith(keyedPath)).map(\n        t => t._1.replace(s\"${keyedPath}_\", \"\") -> t._2)\n      val clientConfig = StorageClientConfig(\n        properties = props,\n        parallel = parallel,\n        test = test)\n      val client = getClient(clientConfig, sourceType)\n      Some(ClientMeta(sourceType, client, clientConfig))\n    } catch {\n      case e: Throwable =>\n        val stackTrace = ExceptionUtils.getStackTrace(e)\n        error(s\"Error initializing storage client for source ${k}.\\n\" +\n          s\"${stackTrace}\\n\\n\")\n        errors += 1\n        None\n    }\n  }\n\n  private[predictionio]\n  def getDataObjectFromRepo[T](repo: String, test: Boolean = false)\n    (implicit tag: TypeTag[T]): T = {\n    val repoDOMeta = repositoriesToDataObjectMeta(repo)\n    val repoDOSourceName = repoDOMeta.sourceName\n    getDataObject[T](repoDOSourceName, repoDOMeta.namespace, test = test)\n  }\n\n  private[predictionio]\n  def getPDataObject[T](repo: String)(implicit tag: TypeTag[T]): T = {\n    val repoDOMeta = repositoriesToDataObjectMeta(repo)\n    val repoDOSourceName = repoDOMeta.sourceName\n    getPDataObject[T](repoDOSourceName, repoDOMeta.namespace)\n  }\n\n  private[predictionio] def getDataObject[T](\n      sourceName: String,\n      namespace: String,\n      parallel: Boolean = false,\n      test: Boolean = false)(implicit tag: TypeTag[T]): T = {\n    val clientMeta = sourcesToClientMeta(sourceName, parallel, test) getOrElse {\n      throw new StorageClientException(\n        s\"Data source $sourceName was not properly initialized.\", null)\n    }\n    val sourceType = clientMeta.sourceType\n    val ctorArgs = dataObjectCtorArgs(clientMeta.client, namespace)\n    val classPrefix = clientMeta.client.prefix\n    val originalClassName = tag.tpe.toString.split('.')\n    val rawClassName = sourceType + \".\" + classPrefix + originalClassName.last\n    val className = \"org.apache.predictionio.data.storage.\" + rawClassName\n    val clazz = try 
{\n      Class.forName(className)\n    } catch {\n      case e: ClassNotFoundException =>\n        try {\n          Class.forName(rawClassName)\n        } catch {\n          case e: ClassNotFoundException =>\n            throw new StorageClientException(\"No storage backend \" +\n              \"implementation can be found (tried both \" +\n              s\"$className and $rawClassName)\", e)\n        }\n    }\n    val constructor = clazz.getConstructors()(0)\n    try {\n      constructor.newInstance(ctorArgs: _*).\n        asInstanceOf[T]\n    } catch {\n      case e: IllegalArgumentException =>\n        error(\n          \"Unable to instantiate data object with class '\" +\n          constructor.getDeclaringClass.getName + \" because its constructor\" +\n          \" does not have the right number of arguments.\" +\n          \" Number of required constructor arguments: \" +\n          ctorArgs.size + \".\" +\n          \" Number of existing constructor arguments: \" +\n          constructor.getParameterTypes.size + \".\" +\n          s\" Storage source name: ${sourceName}.\" +\n          s\" Exception message: ${e.getMessage}).\", e)\n        errors += 1\n        throw e\n      case e: java.lang.reflect.InvocationTargetException =>\n        throw e.getCause\n    }\n  }\n\n  private def getPDataObject[T](\n      sourceName: String,\n      databaseName: String)(implicit tag: TypeTag[T]): T =\n    getDataObject[T](sourceName, databaseName, true)\n\n  private def dataObjectCtorArgs(\n      client: BaseStorageClient,\n      namespace: String): Seq[AnyRef] = {\n    Seq(client.client, client.config, namespace)\n  }\n\n  private[predictionio] def verifyAllDataObjects(): Unit = {\n    info(\"Verifying Meta Data Backend (Source: \" +\n      s\"${repositoriesToDataObjectMeta(MetaDataRepository).sourceName})...\")\n    getMetaDataEngineInstances()\n    getMetaDataEvaluationInstances()\n    getMetaDataApps()\n    getMetaDataAccessKeys()\n    info(\"Verifying Model Data 
Backend (Source: \" +\n      s\"${repositoriesToDataObjectMeta(ModelDataRepository).sourceName})...\")\n    getModelDataModels()\n    info(\"Verifying Event Data Backend (Source: \" +\n      s\"${repositoriesToDataObjectMeta(EventDataRepository).sourceName})...\")\n    val eventsDb = getLEvents(test = true)\n    info(\"Test writing to Event Store (App Id 0)...\")\n    // use appId=0 for testing purpose\n    eventsDb.init(0)\n    eventsDb.insert(Event(\n      event = \"test\",\n      entityType = \"test\",\n      entityId = \"test\"), 0)\n    eventsDb.remove(0)\n    eventsDb.close()\n  }\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[EngineInstances]]\n    *\n    * @return An implementation of [[EngineInstances]], depending on the runtime configuration\n    */\n  def getMetaDataEngineInstances(): EngineInstances =\n    getDataObjectFromRepo[EngineInstances](MetaDataRepository)\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[EvaluationInstances]]\n    *\n    * @return An implementation of [[EvaluationInstances]], depending on the runtime configuration\n    */\n  def getMetaDataEvaluationInstances(): EvaluationInstances =\n    getDataObjectFromRepo[EvaluationInstances](MetaDataRepository)\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[Apps]]\n    *\n    * @return An implementation of [[Apps]], depending on the runtime configuration\n    */\n  def getMetaDataApps(): Apps =\n    getDataObjectFromRepo[Apps](MetaDataRepository)\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[AccessKeys]]\n    *\n    * @return An implementation of [[AccessKeys]], depending on the runtime configuration\n    */\n  def getMetaDataAccessKeys(): AccessKeys =\n    getDataObjectFromRepo[AccessKeys](MetaDataRepository)\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[Channels]]\n    *\n    * @return An implementation of [[Channels]], depending on the runtime configuration\n    */\n  def 
getMetaDataChannels(): Channels =\n    getDataObjectFromRepo[Channels](MetaDataRepository)\n\n  /** :: DeveloperApi ::\n    * Get a data access object for [[Models]]\n    *\n    * @return An implementation of [[Models]], depending on the runtime configuration\n    */\n  def getModelDataModels(): Models =\n    getDataObjectFromRepo[Models](ModelDataRepository)\n\n  /** Obtains a data access object that returns [[Event]] related local data\n    * structure.\n    */\n  def getLEvents(test: Boolean = false): LEvents =\n    getDataObjectFromRepo[LEvents](EventDataRepository, test = test)\n\n  /** Obtains a data access object that returns [[Event]] related RDD data\n    * structure.\n    */\n  def getPEvents(): PEvents =\n    getPDataObject[PEvents](EventDataRepository)\n\n  def config: Map[String, Map[String, Map[String, String]]] = Map(\n    \"sources\" -> s2cm.toMap.map { case (source, clientMeta) =>\n      source -> clientMeta.map { cm =>\n        Map(\n          \"type\" -> cm.sourceType,\n          \"config\" -> cm.config.properties.map(t => s\"${t._1} -> ${t._2}\").mkString(\", \")\n        )\n      }.getOrElse(Map.empty)\n    }\n  )\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/Utils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.joda.time.DateTime\nimport org.joda.time.format.ISODateTimeFormat\n\n/** Backend-agnostic storage utilities. */\nprivate[predictionio] object Utils {\n\n  /** Add prefix to custom attribute keys. */\n  def addPrefixToAttributeKeys[T](\n      attributes: Map[String, T],\n      prefix: String = \"ca_\"): Map[String, T] = {\n    attributes map { case (k, v) => (prefix + k, v) }\n  }\n\n  /** Remove prefix from custom attribute keys. */\n  def removePrefixFromAttributeKeys[T](\n      attributes: Map[String, T],\n      prefix: String = \"ca_\"): Map[String, T] = {\n    attributes map { case (k, v) => (k.stripPrefix(prefix), v) }\n  }\n\n  /** Appends App ID to any ID.\n    * Used for distinguishing different apps' data within a single collection.\n    */\n  def idWithAppid(appid: Int, id: String): String = appid + \"_\" + id\n\n  def stringToDateTime(dt: String): DateTime =\n    ISODateTimeFormat.dateTimeParser.parseDateTime(dt)\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/storage/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data\n\n/** If you are an engine developer, please refer to the [[store]] package.\n  *\n  * This package provides convenient access to underlying data access objects.\n  * The common entry point is [[Storage]].\n  *\n  * Developer APIs are available to advanced developers to add support of other\n  * data store backends.\n  */\npackage object storage {}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/Common.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store\n\nimport org.apache.predictionio.data.storage.Storage\nimport scala.collection.mutable\nimport grizzled.slf4j.Logger\n\nprivate[predictionio] object Common {\n\n  @transient lazy val logger = Logger[this.type]\n  @transient lazy private val appsDb = Storage.getMetaDataApps()\n  @transient lazy private val channelsDb = Storage.getMetaDataChannels()\n  // Memoize app & channel name-to-ID resolution to avoid excessive storage IO\n  @transient lazy val appNameToIdCache =\n    mutable.Map[(String, Option[String]), (Int, Option[Int])]()\n\n  /* throw exception if invalid app name or channel name */\n  def appNameToId(appName: String, channelName: Option[String]): (Int, Option[Int]) = {\n    appNameToIdCache.getOrElseUpdate((appName, channelName), {\n      val appOpt = appsDb.getByName(appName)\n\n      appOpt.map { app =>\n        val channelMap: Map[String, Int] = channelsDb.getByAppid(app.id)\n          .map(c => (c.name, c.id)).toMap\n\n        val channelId: Option[Int] = channelName.map { ch =>\n          if (channelMap.contains(ch)) {\n            channelMap(ch)\n          } else {\n            
logger.error(s\"Invalid channel name ${ch}.\")\n            throw new IllegalArgumentException(s\"Invalid channel name ${ch}.\")\n          }\n        }\n\n        appNameToIdCache((appName, channelName)) = (app.id, channelId)\n        (app.id, channelId)\n      }.getOrElse {\n        logger.error(s\"Invalid app name ${appName}\")\n        throw new IllegalArgumentException(s\"Invalid app name ${appName}\")\n      }\n    })\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/LEventStore.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store\n\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.data.storage.Event\nimport org.joda.time.DateTime\n\nimport scala.concurrent.{Await, ExecutionContext, Future}\nimport scala.concurrent.duration.Duration\n\n/** This object provides a set of operations to access Event Store\n  * without going through Spark's parallelization.\n  *\n  * Note that blocking methods of this object use\n  * `scala.concurrent.ExecutionContext.Implicits.global` internally.\n  * Since this is a thread pool which has a number of threads equal to available\n  * processors, parallelism is limited up to the number of processors.\n  *\n  * If this limitation becomes a bottleneck of resource usage, you can increase the\n  * number of threads by declaring the following VM options before calling \"pio deploy\":\n  *\n  * <pre>\n  * export JAVA_OPTS=\"$JAVA_OPTS \\\n  *   -Dscala.concurrent.context.numThreads=1000 \\\n  *   -Dscala.concurrent.context.maxThreads=1000\"\n  * </pre>\n  *\n  * You can learn more about the global execution context in the Scala documentation:\n  * [[https://docs.scala-lang.org/overviews/core/futures.html#the-global-execution-context]]\n  */\nobject LEventStore {\n\n  private val defaultTimeout = Duration(60, \"seconds\")\n\n  @transient lazy private val eventsDb = Storage.getLEvents()\n\n  /** Reads events of the specified entity. May use this in Algorithm's predict()\n    * or Serving logic to have fast event store access.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param latest Return latest event first (default true)\n    * @return Iterator[Event]\n    */\n  def findByEntity(\n    appName: String,\n    entityType: String,\n    entityId: String,\n    channelName: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    limit: Option[Int] = None,\n    latest: Boolean = true,\n    timeout: Duration = defaultTimeout): Iterator[Event] = {\n\n    // Import here to ensure ExecutionContext.Implicits.global is used only in this method\n    import scala.concurrent.ExecutionContext.Implicits.global\n\n    Await.result(findByEntityAsync(\n      appName = appName,\n      entityType = entityType,\n      entityId = entityId,\n      channelName = channelName,\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      startTime = startTime,\n      untilTime = untilTime,\n      limit = limit,\n      latest = latest),\n      timeout)\n  }\n\n  /** Reads events of the specified entity. May use this in Algorithm's predict()\n    * or Serving logic to have fast event store access.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param latest Return latest event first (default true)\n    * @return Future[Iterator[Event]]\n    */\n  def findByEntityAsync(\n    appName: String,\n    entityType: String,\n    entityId: String,\n    channelName: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    limit: Option[Int] = None,\n    latest: Boolean = true)(implicit ec: ExecutionContext): Future[Iterator[Event]] = {\n\n    val (appId, channelId) = Common.appNameToId(appName, channelName)\n\n    eventsDb.futureFind(\n      appId = appId,\n      channelId = channelId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = Some(entityType),\n      entityId = Some(entityId),\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      limit = limit,\n      reversed = Some(latest))\n  }\n\n  /** Reads events generically. If entityType or entityId is not specified, it\n    * results in table scan.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    *   - None means no restriction on entityType\n    *   - Some(x) means entityType should match x.\n    * @param entityId return events of this entityId\n    *   - None means no restriction on entityId\n    *   - Some(x) means entityId should match x.\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @return Iterator[Event]\n    */\n  def find(\n    appName: String,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    channelName: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    limit: Option[Int] = None,\n    timeout: Duration = defaultTimeout): Iterator[Event] = {\n\n    // Import here to ensure ExecutionContext.Implicits.global is used only in this method\n    import scala.concurrent.ExecutionContext.Implicits.global\n\n    Await.result(findAsync(\n      appName = appName,\n      entityType = entityType,\n      entityId = entityId,\n      channelName = channelName,\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      startTime = startTime,\n      untilTime = untilTime,\n      limit = limit), timeout)\n  }\n\n  /** Reads events generically. If entityType or entityId is not specified, it\n    * results in table scan.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    *   - None means no restriction on entityType\n    *   - Some(x) means entityType should match x.\n    * @param entityId return events of this entityId\n    *   - None means no restriction on entityId\n    *   - Some(x) means entityId should match x.\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @return Future[Iterator[Event]]\n    */\n  def findAsync(\n    appName: String,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    channelName: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    limit: Option[Int] = None)(implicit ec: ExecutionContext): Future[Iterator[Event]] = {\n\n    val (appId, channelId) = Common.appNameToId(appName, channelName)\n\n    eventsDb.futureFind(\n      appId = appId,\n      channelId = channelId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = entityType,\n      entityId = entityId,\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      limit = limit)\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/PEventStore.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store\n\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.PropertyMap\n\nimport org.joda.time.DateTime\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport scala.concurrent.ExecutionContext\n\n/** This object provides a set of operations to access Event Store\n  * with Spark's parallelization\n  */\nobject PEventStore {\n\n  @transient lazy private val eventsDb = Storage.getPEvents()\n\n  /** Read events from Event Store\n    *\n    * @param appName return events of this app\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param sc Spark context\n    * @return RDD[Event]\n    */\n  def find(\n    appName: String,\n    channelName: Option[String] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None\n  )(sc: SparkContext): RDD[Event] = {\n\n    val (appId, channelId) = Common.appNameToId(appName, channelName)\n\n    eventsDb.find(\n      appId = appId,\n      channelId = channelId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = entityType,\n      entityId = entityId,\n      eventNames = eventNames,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId\n    )(sc)\n\n  }\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    *\n    * @param appName use events of this app\n    * @param entityType aggregate properties of the entities of this entityType\n    * @param channelName use events of this channel (default channel if it's None)\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param required only keep entities with these required properties defined\n    * @param sc Spark context\n    * @return RDD[(String, PropertyMap)] RDD of entityId and PropertyMap pair\n    */\n  def aggregateProperties(\n    appName: String,\n    entityType: String,\n    channelName: Option[String] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    required: Option[Seq[String]] = None)\n    (sc: SparkContext): RDD[(String, PropertyMap)] = {\n\n      val (appId, channelId) = Common.appNameToId(appName, channelName)\n\n      eventsDb.aggregateProperties(\n        appId = appId,\n        entityType = entityType,\n        channelId = channelId,\n        startTime = startTime,\n        untilTime = untilTime,\n        required = required\n      )(sc)\n\n    }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/java/LJavaEventStore.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store.java\n\nimport java.util.concurrent.{CompletableFuture, CompletionStage, ExecutorService}\n\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.LEventStore\nimport org.joda.time.DateTime\n\nimport scala.collection.JavaConversions\nimport scala.concurrent.duration.Duration\nimport scala.compat.java8.FutureConverters._\n\n/** This Java-friendly object provides a set of operations to access Event Store\n  * without going through Spark's parallelization.\n  *\n  * Note that blocking methods of this object use\n  * `scala.concurrent.ExecutionContext.Implicits.global` internally.\n  * Since this is a thread pool which has a number of threads equal to available\n  * processors, parallelism is limited up to the number of processors.\n  *\n  * If this limitation becomes a bottleneck of resource usage, you can increase the\n  * number of threads by declaring the following VM options before calling \"pio deploy\":\n  *\n  * <pre>\n  * export JAVA_OPTS=\"$JAVA_OPTS \\\n  *   -Dscala.concurrent.context.numThreads=1000 \\\n  *   -Dscala.concurrent.context.maxThreads=1000\"\n  * </pre>\n  *\n  * You can learn more about the global execution context in the Scala documentation:\n  * [[https://docs.scala-lang.org/overviews/core/futures.html#the-global-execution-context]]\n  */\nobject LJavaEventStore {\n\n  /** Reads events of the specified entity. May use this in Algorithm's predict()\n    * or Serving logic to have fast event store access.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param latest Return latest event first\n    * @return java.util.List[Event]\n    */\n  def findByEntity(\n    appName: String,\n    entityType: String,\n    entityId: String,\n    channelName: Option[String],\n    eventNames: Option[java.util.List[String]],\n    targetEntityType: Option[Option[String]],\n    targetEntityId: Option[Option[String]],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    limit: Option[Integer],\n    latest: Boolean,\n    timeout: Duration): java.util.List[Event] = {\n\n    val eventNamesSeq = eventNames.map(JavaConversions.asScalaBuffer(_).toSeq)\n    val limitInt = limit.map(_.intValue())\n\n    JavaConversions.seqAsJavaList(\n      LEventStore.findByEntity(\n        appName,\n        entityType,\n        entityId,\n        channelName,\n        eventNamesSeq,\n        targetEntityType,\n        targetEntityId,\n        startTime,\n        untilTime,\n        limitInt,\n        latest,\n        timeout\n      ).toSeq)\n  }\n\n  /** Reads events of the specified entity. May use this in Algorithm's predict()\n    * or Serving logic to have fast event store access.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @param latest Return latest event first\n    * @return CompletableFuture[java.util.List[Event]]\n    */\n  def findByEntityAsync(\n    appName: String,\n    entityType: String,\n    entityId: String,\n    channelName: Option[String],\n    eventNames: Option[java.util.List[String]],\n    targetEntityType: Option[Option[String]],\n    targetEntityId: Option[Option[String]],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    limit: Option[Integer],\n    latest: Boolean,\n    executorService: ExecutorService): CompletableFuture[java.util.List[Event]] = {\n\n    val eventNamesSeq = eventNames.map(JavaConversions.asScalaBuffer(_).toSeq)\n    val limitInt = limit.map(_.intValue())\n    implicit val ec = fromExecutorService(executorService)\n\n    LEventStore.findByEntityAsync(\n      appName,\n      entityType,\n      entityId,\n      channelName,\n      eventNamesSeq,\n      targetEntityType,\n      targetEntityId,\n      startTime,\n      untilTime,\n      limitInt,\n      latest\n    ).map { x => JavaConversions.seqAsJavaList(x.toSeq) }.toJava.toCompletableFuture\n  }\n\n  /** Reads events generically. If entityType or entityId is not specified, it\n    * results in table scan.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    *   - None means no restriction on entityType\n    *   - Some(x) means entityType should match x.\n    * @param entityId return events of this entityId\n    *   - None means no restriction on entityId\n    *   - Some(x) means entityId should match x.\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @return java.util.List[Event]\n    */\n  def find(\n    appName: String,\n    entityType: Option[String],\n    entityId: Option[String],\n    channelName: Option[String],\n    eventNames: Option[java.util.List[String]],\n    targetEntityType: Option[Option[String]],\n    targetEntityId: Option[Option[String]],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    limit: Option[Integer],\n    timeout: Duration): java.util.List[Event] = {\n\n    val eventNamesSeq = eventNames.map(JavaConversions.asScalaBuffer(_).toSeq)\n    val limitInt = limit.map(_.intValue())\n\n    JavaConversions.seqAsJavaList(\n      LEventStore.find(\n        appName,\n        entityType,\n        entityId,\n        channelName,\n        eventNamesSeq,\n        targetEntityType,\n        targetEntityId,\n        startTime,\n        untilTime,\n        limitInt,\n        timeout\n      ).toSeq)\n  }\n\n  /** Reads events generically. If entityType or entityId is not specified, it\n    * results in table scan.\n    *\n    * @param appName return events of this app\n    * @param entityType return events of this entityType\n    *   - None means no restriction on entityType\n    *   - Some(x) means entityType should match x.\n    * @param entityId return events of this entityId\n    *   - None means no restriction on entityId\n    *   - Some(x) means entityId should match x.\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param limit Limit number of events. Get all events if None or Some(-1)\n    * @return CompletableFuture[java.util.List[Event]]\n    */\n  def findAsync(\n    appName: String,\n    entityType: Option[String],\n    entityId: Option[String],\n    channelName: Option[String],\n    eventNames: Option[java.util.List[String]],\n    targetEntityType: Option[Option[String]],\n    targetEntityId: Option[Option[String]],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    limit: Option[Integer],\n    executorService: ExecutorService): CompletableFuture[java.util.List[Event]] = {\n\n    val eventNamesSeq = eventNames.map(JavaConversions.asScalaBuffer(_).toSeq)\n    val limitInt = limit.map(_.intValue())\n    implicit val ec = fromExecutorService(executorService)\n\n    LEventStore.findAsync(\n      appName,\n      entityType,\n      entityId,\n      channelName,\n      eventNamesSeq,\n      targetEntityType,\n      targetEntityId,\n      startTime,\n      untilTime,\n      limitInt\n    ).map { x => JavaConversions.seqAsJavaList(x.toSeq) }.toJava.toCompletableFuture\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/java/OptionHelper.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store.java\n\n/** Used by Java-based engines to mock Some and None */\nobject OptionHelper {\n  /** Mimics a None from Java-based engine */\n  def none[T]: Option[T] = {\n    Option(null.asInstanceOf[T])\n  }\n\n  /** Mimics a Some from Java-based engine */\n  def some[T](value: T): Option[T] = {\n    Some(value)\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/java/PJavaEventStore.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.store.java\n\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.PropertyMap\nimport org.apache.predictionio.data.store.PEventStore\nimport org.apache.spark.SparkContext\nimport org.apache.spark.api.java.JavaRDD\nimport org.joda.time.DateTime\n\nimport scala.collection.JavaConversions\n\n/** This Java-friendly object provides a set of operation to access Event Store\n  * with Spark's parallelization\n  */\nobject PJavaEventStore {\n\n  /** Read events from Event Store\n    *\n    * @param appName return events of this app\n    * @param channelName return events of this channel (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param entityType return events of this entityType\n    * @param entityId return events of this entityId\n    * @param eventNames return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) 
means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n    * @param targetEntityId return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param sc Spark context\n    * @return JavaRDD[Event]\n    */\n  def find(\n    appName: String,\n    channelName: Option[String],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    entityType: Option[String],\n    entityId: Option[String],\n    eventNames: Option[java.util.List[String]],\n    targetEntityType: Option[Option[String]],\n    targetEntityId: Option[Option[String]],\n    sc: SparkContext): JavaRDD[Event] = {\n\n    val eventNamesSeq = eventNames.map(JavaConversions.asScalaBuffer(_).toSeq)\n\n    PEventStore.find(\n      appName,\n      channelName,\n      startTime,\n      untilTime,\n      entityType,\n      entityId,\n      eventNamesSeq,\n      targetEntityType,\n      targetEntityId\n    )(sc)\n  }\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    *\n    * @param appName use events of this app\n    * @param entityType aggregate properties of the entities of this entityType\n    * @param channelName use events of this channel (default channel if it's None)\n    * @param startTime use events with eventTime >= startTime\n    * @param untilTime use events with eventTime < untilTime\n    * @param required only keep entities with these required properties defined\n    * @param sc Spark context\n    * @return JavaRDD[(String, PropertyMap)] JavaRDD of entityId and PropetyMap pair\n    */\n  def aggregateProperties(\n    appName: String,\n    entityType: String,\n    channelName: Option[String],\n    startTime: Option[DateTime],\n    untilTime: Option[DateTime],\n    required: Option[java.util.List[String]],\n    sc: 
SparkContext): JavaRDD[(String, PropertyMap)] = {\n\n    PEventStore.aggregateProperties(\n      appName,\n    entityType,\n    channelName,\n    startTime,\n    untilTime\n    )(sc)\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data\n\n/** Provides high level interfaces to the Event Store from within a prediction\n  * engine.\n  */\npackage object store {}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/store/python/PPythonEventStore.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage org.apache.predictionio.data.store.python\n\nimport java.sql.Timestamp\n\nimport org.apache.predictionio.data.store.PEventStore\nimport org.apache.spark.sql.{DataFrame, SparkSession}\nimport org.joda.time.DateTime\n\n\n/** This object provides a set of operation to access Event Store\n  * with Spark's parallelization\n  */\nobject PPythonEventStore {\n\n\n  /** Read events from Event Store\n    *\n    * @param appName          return events of this app\n    * @param channelName      return events of this channel (default channel if it's None)\n    * @param startTime        return events with eventTime >= startTime\n    * @param untilTime        return events with eventTime < untilTime\n    * @param entityType       return events of this entityType\n    * @param entityId         return events of this entityId\n    * @param eventNames       return events with any of these event names.\n    * @param targetEntityType return events of this targetEntityType:\n    *   - None means no restriction on targetEntityType\n    *   - Some(None) means no targetEntityType for this event\n    *   - Some(Some(x)) means targetEntityType should match x.\n   
 * @param targetEntityId   return events of this targetEntityId\n    *   - None means no restriction on targetEntityId\n    *   - Some(None) means no targetEntityId for this event\n    *   - Some(Some(x)) means targetEntityId should match x.\n    * @param spark            Spark context\n    * @return DataFrame\n    */\n  def find(\n            appName: String,\n            channelName: String,\n            startTime: Timestamp,\n            untilTime: Timestamp,\n            entityType: String,\n            entityId: String,\n            eventNames: Array[String],\n            targetEntityType: String,\n            targetEntityId: String\n          )(spark: SparkSession): DataFrame = {\n    import spark.implicits._\n    val colNames: Seq[String] =\n      Seq(\n        \"eventId\",\n        \"event\",\n        \"entityType\",\n        \"entityId\",\n        \"targetEntityType\",\n        \"targetEntityId\",\n        \"eventTime\",\n        \"tags\",\n        \"prId\",\n        \"creationTime\",\n        \"fields\"\n      )\n    PEventStore.find(appName,\n      Option(channelName),\n      Option(startTime).map(t => new DateTime(t.getTime)),\n      Option(untilTime).map(t => new DateTime(t.getTime)),\n      Option(entityType),\n      Option(entityId),\n      Option(eventNames),\n      targetEntityType match {\n        case null => None\n        case \"\" => Option(None)\n        case _ => Option(Option(targetEntityType))\n      },\n      targetEntityId match {\n        case null => None\n        case \"\" => Option(None)\n        case _ => Option(Option(targetEntityId))\n      }\n      )(spark.sparkContext).map { e =>\n      (\n        e.eventId,\n        e.event,\n        e.entityType,\n        e.entityId,\n        e.targetEntityType.orNull,\n        e.targetEntityId.orNull,\n        new Timestamp(e.eventTime.getMillis),\n        e.tags.mkString(\"\\t\"),\n        e.prId.orNull,\n        new Timestamp(e.creationTime.getMillis),\n        
e.properties.fields.mapValues(_.values.toString)\n      )\n    }.toDF(colNames: _*)\n  }\n\n  /** Aggregate properties of entities based on these special events:\n    * \\$set, \\$unset, \\$delete events.\n    *\n    * @param appName     use events of this app\n    * @param entityType  aggregate properties of the entities of this entityType\n    * @param channelName use events of this channel (default channel if it's None)\n    * @param startTime   use events with eventTime >= startTime\n    * @param untilTime   use events with eventTime < untilTime\n    * @param required    only keep entities with these required properties defined\n    * @param spark       Spark session\n    * @return DataFrame  DataFrame of entityId and PropertyMap pair\n    */\n  def aggregateProperties(\n                           appName: String,\n                           entityType: String,\n                           channelName: String,\n                           startTime: Timestamp,\n                           untilTime: Timestamp,\n                           required: Array[String]\n                         )\n                         (spark: SparkSession): DataFrame = {\n    import spark.implicits._\n    val colNames: Seq[String] =\n      Seq(\n        \"entityId\",\n        \"firstUpdated\",\n        \"lastUpdated\",\n        \"fields\"\n      )\n    PEventStore.aggregateProperties(appName,\n      entityType,\n      Option(channelName),\n      Option(startTime).map(t => new DateTime(t.getTime)),\n      Option(untilTime).map(t => new DateTime(t.getTime)),\n      Option(required.toSeq))(spark.sparkContext).map { x =>\n      val m = x._2\n      (x._1,\n        new Timestamp(m.firstUpdated.getMillis),\n        new Timestamp(m.lastUpdated.getMillis),\n        m.fields.mapValues(_.values.toString)\n      )\n    }.toDF(colNames: _*)\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/view/DataView.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.view\n\nimport org.apache.predictionio.annotation.Experimental\nimport org.apache.predictionio.data.storage.Event\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.data.store.PEventStore\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}\nimport org.apache.spark.SparkContext\nimport org.joda.time.DateTime\n\nimport scala.reflect.ClassTag\nimport scala.reflect.runtime.universe._\nimport scala.util.hashing.MurmurHash3\n\n/** :: Experimental :: */\n@Experimental\nobject DataView {\n  /**\n    * :: Experimental ::\n    *\n    * Create a DataFrame from events of a specified app.\n    *\n    * @param appName return events of this app\n    * @param channelName use events of this channel (default channel if it's None)\n    * @param startTime return events with eventTime >= startTime\n    * @param untilTime return events with eventTime < untilTime\n    * @param conversionFunction a function that turns raw Events into events of interest.\n    *                           If conversionFunction returns None, such events are dropped.\n    * @param name 
identify the DataFrame created\n    * @param version used to track changes to the conversionFunction, e.g. version = \"20150413\"\n    *                and update whenever the function is changed.\n    * @tparam E the output type of the conversion function. The type needs to extend Product\n    *           (e.g. case class)\n    * @return a DataFrame of events\n    */\n  @Experimental\n  def create[E <: Product: TypeTag: ClassTag](\n    appName: String,\n    channelName: Option[String] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    conversionFunction: Event => Option[E],\n    name: String = \"\",\n    version: String = \"\")(sc: SparkContext): DataFrame = {\n\n    @transient lazy val logger = Logger[this.type]\n\n    val sqlSession = SparkSession.builder().getOrCreate()\n\n    val beginTime = startTime match {\n      case Some(t) => t\n      case None => new DateTime(0L)\n    }\n    val endTime = untilTime match {\n      case Some(t) => t\n      case None => DateTime.now() // fix the current time\n    }\n    // detect changes to the case class\n    val uid = java.io.ObjectStreamClass.lookup(implicitly[reflect.ClassTag[E]].runtimeClass)\n        .getSerialVersionUID\n    val hash = MurmurHash3.stringHash(s\"$beginTime-$endTime-$version-$uid\")\n    val baseDir = s\"${sys.env(\"PIO_FS_BASEDIR\")}/view\"\n    val fileName = s\"$baseDir/$name-$appName-$hash.parquet\"\n    try {\n      sqlSession.read.parquet(fileName)\n    } catch {\n      case e: java.io.FileNotFoundException =>\n        logger.info(\"Cached copy not found, reading from DB.\")\n        // if cached copy is found, use it. 
If not, grab from Storage\n        val result: RDD[E] = PEventStore.find(\n            appName = appName,\n            channelName = channelName,\n            startTime = startTime,\n            untilTime = Some(endTime))(sc)\n          .flatMap((e) => conversionFunction(e))\n        import sqlSession.implicits._ // needed for RDD.toDF()\n        val resultDF = result.toDF()\n\n        resultDF.write.mode(SaveMode.ErrorIfExists).parquet(fileName)\n        sqlSession.read.parquet(fileName)\n      case e: java.lang.RuntimeException =>\n        if (e.toString.contains(\"is not a Parquet file\")) {\n          logger.error(s\"$fileName does not contain a valid Parquet file. \" +\n            \"Please delete it and try again.\")\n        }\n        throw e\n    }\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/view/LBatchView.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.view\n\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.EventValidation\nimport org.apache.predictionio.data.storage.DataMap\nimport org.apache.predictionio.data.storage.Storage\n\nimport org.joda.time.DateTime\nimport scala.language.implicitConversions\n\nimport scala.concurrent.ExecutionContext.Implicits.global // TODO\n\nobject ViewPredicates {\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getStartTimePredicate(startTimeOpt: Option[DateTime])\n  : (Event => Boolean) = {\n    startTimeOpt.map(getStartTimePredicate).getOrElse(_ => true)\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getStartTimePredicate(startTime: DateTime): (Event => Boolean) = {\n    e => (!(e.eventTime.isBefore(startTime) || e.eventTime.isEqual(startTime)))\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getUntilTimePredicate(untilTimeOpt: Option[DateTime])\n  : (Event => Boolean) = {\n    untilTimeOpt.map(getUntilTimePredicate).getOrElse(_ => true)\n  }\n\n  @deprecated(\"Use LEvents or 
LEventStore instead.\", \"0.9.2\")\n  def getUntilTimePredicate(untilTime: DateTime): (Event => Boolean) = {\n    _.eventTime.isBefore(untilTime)\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getEntityTypePredicate(entityTypeOpt: Option[String]): (Event => Boolean)\n  = {\n    entityTypeOpt.map(getEntityTypePredicate).getOrElse(_ => true)\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getEntityTypePredicate(entityType: String): (Event => Boolean) = {\n    (_.entityType == entityType)\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getEventPredicate(eventOpt: Option[String]): (Event => Boolean)\n  = {\n    eventOpt.map(getEventPredicate).getOrElse(_ => true)\n  }\n\n  @deprecated(\"Use LEvents or LEventStore instead.\", \"0.9.2\")\n  def getEventPredicate(event: String): (Event => Boolean) = {\n    (_.event == event)\n  }\n}\n\nobject ViewAggregators {\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  def getDataMapAggregator(): ((Option[DataMap], Event) => Option[DataMap]) = {\n    (p, e) => {\n      e.event match {\n        case \"$set\" => {\n          if (p == None) {\n            Some(e.properties)\n          } else {\n            p.map(_ ++ e.properties)\n          }\n        }\n        case \"$unset\" => {\n          if (p == None) {\n            None\n          } else {\n            p.map(_ -- e.properties.keySet)\n          }\n        }\n        case \"$delete\" => None\n        case _ => p // do nothing for others\n      }\n    }\n  }\n}\n\nobject EventSeq {\n  // Need to\n  // >>> import scala.language.implicitConversions\n  // to enable implicit conversion. 
Only import in the code where this is\n  // necessary to avoid confusion.\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  implicit def eventSeqToList(es: EventSeq): List[Event] = es.events\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  implicit def listToEventSeq(l: List[Event]): EventSeq = new EventSeq(l)\n}\n\n\nclass EventSeq(val events: List[Event]) {\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  def filter(\n    eventOpt: Option[String] = None,\n    entityTypeOpt: Option[String] = None,\n    startTimeOpt: Option[DateTime] = None,\n    untilTimeOpt: Option[DateTime] = None): EventSeq = {\n\n    events\n    .filter(ViewPredicates.getEventPredicate(eventOpt))\n    .filter(ViewPredicates.getStartTimePredicate(startTimeOpt))\n    .filter(ViewPredicates.getUntilTimePredicate(untilTimeOpt))\n    .filter(ViewPredicates.getEntityTypePredicate(entityTypeOpt))\n  }\n\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  def filter(p: (Event => Boolean)): EventSeq = events.filter(p)\n\n  @deprecated(\"Use LEvents instead.\", \"0.9.2\")\n  def aggregateByEntityOrdered[T](init: T, op: (T, Event) => T)\n  : Map[String, T] = {\n    events\n    .groupBy( _.entityId )\n    .mapValues( _.sortBy(_.eventTime.getMillis).foldLeft[T](init)(op))\n  }\n\n\n}\n\n\nclass LBatchView(\n  val appId: Int,\n  val startTime: Option[DateTime],\n  val untilTime: Option[DateTime]) {\n\n  @transient lazy val eventsDb = Storage.getLEvents()\n\n  @transient lazy val _events = eventsDb.find(\n    appId = appId,\n    startTime = startTime,\n    untilTime = untilTime).toList\n\n  @transient lazy val events: EventSeq = new EventSeq(_events)\n\n  /* Aggregate event data\n   *\n   * @param entityType only aggregate event with entityType\n   * @param startTimeOpt if specified, only aggregate event after (inclusive)\n   * startTimeOpt\n   * @param untilTimeOpt if specified, only aggregate event until (exclusive)\n   * endTimeOpt\n   */\n  @deprecated(\"Use LEventStore instead.\", 
\"0.9.2\")\n  def aggregateProperties(\n      entityType: String,\n      startTimeOpt: Option[DateTime] = None,\n      untilTimeOpt: Option[DateTime] = None\n      ): Map[String, DataMap] = {\n\n    events\n    .filter(entityTypeOpt = Some(entityType))\n    .filter(e => EventValidation.isSpecialEvents(e.event))\n    .aggregateByEntityOrdered(\n      init = None,\n      op = ViewAggregators.getDataMapAggregator())\n    .filter{ case (k, v) => (v != None) }\n    .mapValues(_.get)\n\n  }\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.view\n\nimport org.apache.predictionio.data.storage.{DataMap, Event, EventValidation, Storage}\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.joda.time.DateTime\nimport org.json4s.JValue\n\n\n// each JValue data associated with the time it is set\nprivate[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable\n\nprivate[predictionio] case class SetProp (\n  val fields: Map[String, PropTime],\n  // last set time. 
Note: fields could be empty with valid set time\n  val t: Long) extends Serializable {\n\n  def ++ (that: SetProp): SetProp = {\n    val commonKeys = fields.keySet.intersect(that.fields.keySet)\n\n    val common: Map[String, PropTime] = commonKeys.map { k =>\n      val thisData = this.fields(k)\n      val thatData = that.fields(k)\n      // only keep the value with latest time\n      val v = if (thisData.t > thatData.t) thisData else thatData\n      (k, v)\n    }.toMap\n\n    val combinedFields = common ++\n      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)\n\n    // keep the latest set time\n    val combinedT = if (this.t > that.t) this.t else that.t\n\n    SetProp(\n      fields = combinedFields,\n      t = combinedT\n    )\n  }\n}\n\nprivate[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {\n  def ++ (that: UnsetProp): UnsetProp = {\n    val commonKeys = fields.keySet.intersect(that.fields.keySet)\n\n    val common: Map[String, Long] = commonKeys.map { k =>\n      val thisData = this.fields(k)\n      val thatData = that.fields(k)\n      // only keep the value with latest time\n      val v = if (thisData > thatData) thisData else thatData\n      (k, v)\n    }.toMap\n\n    val combinedFields = common ++\n      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)\n\n    UnsetProp(\n      fields = combinedFields\n    )\n  }\n}\n\nprivate[predictionio] case class DeleteEntity (t: Long) extends Serializable {\n  def ++ (that: DeleteEntity): DeleteEntity = {\n    if (this.t > that.t) this else that\n  }\n}\n\nprivate[predictionio] case class EventOp (\n  val setProp: Option[SetProp] = None,\n  val unsetProp: Option[UnsetProp] = None,\n  val deleteEntity: Option[DeleteEntity] = None\n) extends Serializable {\n\n  def ++ (that: EventOp): EventOp = {\n    EventOp(\n      setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),\n      unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),\n      deleteEntity 
= (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)\n    )\n  }\n\n  def toDataMap(): Option[DataMap] = {\n    setProp.flatMap { set =>\n\n      val unsetKeys: Set[String] = unsetProp.map( unset =>\n        unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet\n      ).getOrElse(Set())\n\n      val combinedFields = deleteEntity.map { delete =>\n        if (delete.t >= set.t) {\n          None\n        } else {\n          val deleteKeys: Set[String] = set.fields\n            .filter { case (k, PropTime(kv, t)) =>\n              (delete.t >= t)\n            }.keySet\n          Some(set.fields -- unsetKeys -- deleteKeys)\n        }\n      }.getOrElse{\n        Some(set.fields -- unsetKeys)\n      }\n\n      // Note: mapValues() doesn't return concrete Map and causes\n      // NotSerializableException issue. Use map(identity) to work around this.\n      // see https://issues.scala-lang.org/browse/SI-7005\n      combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))\n    }\n  }\n\n}\n\nprivate[predictionio] object EventOp {\n  def apply(e: Event): EventOp = {\n    val t = e.eventTime.getMillis\n    e.event match {\n      case \"$set\" => {\n        val fields = e.properties.fields.mapValues(jv =>\n          PropTime(jv, t)\n        ).map(identity)\n\n        EventOp(\n          setProp = Some(SetProp(fields = fields, t = t))\n        )\n      }\n      case \"$unset\" => {\n        val fields = e.properties.fields.mapValues(jv => t).map(identity)\n        EventOp(\n          unsetProp = Some(UnsetProp(fields = fields))\n        )\n      }\n      case \"$delete\" => {\n        EventOp(\n          deleteEntity = Some(DeleteEntity(t))\n        )\n      }\n      case _ => {\n        EventOp()\n      }\n    }\n  }\n}\n\n@deprecated(\"Use PEvents or PEventStore instead.\", \"0.9.2\")\nclass PBatchView(\n  val appId: Int,\n  val startTime: Option[DateTime],\n  val untilTime: Option[DateTime],\n  val sc: SparkContext) {\n\n  // NOTE: parallel 
Events DB interface\n  @transient lazy val eventsDb = Storage.getPEvents()\n\n  @transient lazy val _events: RDD[Event] =\n    eventsDb.getByAppIdAndTimeAndEntity(\n      appId = appId,\n      startTime = startTime,\n      untilTime = untilTime,\n      entityType = None,\n      entityId = None)(sc)\n\n  // TODO: change to use EventSeq?\n  @transient lazy val events: RDD[Event] = _events\n\n  def aggregateProperties(\n    entityType: String,\n    startTimeOpt: Option[DateTime] = None,\n    untilTimeOpt: Option[DateTime] = None\n  ): RDD[(String, DataMap)] = {\n\n    _events\n      .filter( e => ((e.entityType == entityType) &&\n        (EventValidation.isSpecialEvents(e.event))) )\n      .map( e => (e.entityId, EventOp(e) ))\n      .aggregateByKey[EventOp](EventOp())(\n        // within same partition\n        seqOp = { case (u, v) => u ++ v },\n        // across partition\n        combOp = { case (accu, u) => accu ++ u }\n      )\n      .mapValues(_.toDataMap)\n      .filter{ case (k, v) => v.isDefined }\n      .map{ case (k, v) => (k, v.get) }\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/view/QuickTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.view\n\nimport org.apache.predictionio.data.storage.Storage\n\nimport scala.concurrent.ExecutionContext.Implicits.global // TODO\n\nimport grizzled.slf4j.Logger\nimport org.joda.time.DateTime\n\nimport scala.language.implicitConversions\n\nclass TestHBLEvents() {\n  @transient lazy val eventsDb = Storage.getLEvents()\n\n  def run(): Unit = {\n    val r = eventsDb.find(\n      appId = 1,\n      startTime = None,\n      untilTime = None,\n      entityType = Some(\"pio_user\"),\n      entityId = Some(\"3\")).toList\n    println(r)\n  }\n}\n\nclass TestSource(val appId: Int) {\n  @transient lazy val logger = Logger[this.type]\n  @transient lazy val batchView = new LBatchView(appId,\n    None, None)\n\n  def run(): Unit = {\n    println(batchView.events)\n  }\n}\n\nobject QuickTest {\n\n  def main(args: Array[String]) {\n    val t = new TestHBLEvents()\n    t.run()\n\n    // val ts = new TestSource(args(0).toInt)\n    // ts.run()\n  }\n}\n\nobject TestEventTime {\n  @transient lazy val batchView = new LBatchView(9, None, None)\n\n  // implicit def back2list(es: EventSeq) = es.events\n\n  def main(args: 
Array[String]) {\n    val e = batchView.events.filter(\n      eventOpt = Some(\"rate\"),\n      startTimeOpt = Some(new DateTime(1998, 1, 1, 0, 0))\n      // untilTimeOpt = Some(new DateTime(1997, 1, 1, 0, 0))\n    )\n      // untilTimeOpt = Some(new DateTime(2000, 1, 1, 0, 0)))\n\n    e.foreach { println }\n    println()\n    println()\n    println()\n    val u = batchView.aggregateProperties(\"pio_item\")\n    u.foreach { println }\n    println()\n    println()\n    println()\n\n    // val l: Seq[Event] = e\n    val l = e.map { _.entityId }\n    l.foreach { println }\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/ConnectorException.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks\n\n/** Webhooks Connector Exception\n  *\n  * @param message the detail message\n  * @param cause the cause\n  */\nprivate[predictionio] class ConnectorException(message: String, cause: Throwable)\n  extends Exception(message, cause) {\n\n  /** Webhooks Connector Exception with cause being set to null\n    *\n    * @param message the detail message\n    */\n  def this(message: String) = this(message, null)\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/ConnectorUtil.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks\n\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.EventJson4sSupport\n\nimport org.json4s.Formats\nimport org.json4s.DefaultFormats\nimport org.json4s.JObject\nimport org.json4s.native.Serialization.read\nimport org.json4s.native.Serialization.write\n\n\nprivate[predictionio] object ConnectorUtil {\n\n  implicit val eventJson4sFormats: Formats = DefaultFormats +\n    new EventJson4sSupport.APISerializer\n\n  // Intentionally use EventJson4sSupport.APISerializer to convert\n  // from JSON to an Event object. Connectors are not allowed to create\n  // Event objects directly, so that Event construction stays consistent\n  // by enforcing the JSON format.\n\n  def toEvent(connector: JsonConnector, data: JObject): Event = {\n    read[Event](write(connector.toEventJson(data)))\n  }\n\n  def toEvent(connector: FormConnector, data: Map[String, String]): Event = {\n    read[Event](write(connector.toEventJson(data)))\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/FormConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks\n\nimport org.json4s.JObject\n\n/** Connector for Webhooks connection with Form submission data format\n  */\nprivate[predictionio] trait FormConnector {\n\n  // TODO: support conversion to multiple events?\n\n  /** Convert from original Form submission data to Event JObject\n    * @param data Map of key-value pairs in String type received through webhooks\n    * @return Event JObject\n   */\n  def toEventJson(data: Map[String, String]): JObject\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/JsonConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks\n\nimport org.json4s.JObject\n\n/** Connector for Webhooks connection */\nprivate[predictionio] trait JsonConnector {\n\n  // TODO: support conversion to multiple events?\n\n  /** Convert from original JObject to Event JObject\n    * @param data original JObject received through webhooks\n    * @return Event JObject\n   */\n  def toEventJson(data: JObject): JObject\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/exampleform/ExampleFormConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.exampleform\n\nimport org.apache.predictionio.data.webhooks.FormConnector\nimport org.apache.predictionio.data.webhooks.ConnectorException\n\nimport org.json4s.JObject\n\n\n/** Example FormConnector with following types of webhook form data inputs:\n  *\n  * UserAction\n  *\n  *   \"type\"=\"userAction\"\n  *   \"userId\"=\"as34smg4\",\n  *   \"event\"=\"do_something\",\n  *   \"context[ip]\"=\"24.5.68.47\", // optional\n  *   \"context[prop1]\"=\"2.345\", // optional\n  *   \"context[prop2]\"=\"value1\" // optional\n  *   \"anotherProperty1\"=\"100\",\n  *   \"anotherProperty2\"=\"optional1\", // optional\n  *   \"timestamp\"=\"2015-01-02T00:30:12.984Z\"\n  *\n  * UserActionItem\n  *\n  *   \"type\"=\"userActionItem\"\n  *   \"userId\"=\"as34smg4\",\n  *   \"event\"=\"do_something_on\",\n  *   \"itemId\"=\"kfjd312bc\",\n  *   \"context[ip]\"=\"1.23.4.56\",\n  *   \"context[prop1]\"=\"2.345\",\n  *   \"context[prop2]\"=\"value1\",\n  *   \"anotherPropertyA\"=\"4.567\", // optional\n  *   \"anotherPropertyB\"=\"false\", // optional\n  *   \"timestamp\"=\"2015-01-15T04:20:23.567Z\"\n  *\n  
*/\nprivate[predictionio] object ExampleFormConnector extends FormConnector {\n\n  override\n  def toEventJson(data: Map[String, String]): JObject = {\n    val json = try {\n      data.get(\"type\") match {\n        case Some(\"userAction\") => userActionToEventJson(data)\n        case Some(\"userActionItem\") => userActionItemToEventJson(data)\n        case Some(x) => throw new ConnectorException(\n          s\"Cannot convert unknown type ${x} to event JSON\")\n        case None => throw new ConnectorException(\n          s\"The field 'type' is required.\")\n      }\n    } catch {\n      case e: ConnectorException => throw e\n      case e: Exception => throw new ConnectorException(\n        s\"Cannot convert ${data} to event JSON. ${e.getMessage()}\", e)\n    }\n    json\n  }\n\n  def userActionToEventJson(data: Map[String, String]): JObject = {\n    import org.json4s.JsonDSL._\n\n    // two level optional data\n    val context = if (data.exists(_._1.startsWith(\"context[\"))) {\n      Some(\n        (\"ip\" -> data.get(\"context[ip]\")) ~\n        (\"prop1\" -> data.get(\"context[prop1]\").map(_.toDouble)) ~\n        (\"prop2\" -> data.get(\"context[prop2]\"))\n      )\n    } else {\n      None\n    }\n\n    val json =\n      (\"event\" -> data(\"event\")) ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"userId\")) ~\n      (\"eventTime\" -> data(\"timestamp\")) ~\n      (\"properties\" -> (\n        (\"context\" -> context) ~\n        (\"anotherProperty1\" -> data(\"anotherProperty1\").toInt) ~\n        (\"anotherProperty2\" -> data.get(\"anotherProperty2\"))\n      ))\n    json\n  }\n\n\n  def userActionItemToEventJson(data: Map[String, String]): JObject = {\n    import org.json4s.JsonDSL._\n\n    val json =\n      (\"event\" -> data(\"event\")) ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"userId\")) ~\n      (\"targetEntityType\" -> \"item\") ~\n      (\"targetEntityId\" -> data(\"itemId\")) ~\n      
(\"eventTime\" -> data(\"timestamp\")) ~\n      (\"properties\" -> (\n        (\"context\" -> (\n          (\"ip\" -> data(\"context[ip]\")) ~\n          (\"prop1\" -> data(\"context[prop1]\").toDouble) ~\n          (\"prop2\" -> data(\"context[prop2]\"))\n        )) ~\n        (\"anotherPropertyA\" -> data.get(\"anotherPropertyA\").map(_.toDouble)) ~\n        (\"anotherPropertyB\" -> data.get(\"anotherPropertyB\").map(_.toBoolean))\n      ))\n    json\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/examplejson/ExampleJsonConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.examplejson\n\nimport org.apache.predictionio.data.webhooks.JsonConnector\nimport org.apache.predictionio.data.webhooks.ConnectorException\n\nimport org.json4s.Formats\nimport org.json4s.DefaultFormats\nimport org.json4s.JObject\n\n/** Example JsonConnector with following types of webhooks JSON input:\n  *\n  * UserAction\n  *\n  * {\n  *   \"type\": \"userAction\"\n  *   \"userId\": \"as34smg4\",\n  *   \"event\": \"do_something\",\n  *   \"context\": {\n  *     \"ip\": \"24.5.68.47\",\n  *     \"prop1\": 2.345,\n  *     \"prop2\": \"value1\"\n  *   },\n  *   \"anotherProperty1\": 100,\n  *   \"anotherProperty2\": \"optional1\",\n  *   \"timestamp\": \"2015-01-02T00:30:12.984Z\"\n  * }\n  *\n  * UserActionItem\n  *\n  * {\n  *   \"type\": \"userActionItem\"\n  *   \"userId\": \"as34smg4\",\n  *   \"event\": \"do_something_on\",\n  *   \"itemId\": \"kfjd312bc\",\n  *   \"context\": {\n  *     \"ip\": \"1.23.4.56\",\n  *     \"prop1\": 2.345,\n  *     \"prop2\": \"value1\"\n  *   },\n  *   \"anotherPropertyA\": 4.567,\n  *   \"anotherPropertyB\": false,\n  *   \"timestamp\": 
\"2015-01-15T04:20:23.567Z\"\n  * }\n  */\nprivate[predictionio] object ExampleJsonConnector extends JsonConnector {\n\n  implicit val json4sFormats: Formats = DefaultFormats\n\n  override def toEventJson(data: JObject): JObject = {\n    val common = try {\n      data.extract[Common]\n    } catch {\n      case e: Exception => throw new ConnectorException(\n        s\"Cannot extract Common field from ${data}. ${e.getMessage()}\", e)\n    }\n\n    val json = try {\n      common.`type` match {\n        case \"userAction\" =>\n          toEventJson(common = common, userAction = data.extract[UserAction])\n        case \"userActionItem\" =>\n          toEventJson(common = common, userActionItem = data.extract[UserActionItem])\n        case x: String =>\n          throw new ConnectorException(\n            s\"Cannot convert unknown type '${x}' to Event JSON.\")\n      }\n    } catch {\n      case e: ConnectorException => throw e\n      case e: Exception => throw new ConnectorException(\n        s\"Cannot convert ${data} to eventJson. 
${e.getMessage()}\", e)\n    }\n\n    json\n  }\n\n  def toEventJson(common: Common, userAction: UserAction): JObject = {\n    import org.json4s.JsonDSL._\n\n    // map to EventAPI JSON\n    val json =\n      (\"event\" -> userAction.event) ~\n        (\"entityType\" -> \"user\") ~\n        (\"entityId\" -> userAction.userId) ~\n        (\"eventTime\" -> userAction.timestamp) ~\n        (\"properties\" -> (\n          (\"context\" -> userAction.context) ~\n            (\"anotherProperty1\" -> userAction.anotherProperty1) ~\n            (\"anotherProperty2\" -> userAction.anotherProperty2)\n          ))\n    json\n  }\n\n  def toEventJson(common: Common, userActionItem: UserActionItem): JObject = {\n    import org.json4s.JsonDSL._\n\n    // map to EventAPI JSON\n    val json =\n      (\"event\" -> userActionItem.event) ~\n        (\"entityType\" -> \"user\") ~\n        (\"entityId\" -> userActionItem.userId) ~\n        (\"targetEntityType\" -> \"item\") ~\n        (\"targetEntityId\" -> userActionItem.itemId) ~\n        (\"eventTime\" -> userActionItem.timestamp) ~\n        (\"properties\" -> (\n          (\"context\" -> userActionItem.context) ~\n            (\"anotherPropertyA\" -> userActionItem.anotherPropertyA) ~\n            (\"anotherPropertyB\" -> userActionItem.anotherPropertyB)\n          ))\n    json\n  }\n\n  // Common required fields\n  case class Common(\n    `type`: String\n  )\n\n  // User Actions fields\n  case class UserAction (\n    userId: String,\n    event: String,\n    context: Option[JObject],\n    anotherProperty1: Int,\n    anotherProperty2: Option[String],\n    timestamp: String\n  )\n\n  // UserActionItem fields\n  case class UserActionItem (\n    userId: String,\n    event: String,\n    itemId: String,\n    context: JObject,\n    anotherPropertyA: Option[Double],\n    anotherPropertyB: Option[Boolean],\n    timestamp: String\n  )\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/mailchimp/MailChimpConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\n\npackage org.apache.predictionio.data.webhooks.mailchimp\n\nimport org.apache.predictionio.data.webhooks.FormConnector\nimport org.apache.predictionio.data.webhooks.ConnectorException\nimport org.apache.predictionio.data.storage.EventValidation\nimport org.apache.predictionio.data.Utils\n\nimport org.json4s.JObject\n\nimport org.joda.time.DateTime\nimport org.joda.time.format.DateTimeFormat\n\nprivate[predictionio] object MailChimpConnector extends FormConnector {\n\n  override\n  def toEventJson(data: Map[String, String]): JObject = {\n\n    val json = data.get(\"type\") match {\n      case Some(\"subscribe\") => subscribeToEventJson(data)\n      // UNSUBSCRIBE\n      case Some(\"unsubscribe\") => unsubscribeToEventJson(data)\n      // PROFILE UPDATES\n      case Some(\"profile\") => profileToEventJson(data)\n      // EMAIL UPDATE\n      case Some(\"upemail\") => upemailToEventJson(data)\n      // CLEANED EMAILS\n      case Some(\"cleaned\") => cleanedToEventJson(data)\n      // CAMPAIGN SENDING STATUS\n      case Some(\"campaign\") => campaignToEventJson(data)\n      // invalid type\n      case Some(x) => throw new ConnectorException(\n  
      s\"Cannot convert unknown MailChimp data type ${x} to event JSON\")\n      case None => throw new ConnectorException(\n        s\"The field 'type' is required for MailChimp data.\")\n    }\n    json\n  }\n\n\n  val mailChimpDateTimeFormat = DateTimeFormat.forPattern(\"yyyy-MM-dd HH:mm:ss\")\n    .withZone(EventValidation.defaultTimeZone)\n\n  def parseMailChimpDateTime(s: String): DateTime = {\n    mailChimpDateTimeFormat.parseDateTime(s)\n  }\n\n  def subscribeToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    \"type\": \"subscribe\",\n    \"fired_at\": \"2009-03-26 21:35:57\",\n    \"data[id]\": \"8a25ff1d98\",\n    \"data[list_id]\": \"a6b5da1054\",\n    \"data[email]\": \"api@mailchimp.com\",\n    \"data[email_type]\": \"html\",\n    \"data[merges][EMAIL]\": \"api@mailchimp.com\",\n    \"data[merges][FNAME]\": \"MailChimp\",\n    \"data[merges][LNAME]\": \"API\",\n    \"data[merges][INTERESTS]\": \"Group1,Group2\",\n    \"data[ip_opt]\": \"10.20.10.30\",\n    \"data[ip_signup]\": \"10.20.10.30\"\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    // TODO: handle optional fields\n    val json =\n      (\"event\" -> \"subscribe\") ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"data[id]\")) ~\n      (\"targetEntityType\" -> \"list\") ~\n      (\"targetEntityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"email\" -> data(\"data[email]\")) ~\n        (\"email_type\" -> data(\"data[email_type]\")) ~\n        (\"merges\" -> (\n          (\"EMAIL\" -> data(\"data[merges][EMAIL]\")) ~\n          (\"FNAME\" -> data(\"data[merges][FNAME]\"))) ~\n          (\"LNAME\" -> data(\"data[merges][LNAME]\")) ~\n          (\"INTERESTS\" -> data.get(\"data[merges][INTERESTS]\"))\n        )) ~\n        (\"ip_opt\" -> data(\"data[ip_opt]\")) ~\n        
(\"ip_signup\" -> data(\"data[ip_signup]\")\n      ))\n\n    json\n\n  }\n\n  def unsubscribeToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    \"action\" will either be \"unsub\" or \"delete\".\n    The reason will be \"manual\" unless caused by a spam complaint - then it will be \"abuse\"\n\n    \"type\": \"unsubscribe\",\n    \"fired_at\": \"2009-03-26 21:40:57\",\n    \"data[action]\": \"unsub\",\n    \"data[reason]\": \"manual\",\n    \"data[id]\": \"8a25ff1d98\",\n    \"data[list_id]\": \"a6b5da1054\",\n    \"data[email]\": \"api+unsub@mailchimp.com\",\n    \"data[email_type]\": \"html\",\n    \"data[merges][EMAIL]\": \"api+unsub@mailchimp.com\",\n    \"data[merges][FNAME]\": \"MailChimp\",\n    \"data[merges][LNAME]\": \"API\",\n    \"data[merges][INTERESTS]\": \"Group1,Group2\",\n    \"data[ip_opt]\": \"10.20.10.30\",\n    \"data[campaign_id]\": \"cb398d21d2\",\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    val json =\n      (\"event\" -> \"unsubscribe\") ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"data[id]\")) ~\n      (\"targetEntityType\" -> \"list\") ~\n      (\"targetEntityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"action\" -> data(\"data[action]\")) ~\n        (\"reason\" -> data(\"data[reason]\")) ~\n        (\"email\" -> data(\"data[email]\")) ~\n        (\"email_type\" -> data(\"data[email_type]\")) ~\n        (\"merges\" -> (\n          (\"EMAIL\" -> data(\"data[merges][EMAIL]\")) ~\n          (\"FNAME\" -> data(\"data[merges][FNAME]\"))) ~\n          (\"LNAME\" -> data(\"data[merges][LNAME]\")) ~\n          (\"INTERESTS\" -> data.get(\"data[merges][INTERESTS]\"))\n        )) ~\n        (\"ip_opt\" -> data(\"data[ip_opt]\")) ~\n        (\"campaign_id\" -> data(\"data[campaign_id]\")\n      ))\n\n    json\n\n  
}\n\n  def profileToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    \"type\": \"profile\",\n    \"fired_at\": \"2009-03-26 21:31:21\",\n    \"data[id]\": \"8a25ff1d98\",\n    \"data[list_id]\": \"a6b5da1054\",\n    \"data[email]\": \"api@mailchimp.com\",\n    \"data[email_type]\": \"html\",\n    \"data[merges][EMAIL]\": \"api@mailchimp.com\",\n    \"data[merges][FNAME]\": \"MailChimp\",\n    \"data[merges][LNAME]\": \"API\",\n    \"data[merges][INTERESTS]\": \"Group1,Group2\", \\\\OPTIONAL\n    \"data[ip_opt]\": \"10.20.10.30\"\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    val json =\n      (\"event\" -> \"profile\") ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"data[id]\")) ~\n      (\"targetEntityType\" -> \"list\") ~\n      (\"targetEntityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"email\" -> data(\"data[email]\")) ~\n        (\"email_type\" -> data(\"data[email_type]\")) ~\n        (\"merges\" -> (\n          (\"EMAIL\" -> data(\"data[merges][EMAIL]\")) ~\n          (\"FNAME\" -> data(\"data[merges][FNAME]\"))) ~\n          (\"LNAME\" -> data(\"data[merges][LNAME]\")) ~\n          (\"INTERESTS\" -> data.get(\"data[merges][INTERESTS]\"))\n        )) ~\n        (\"ip_opt\" -> data(\"data[ip_opt]\")\n      ))\n\n    json\n\n  }\n\n  def upemailToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    \"type\": \"upemail\",\n    \"fired_at\": \"2009-03-26 22:15:09\",\n    \"data[list_id]\": \"a6b5da1054\",\n    \"data[new_id]\": \"51da8c3259\",\n    \"data[new_email]\": \"api+new@mailchimp.com\",\n    \"data[old_email]\": \"api+old@mailchimp.com\"\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    val 
json =\n      (\"event\" -> \"upemail\") ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> data(\"data[new_id]\")) ~\n      (\"targetEntityType\" -> \"list\") ~\n      (\"targetEntityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"new_email\" -> data(\"data[new_email]\")) ~\n        (\"old_email\" -> data(\"data[old_email]\"))\n      ))\n\n    json\n\n  }\n\n  def cleanedToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    Reason will be one of \"hard\" (for hard bounces) or \"abuse\"\n    \"type\": \"cleaned\",\n    \"fired_at\": \"2009-03-26 22:01:00\",\n    \"data[list_id]\": \"a6b5da1054\",\n    \"data[campaign_id]\": \"4fjk2ma9xd\",\n    \"data[reason]\": \"hard\",\n    \"data[email]\": \"api+cleaned@mailchimp.com\"\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    val json =\n      (\"event\" -> \"cleaned\") ~\n      (\"entityType\" -> \"list\") ~\n      (\"entityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"campaignId\" -> data(\"data[campaign_id]\")) ~\n        (\"reason\" -> data(\"data[reason]\")) ~\n        (\"email\" -> data(\"data[email]\"))\n      ))\n\n    json\n\n  }\n\n  def campaignToEventJson(data: Map[String, String]): JObject = {\n\n    import org.json4s.JsonDSL._\n\n    /*\n    \"type\": \"campaign\",\n    \"fired_at\": \"2009-03-26 21:31:21\",\n    \"data[id]\": \"5aa2102003\",\n    \"data[subject]\": \"Test Campaign Subject\",\n    \"data[status]\": \"sent\",\n    \"data[reason]\": \"\",\n    \"data[list_id]\": \"a6b5da1054\"\n    */\n\n    // convert to ISO8601 format\n    val eventTime = Utils.dateTimeToString(parseMailChimpDateTime(data(\"fired_at\")))\n\n    val json =\n      (\"event\" -> \"campaign\") ~\n      (\"entityType\" -> \"campaign\") ~\n      
(\"entityId\" -> data(\"data[id]\")) ~\n      (\"targetEntityType\" -> \"list\") ~\n      (\"targetEntityId\" -> data(\"data[list_id]\")) ~\n      (\"eventTime\" -> eventTime) ~\n      (\"properties\" -> (\n        (\"subject\" -> data(\"data[subject]\")) ~\n        (\"status\" -> data(\"data[status]\")) ~\n        (\"reason\" -> data(\"data[reason]\"))\n      ))\n\n    json\n\n  }\n\n}\n"
  },
  {
    "path": "data/src/main/scala/org/apache/predictionio/data/webhooks/segmentio/SegmentIOConnector.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.segmentio\n\nimport org.apache.predictionio.data.webhooks.{ConnectorException, JsonConnector}\nimport org.json4s._\n\nprivate[predictionio] object SegmentIOConnector extends JsonConnector {\n\n  // private lazy val supportedAPI = Vector(\"2\", \"2.0\", \"2.0.0\")\n\n  implicit val json4sFormats: Formats = DefaultFormats\n\n  override\n  def toEventJson(data: JObject): JObject = {\n    try {\n      val version: String = data.values(\"version\").toString\n/*\n      if (!supportedAPI.contains(version)) {\n        throw new ConnectorException(\n          s\"Supported segment.io API versions: [2]. got [$version]\"\n        )\n      }\n*/\n    } catch { case _: Throwable =>\n      throw new ConnectorException(s\"Failed to get segment.io API version.\")\n    }\n\n    val common = try {\n      data.extract[Common]\n    } catch {\n      case e: Throwable => throw new ConnectorException(\n        s\"Cannot extract Common field from $data. 
${e.getMessage}\", e\n      )\n    }\n\n    try {\n      common.`type` match {\n        case \"identify\" =>\n          toEventJson(\n            common = common,\n            identify = data.extract[Events.Identify]\n          )\n\n        case \"track\" =>\n          toEventJson(\n            common = common,\n            track = data.extract[Events.Track]\n          )\n\n        case \"alias\" =>\n          toEventJson(\n            common = common,\n            alias = data.extract[Events.Alias]\n          )\n\n        case \"page\" =>\n          toEventJson(\n            common = common,\n            page = data.extract[Events.Page]\n          )\n\n        case \"screen\" =>\n          toEventJson(\n            common = common,\n            screen = data.extract[Events.Screen]\n          )\n\n        case \"group\" =>\n          toEventJson(\n            common = common,\n            group = data.extract[Events.Group]\n          )\n\n        case _ =>\n          throw new ConnectorException(\n            s\"Cannot convert unknown type ${common.`type`} to event JSON.\"\n          )\n      }\n    } catch {\n      case e: ConnectorException => throw e\n      case e: Exception =>\n        throw new ConnectorException(\n          s\"Cannot convert $data to event JSON. 
${e.getMessage}\", e\n        )\n    }\n  }\n\n  def toEventJson(common: Common, identify: Events.Identify ): JObject = {\n    import org.json4s.JsonDSL._\n    val eventProperties = \"traits\" -> identify.traits\n    toJson(common, eventProperties)\n  }\n\n  def toEventJson(common: Common, track: Events.Track): JObject = {\n    import org.json4s.JsonDSL._\n    val eventProperties =\n      (\"properties\" -> track.properties) ~\n      (\"event\" -> track.event)\n    toJson(common, eventProperties)\n  }\n\n  def toEventJson(common: Common, alias: Events.Alias): JObject = {\n    import org.json4s.JsonDSL._\n    toJson(common, \"previous_id\" -> alias.previous_id)\n  }\n\n  def toEventJson(common: Common, screen: Events.Screen): JObject = {\n    import org.json4s.JsonDSL._\n    val eventProperties =\n      (\"name\" -> screen.name) ~\n      (\"properties\" -> screen.properties)\n    toJson(common, eventProperties)\n  }\n\n  def toEventJson(common: Common, page: Events.Page): JObject = {\n    import org.json4s.JsonDSL._\n    val eventProperties =\n      (\"name\" -> page.name) ~\n      (\"properties\" -> page.properties)\n    toJson(common, eventProperties)\n  }\n\n  def toEventJson(common: Common, group: Events.Group): JObject = {\n    import org.json4s.JsonDSL._\n    val eventProperties =\n      (\"group_id\" -> group.group_id) ~\n      (\"traits\" -> group.traits)\n    toJson(common, eventProperties)\n  }\n\n  private def toJson(common: Common, props: JObject): JsonAST.JObject = {\n    val commonFields = commonToJson(common)\n    JObject((\"properties\" -> properties(common, props)) :: commonFields.obj)\n  }\n\n  private def properties(common: Common, eventProps: JObject): JObject = {\n    import org.json4s.JsonDSL._\n    common.context map { context =>\n      try {\n        (\"context\" -> Extraction.decompose(context)) ~ eventProps\n      } catch {\n        case e: Throwable =>\n          throw new ConnectorException(\n            s\"Cannot convert $context to 
event JSON. ${e.getMessage }\", e\n          )\n      }\n    } getOrElse eventProps\n  }\n\n  private def commonToJson(common: Common): JObject =\n    commonToJson(common, common.`type`)\n\n  private def commonToJson(common: Common, typ: String): JObject = {\n    import org.json4s.JsonDSL._\n      common.user_id.orElse(common.anonymous_id) match {\n        case Some(userId) =>\n          (\"event\" -> typ) ~\n            (\"entityType\" -> \"user\") ~\n            (\"entityId\" -> userId) ~\n            (\"eventTime\" -> common.timestamp)\n\n        case None =>\n          throw new ConnectorException(\n            \"there was no `userId` or `anonymousId` in the common fields.\"\n          )\n      }\n  }\n}\n\nobject Events {\n\n  private[predictionio] case class Track(\n    event: String,\n    properties: Option[JObject] = None\n  )\n\n  private[predictionio] case class Alias(previous_id: String, user_id: String)\n\n  private[predictionio] case class Group(\n    group_id: String,\n    traits: Option[JObject] = None\n  )\n\n  private[predictionio] case class Screen(\n    name: Option[String] = None,\n    properties: Option[JObject] = None\n  )\n\n  private[predictionio] case class Page(\n    name: Option[String] = None,\n    properties: Option[JObject] = None\n  )\n\n  private[predictionio] case class Identify(\n    user_id: String,\n    traits: Option[JObject]\n  )\n\n}\n\nobject Common {\n\n  private[predictionio] case class Integrations(\n    All: Boolean = false,\n    Mixpanel: Boolean = false,\n    Marketo: Boolean = false,\n    Salesforse: Boolean = false\n  )\n\n  private[predictionio] case class Context(\n    ip: String,\n    library: Library,\n    user_agent: String,\n    app: Option[App] = None,\n    campaign: Option[Campaign] = None,\n    device: Option[Device] = None,\n    network: Option[Network] = None,\n    location: Option[Location] = None,\n    os: Option[OS] = None,\n    referrer: Option[Referrer] = None,\n    screen: Option[Screen] = None,\n    
timezone: Option[String] = None\n  )\n\n  private[predictionio] case class Screen(width: Int, height: Int, density: Int)\n\n  private[predictionio] case class Referrer(id: String, `type`: String)\n\n  private[predictionio] case class OS(name: String, version: String)\n\n  private[predictionio] case class Location(\n    city: Option[String] = None,\n    country: Option[String] = None,\n    latitude: Option[Double] = None,\n    longitude: Option[Double] = None,\n    speed: Option[Int] = None\n  )\n\n  case class Page(\n    path: String,\n    referrer: String,\n    search: String,\n    title: String,\n    url: String\n  )\n\n  private[predictionio] case class Network(\n    bluetooth: Option[Boolean] = None,\n    carrier: Option[String] = None,\n    cellular: Option[Boolean] = None,\n    wifi: Option[Boolean] = None\n  )\n\n  private[predictionio] case class Library(name: String, version: String)\n\n  private[predictionio] case class Device(\n    id: Option[String] = None,\n    advertising_id: Option[String] = None,\n    ad_tracking_enabled: Option[Boolean] = None,\n    manufacturer: Option[String] = None,\n    model: Option[String] = None,\n    name: Option[String] = None,\n    `type`: Option[String] = None,\n    token: Option[String] = None\n  )\n\n  private[predictionio] case class Campaign(\n    name: Option[String] = None,\n    source: Option[String] = None,\n    medium: Option[String] = None,\n    term: Option[String] = None,\n    content: Option[String] = None\n  )\n\n  private[predictionio] case class App(\n    name: Option[String] = None,\n    version: Option[String] = None,\n    build: Option[String] = None\n  )\n\n}\n\nprivate[predictionio] case class Common(\n  `type`: String,\n  sent_at: String,\n  timestamp: String,\n  version: String,\n  anonymous_id: Option[String] = None,\n  user_id: Option[String] = None,\n  context: Option[Common.Context] = None,\n  integrations: Option[Common.Integrations] = None\n)\n"
  },
  {
    "path": "data/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is a dummy that exists only to make storage happy.\n    # The actual testing will not bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/api/EventServiceSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.api\n\nimport akka.event.Logging\nimport org.apache.predictionio.data.storage.Storage\nimport org.specs2.mutable.Specification\nimport akka.http.scaladsl.testkit.Specs2RouteTest\n\n\nclass EventServiceSpec extends Specification with Specs2RouteTest {\n  val eventClient = Storage.getLEvents()\n  val accessKeysClient = Storage.getMetaDataAccessKeys()\n  val channelsClient = Storage.getMetaDataChannels()\n\n  val statsActorRef = system.actorSelection(\"/user/StatsActor\")\n  val pluginsActorRef = system.actorSelection(\"/user/PluginsActor\")\n\n  val logger = Logging(system, getClass)\n  val config = EventServerConfig(ip = \"0.0.0.0\", port = 7070)\n\n  val route = EventServer.createRoute(\n    eventClient,\n    accessKeysClient,\n    channelsClient,\n    logger,\n    statsActorRef,\n    pluginsActorRef,\n    config\n  )\n\n  \"GET / request\" should {\n    \"properly produce OK HttpResponses\" in {\n      Get() ~> route ~> check {\n        status.intValue() shouldEqual 200\n        responseAs[String] shouldEqual \"\"\"{\"status\":\"alive\"}\"\"\"\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/api/SegmentIOAuthSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.api\n\nimport akka.event.Logging\nimport akka.http.scaladsl.model.ContentTypes\nimport akka.http.scaladsl.model.headers.RawHeader\nimport akka.http.scaladsl.server.Route\nimport org.apache.predictionio.data.storage._\nimport org.joda.time.DateTime\nimport org.specs2.mutable.Specification\nimport sun.misc.BASE64Encoder\nimport akka.http.scaladsl.testkit.Specs2RouteTest\n\nimport scala.concurrent.{ExecutionContext, Future}\n\nclass SegmentIOAuthSpec extends Specification with Specs2RouteTest {\n\n  sequential\n  isolated\n  val eventClient = new LEvents {\n    override def init(appId: Int, channelId: Option[Int]): Boolean = true\n\n    override def futureInsert(event: Event, appId: Int, channelId: Option[Int])\n        (implicit ec: ExecutionContext): Future[String] =\n      Future successful \"event_id\"\n\n    override def futureFind(\n      appId: Int, channelId: Option[Int], startTime: Option[DateTime],\n      untilTime: Option[DateTime], entityType: Option[String],\n      entityId: Option[String], eventNames: Option[Seq[String]],\n      targetEntityType: Option[Option[String]],\n      targetEntityId: 
Option[Option[String]], limit: Option[Int],\n      reversed: Option[Boolean])\n        (implicit ec: ExecutionContext): Future[Iterator[Event]] =\n      Future successful List.empty[Event].iterator\n\n    override def futureGet(eventId: String, appId: Int, channelId: Option[Int])\n        (implicit ec: ExecutionContext): Future[Option[Event]] =\n      Future successful None\n\n    override def remove(appId: Int, channelId: Option[Int]): Boolean = true\n\n    override def futureDelete(eventId: String, appId: Int, channelId: Option[Int])\n        (implicit ec: ExecutionContext): Future[Boolean] =\n      Future successful true\n\n    override def close(): Unit = {}\n  }\n  val appId = 0\n  val accessKeysClient = new AccessKeys {\n    override def insert(k: AccessKey): Option[String] = null\n    override def getByAppid(appid: Int): Seq[AccessKey] = null\n    override def update(k: AccessKey): Unit = {}\n    override def delete(k: String): Unit = {}\n    override def getAll(): Seq[AccessKey] = null\n\n    override def get(k: String): Option[AccessKey] =\n      k match {\n        case \"abc\" => Some(AccessKey(k, appId, Seq.empty))\n        case _ => None\n      }\n  }\n\n  val channelsClient = Storage.getMetaDataChannels()\n\n  val statsActorRef = system.actorSelection(\"/user/StatsActor\")\n  val pluginsActorRef = system.actorSelection(\"/user/PluginsActor\")\n\n  val base64Encoder = new BASE64Encoder\n  val logger = Logging(system, getClass)\n  val config = EventServerConfig(ip = \"0.0.0.0\", port = 7070)\n\n  val route = EventServer.createRoute(\n    eventClient,\n    accessKeysClient,\n    channelsClient,\n    logger,\n    statsActorRef,\n    pluginsActorRef,\n    config\n  )\n\n  \"Event Service\" should {\n    \"reject with CredentialsRejected with invalid credentials\" in new StorageMockContext {\n      val accessKey = \"abc123:\"\n      Post(\"/webhooks/segmentio.json\")\n          .withHeaders(RawHeader(\"Authorization\", s\"Basic $accessKey\")) ~> 
Route.seal(route) ~> check {\n        status.intValue() shouldEqual 401\n        responseAs[String] shouldEqual \"\"\"{\"message\":\"Invalid accessKey.\"}\"\"\"\n      }\n      success\n    }\n  }\n\n    \"reject with CredentialsMissed without credentials\" in {\n      Post(\"/webhooks/segmentio.json\") ~> Route.seal(route) ~> check {\n        status.intValue() shouldEqual 401\n        responseAs[String] shouldEqual \"\"\"{\"message\":\"Missing accessKey.\"}\"\"\"\n      }\n      success\n    }\n\n    \"process SegmentIO identity request properly\" in {\n      val jsonReq =\n        \"\"\"\n          |{\n          |  \"anonymous_id\": \"507f191e810c19729de860ea\",\n          |  \"channel\": \"browser\",\n          |  \"context\": {\n          |    \"ip\": \"8.8.8.8\",\n          |    \"userAgent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)\"\n          |  },\n          |  \"message_id\": \"022bb90c-bbac-11e4-8dfc-aa07a5b093db\",\n          |  \"timestamp\": \"2015-02-23T22:28:55.387Z\",\n          |  \"sent_at\": \"2015-02-23T22:28:55.111Z\",\n          |  \"traits\": {\n          |    \"name\": \"Peter Gibbons\",\n          |    \"email\": \"peter@initech.com\",\n          |    \"plan\": \"premium\",\n          |    \"logins\": 5\n          |  },\n          |  \"type\": \"identify\",\n          |  \"user_id\": \"97980cfea0067\",\n          |  \"version\": \"2\"\n          |}\n        \"\"\".stripMargin\n\n      val accessKey = \"abc:\"\n      val accessKeyEncoded = base64Encoder.encodeBuffer(accessKey.getBytes)\n      Post(\"/webhooks/segmentio.json\")\n          .withHeaders(RawHeader(\"Authorization\", s\"Basic $accessKeyEncoded\"))\n          .withEntity(ContentTypes.`application/json`, jsonReq) ~> route ~> check {\n        println(responseAs[String])\n        status.intValue() shouldEqual 201\n        responseAs[String] shouldEqual \"\"\"{\"eventId\":\"event_id\"}\"\"\"\n      }\n      success\n  }\n}\n\n\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/BiMapSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.specs2.mutable._\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.SparkConf\nimport org.apache.spark.rdd.RDD\n\nclass BiMapSpec extends Specification {\n\n  System.clearProperty(\"spark.driver.port\")\n  System.clearProperty(\"spark.hostPort\")\n  val sc = new SparkContext(\"local[4]\", \"BiMapSpec test\")\n\n  \"BiMap created with map\" should {\n\n    val keys = Seq(1, 4, 6)\n    val orgValues = Seq(2, 5, 7)\n    val org = keys.zip(orgValues).toMap\n    val bi = BiMap(org)\n\n    \"return correct values for each key of original map\" in {\n      val biValues = keys.map(k => bi(k))\n\n      biValues must beEqualTo(orgValues)\n    }\n\n    \"get return Option[V]\" in {\n      val checkKeys = keys ++ Seq(12345)\n      val biValues = checkKeys.map(k => bi.get(k))\n      val expected = orgValues.map(Some(_)) ++ Seq(None)\n\n      biValues must beEqualTo(expected)\n    }\n\n    \"getOrElse return value for each key of original map\" in {\n      val biValues = keys.map(k => bi.getOrElse(k, -1))\n\n      biValues must 
beEqualTo(orgValues)\n    }\n\n    \"getOrElse return default values for invalid key\" in {\n      val keys = Seq(999, -1, -2)\n      val defaults = Seq(1234, 5678, 987)\n      val biValues = keys.zip(defaults).map{ case (k,d) => bi.getOrElse(k, d) }\n\n      biValues must beEqualTo(defaults)\n    }\n\n    \"contains() returns true/false correctly\" in {\n      val checkKeys = keys ++ Seq(12345)\n      val biValues = checkKeys.map(k => bi.contains(k))\n      val expected = orgValues.map(_ => true) ++ Seq(false)\n\n      biValues must beEqualTo(expected)\n    }\n\n    \"same size as original map\" in {\n      (bi.size) must beEqualTo(org.size)\n    }\n\n    \"take(2) returns BiMap of size 2\" in {\n      bi.take(2).size must beEqualTo(2)\n    }\n\n    \"toMap contain same element as original map\" in {\n      (bi.toMap) must beEqualTo(org)\n    }\n\n    \"toSeq contain same element as original map\" in {\n      (bi.toSeq) must containTheSameElementsAs(org.toSeq)\n    }\n\n    \"inverse and return correct keys for each values of original map\" in {\n      val biKeys = orgValues.map(v => bi.inverse(v))\n      biKeys must beEqualTo(keys)\n    }\n\n    \"inverse with same size\" in {\n      bi.inverse.size must beEqualTo(org.size)\n    }\n\n    \"inverse's inverse reference back to the same original object\" in {\n      // NOTE: reference equality\n      bi.inverse.inverse == bi\n    }\n  }\n\n  \"BiMap created with duplicated values in map\" should {\n    val dup = Map(1 -> 2, 4 -> 7, 6 -> 7)\n    \"return IllegalArgumentException\" in {\n      BiMap(dup) must throwA[IllegalArgumentException]\n    }\n  }\n\n  \"BiMap.stringLong and stringInt\" should {\n\n    \"create BiMap from set of string\" in {\n      val keys = Set(\"a\", \"b\", \"foo\", \"bar\")\n      val values: Seq[Long] = Seq(0, 1, 2, 3)\n\n      val bi = BiMap.stringLong(keys)\n      val biValues = keys.map(k => bi(k))\n\n      val biInt = BiMap.stringInt(keys)\n      val valuesInt: Seq[Int] = 
values.map(_.toInt)\n      val biIntValues = keys.map(k => biInt(k))\n\n      biValues must containTheSameElementsAs(values) and\n        (biIntValues must containTheSameElementsAs(valuesInt))\n    }\n\n    \"create BiMap from Array of unique string\" in {\n      val keys = Array(\"a\", \"b\", \"foo\", \"bar\")\n      val values: Seq[Long] = Seq(0, 1, 2, 3)\n\n      val bi = BiMap.stringLong(keys)\n      val biValues = keys.toSeq.map(k => bi(k))\n\n      val biInt = BiMap.stringInt(keys)\n      val valuesInt: Seq[Int] = values.map(_.toInt)\n      val biIntValues = keys.toSeq.map(k => biInt(k))\n\n      biValues must containTheSameElementsAs(values) and\n        (biIntValues must containTheSameElementsAs(valuesInt))\n    }\n\n    \"not guarantee sequential index for Array with duplicated string\" in {\n      val keys = Array(\"a\", \"b\", \"foo\", \"bar\", \"a\", \"b\", \"x\")\n      val dupValues: Seq[Long] = Seq(0, 1, 2, 3, 4, 5, 6)\n      val values = keys.zip(dupValues).toMap.values.toSeq\n\n      val bi = BiMap.stringLong(keys)\n      val biValues = keys.toSet[String].map(k => bi(k))\n\n      val biInt = BiMap.stringInt(keys)\n      val valuesInt: Seq[Int] = values.map(_.toInt)\n      val biIntValues = keys.toSet[String].map(k => biInt(k))\n\n      biValues must containTheSameElementsAs(values) and\n        (biIntValues must containTheSameElementsAs(valuesInt))\n    }\n\n    \"create BiMap from RDD[String]\" in {\n\n      val keys = Seq(\"a\", \"b\", \"foo\", \"bar\")\n      val values: Seq[Long] = Seq(0, 1, 2, 3)\n      val rdd = sc.parallelize(keys)\n\n      val bi = BiMap.stringLong(rdd)\n      val biValues = keys.map(k => bi(k))\n\n      val biInt = BiMap.stringInt(rdd)\n      val valuesInt: Seq[Int] = values.map(_.toInt)\n      val biIntValues = keys.map(k => biInt(k))\n\n      biValues must containTheSameElementsAs(values) and\n        (biIntValues must containTheSameElementsAs(valuesInt))\n    }\n\n    \"create BiMap from RDD[String] with duplicated 
string\" in {\n\n      val keys = Seq(\"a\", \"b\", \"foo\", \"bar\", \"a\", \"b\", \"x\")\n      val values: Seq[Long] = Seq(0, 1, 2, 3, 4)\n      val rdd = sc.parallelize(keys)\n\n      val bi = BiMap.stringLong(rdd)\n      val biValues = keys.distinct.map(k => bi(k))\n\n      val biInt = BiMap.stringInt(rdd)\n      val valuesInt: Seq[Int] = values.map(_.toInt)\n      val biIntValues = keys.distinct.map(k => biInt(k))\n\n      biValues must containTheSameElementsAs(values) and\n        (biIntValues must containTheSameElementsAs(valuesInt))\n    }\n  }\n\n  step(sc.stop())\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/DataMapSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.specs2.mutable._\n\nclass DataMapSpec extends Specification {\n\n  \"DataMap\" should {\n\n    val properties = DataMap(\"\"\"\n      {\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\", \"c\"],\n        \"prop6\" : 4.56\n      }\n      \"\"\")\n\n    \"get Int data\" in {\n      properties.get[Int](\"prop1\") must beEqualTo(1)\n      properties.getOpt[Int](\"prop1\") must beEqualTo(Some(1))\n    }\n\n    \"get String data\" in {\n      properties.get[String](\"prop2\") must beEqualTo(\"value2\")\n      properties.getOpt[String](\"prop2\") must beEqualTo(Some(\"value2\"))\n    }\n\n    \"get List of Int data\" in {\n      properties.get[List[Int]](\"prop3\") must beEqualTo(List(1,2,3))\n      properties.getOpt[List[Int]](\"prop3\") must beEqualTo(Some(List(1,2,3)))\n    }\n\n    \"get Boolean data\" in {\n      properties.get[Boolean](\"prop4\") must beEqualTo(true)\n      properties.getOpt[Boolean](\"prop4\") must beEqualTo(Some(true))\n    }\n\n    \"get List of String 
data\" in {\n      properties.get[List[String]](\"prop5\") must beEqualTo(List(\"a\", \"b\", \"c\", \"c\"))\n      properties.getOpt[List[String]](\"prop5\") must beEqualTo(Some(List(\"a\", \"b\", \"c\", \"c\")))\n    }\n\n    \"get Set of String data\" in {\n      properties.get[Set[String]](\"prop5\") must beEqualTo(Set(\"a\", \"b\", \"c\"))\n      properties.getOpt[Set[String]](\"prop5\") must beEqualTo(Some(Set(\"a\", \"b\", \"c\")))\n    }\n\n    \"get Double data\" in {\n      properties.get[Double](\"prop6\") must beEqualTo(4.56)\n      properties.getOpt[Double](\"prop6\") must beEqualTo(Some(4.56))\n    }\n\n    \"get empty optional Int data\" in {\n      properties.getOpt[Int](\"prop9999\") must beEqualTo(None)\n    }\n\n  }\n\n  \"DataMap with multi-level data\" should {\n    val properties = DataMap(\"\"\"\n      {\n        \"context\": {\n          \"ip\": \"1.23.4.56\",\n          \"prop1\": 2.345\n          \"prop2\": \"value1\",\n          \"prop4\": [1, 2, 3]\n        },\n        \"anotherPropertyA\": 4.567,\n        \"anotherPropertyB\": false\n      }\n      \"\"\")\n\n    \"get case class data\" in {\n      val expected = DataMapSpec.Context(\n        ip = \"1.23.4.56\",\n        prop1 = Some(2.345),\n        prop2 = Some(\"value1\"),\n        prop3 = None,\n        prop4 = List(1,2,3)\n      )\n\n      properties.get[DataMapSpec.Context](\"context\") must beEqualTo(expected)\n    }\n\n    \"get empty optional case class data\" in {\n      properties.getOpt[DataMapSpec.Context](\"context999\") must beEqualTo(None)\n    }\n\n    \"get double data\" in {\n      properties.get[Double](\"anotherPropertyA\") must beEqualTo(4.567)\n    }\n\n    \"get boolean data\" in {\n      properties.get[Boolean](\"anotherPropertyB\") must beEqualTo(false)\n    }\n  }\n\n  \"DataMap extract\" should {\n\n    \"extract to case class object\" in {\n      val properties = DataMap(\"\"\"\n        {\n          \"prop1\" : 1,\n          \"prop2\" : \"value2\",\n          
\"prop3\" : [1, 2, 3],\n          \"prop4\" : true,\n          \"prop5\" : [\"a\", \"b\", \"c\", \"c\"],\n          \"prop6\" : 4.56\n        }\n        \"\"\")\n\n      val result = properties.extract[DataMapSpec.BasicProperty]\n      val expected = DataMapSpec.BasicProperty(\n        prop1 = 1,\n        prop2 = \"value2\",\n        prop3 = List(1,2,3),\n        prop4 = true,\n        prop5 = List(\"a\", \"b\", \"c\", \"c\"),\n        prop6 = 4.56\n      )\n\n      result must beEqualTo(expected)\n    }\n\n    \"extract with optional fields\" in {\n      val propertiesEmpty = DataMap(\"\"\"{}\"\"\")\n      val propertiesSome = DataMap(\"\"\"\n        {\n          \"prop1\" : 1,\n          \"prop5\" : [\"a\", \"b\", \"c\", \"c\"],\n          \"prop6\" : 4.56\n        }\n        \"\"\")\n\n      val resultEmpty = propertiesEmpty.extract[DataMapSpec.OptionProperty]\n      val expectedEmpty = DataMapSpec.OptionProperty(\n        prop1 = None,\n        prop2 = None,\n        prop3 = None,\n        prop4 = None,\n        prop5 = None,\n        prop6 = None\n      )\n\n      val resultSome = propertiesSome.extract[DataMapSpec.OptionProperty]\n      val expectedSome = DataMapSpec.OptionProperty(\n        prop1 = Some(1),\n        prop2 = None,\n        prop3 = None,\n        prop4 = None,\n        prop5 = Some(List(\"a\", \"b\", \"c\", \"c\")),\n        prop6 = Some(4.56)\n      )\n\n      resultEmpty must beEqualTo(expectedEmpty)\n      resultSome must beEqualTo(expectedSome)\n    }\n\n    \"extract to multi-level object\" in {\n      val properties = DataMap(\"\"\"\n        {\n          \"context\": {\n            \"ip\": \"1.23.4.56\",\n            \"prop1\": 2.345\n            \"prop2\": \"value1\",\n            \"prop4\": [1, 2, 3]\n          },\n          \"anotherPropertyA\": 4.567,\n          \"anotherPropertyB\": false\n        }\n        \"\"\")\n\n      val result = properties.extract[DataMapSpec.MultiLevelProperty]\n      val expected = 
DataMapSpec.MultiLevelProperty(\n        context = DataMapSpec.Context(\n          ip = \"1.23.4.56\",\n          prop1 = Some(2.345),\n          prop2 = Some(\"value1\"),\n          prop3 = None,\n          prop4 = List(1,2,3)\n        ),\n        anotherPropertyA = 4.567,\n        anotherPropertyB = false\n      )\n\n      result must beEqualTo(expected)\n    }\n\n  }\n}\n\nobject DataMapSpec {\n\n  // define this case class inside object to avoid case class name conflict with other tests\n  case class Context(\n    ip: String,\n    prop1: Option[Double],\n    prop2: Option[String],\n    prop3: Option[Int],\n    prop4: List[Int]\n  )\n\n  case class BasicProperty(\n    prop1: Int,\n    prop2: String,\n    prop3: List[Int],\n    prop4: Boolean,\n    prop5: List[String],\n    prop6: Double\n  )\n\n  case class OptionProperty(\n    prop1: Option[Int],\n    prop2: Option[String],\n    prop3: Option[List[Int]],\n    prop4: Option[Boolean],\n    prop5: Option[List[String]],\n    prop6: Option[Double]\n  )\n\n  case class MultiLevelProperty(\n    context: Context,\n    anotherPropertyA: Double,\n    anotherPropertyB: Boolean\n  )\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/LEventAggregatorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.specs2.mutable._\n\nimport org.json4s.JObject\nimport org.json4s.native.JsonMethods.parse\n\nimport org.joda.time.DateTime\n\nclass LEventAggregatorSpec extends Specification with TestEvents {\n\n  \"LEventAggregator.aggregateProperties()\" should {\n\n    \"aggregate two entities' properties as DataMap correctly\" in {\n      val events = Vector(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n      val result: Map[String, DataMap] =\n        LEventAggregator.aggregateProperties(events.toIterator)\n\n      val expected = Map(\n        \"u1\" -> DataMap(u1),\n        \"u2\" -> DataMap(u2)\n      )\n\n      result must beEqualTo(expected)\n    }\n\n    \"aggregate two entities' properties as PropertyMap correctly\" in {\n      val events = Vector(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n      val result: Map[String, PropertyMap] =\n        LEventAggregator.aggregateProperties(events.toIterator)\n\n      val expected = Map(\n        \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n        \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n      )\n\n      result must 
beEqualTo(expected)\n    }\n\n\n    \"aggregate deleted entity correctly\" in {\n      val events = Vector(u1e5, u2e2, u1e3, u1ed, u1e1, u2e3, u2e1, u1e4, u1e2)\n\n      val result = LEventAggregator.aggregateProperties(events.toIterator)\n      val expected = Map(\n        \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n      )\n\n      result must beEqualTo(expected)\n    }\n  }\n\n\n  \"LEventAggregator.aggregatePropertiesSingle()\" should {\n\n    \"aggregate single entity properties as DataMap correctly\" in {\n        val events = Vector(u1e5, u1e3, u1e1, u1e4, u1e2)\n        val eventsIt = events.toIterator\n\n        val result: Option[DataMap] = LEventAggregator\n          .aggregatePropertiesSingle(eventsIt)\n        val expected = DataMap(u1)\n\n        result must beEqualTo(Some(expected))\n    }\n\n    \"aggregate single entity properties as PropertyMap correctly\" in {\n        val events = Vector(u1e5, u1e3, u1e1, u1e4, u1e2)\n        val eventsIt = events.toIterator\n\n        val result: Option[PropertyMap] = LEventAggregator\n          .aggregatePropertiesSingle(eventsIt)\n        val expected = PropertyMap(u1, u1BaseTime, u1LastTime)\n\n        result must beEqualTo(Some(expected))\n    }\n\n    \"aggregate deleted entity correctly\" in {\n      // put the delete event in the middle\n      val events = Vector(u1e4, u1e2, u1ed, u1e3, u1e1, u1e5)\n      val eventsIt = events.toIterator\n\n      val result = LEventAggregator.aggregatePropertiesSingle(eventsIt)\n\n      result must beEqualTo(None)\n    }\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/PEventAggregatorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.specs2.mutable._\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\n\nclass PEventAggregatorSpec extends Specification with TestEvents {\n\n  System.clearProperty(\"spark.driver.port\")\n  System.clearProperty(\"spark.hostPort\")\n  val sc = new SparkContext(\"local[4]\", \"PEventAggregatorSpec test\")\n\n  \"PEventAggregator\" should {\n\n    \"aggregate two entities' properties as DataMap/PropertyMap correctly\" in {\n      val events = sc.parallelize(Seq(\n        u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2))\n\n      val users = PEventAggregator.aggregateProperties(events)\n\n      val userMap = users.collectAsMap.toMap\n      val expectedDM = Map(\n        \"u1\" -> DataMap(u1),\n        \"u2\" -> DataMap(u2)\n      )\n\n      val expectedPM = Map(\n        \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n        \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n      )\n\n      userMap must beEqualTo(expectedDM)\n      userMap must beEqualTo(expectedPM)\n    }\n\n    \"aggregate deleted entity correctly\" 
in {\n      // put the delete event in the middle\n      val events = sc.parallelize(Seq(\n        u1e5, u2e2, u1e3, u1ed, u1e1, u2e3, u2e1, u1e4, u1e2))\n\n      val users = PEventAggregator.aggregateProperties(events)\n\n      val userMap = users.collectAsMap.toMap\n      val expectedPM = Map(\n        \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n      )\n\n      userMap must beEqualTo(expectedPM)\n    }\n\n  }\n\n  step(sc.stop())\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/StorageMockContext.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage\n\nimport org.scalamock.specs2.MockContext\n\ntrait StorageMockContext extends MockContext {\n\n  if(!EnvironmentFactory.environmentService.isDefined){\n    val mockedEnvService = mock[EnvironmentService]\n    (mockedEnvService.envKeys _)\n      .expects\n      .returning(List(\"PIO_STORAGE_REPOSITORIES_METADATA_NAME\",\n        \"PIO_STORAGE_SOURCES_MYSQL_TYPE\",\n        \"PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME\",\n        \"PIO_STORAGE_SOURCES_EVENTDATA_TYPE\"))\n      .twice\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_METADATA_NAME\")\n      .returning(\"test_metadata\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\")\n      .returning(\"MYSQL\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME\")\n      .returning(\"test_eventdata\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\")\n      .returning(\"MYSQL\")\n\n    (mockedEnvService.getByKey _)\n      .expects(\"PIO_STORAGE_SOURCES_MYSQL_TYPE\")\n      
.returning(\"jdbc\")\n\n    (mockedEnvService.filter _)\n      .expects(*)\n      .returning(Map(\n        \"URL\" -> \"jdbc:h2:~/test;MODE=MySQL;AUTO_SERVER=TRUE\",\n        \"USERNAME\" -> \"sa\",\n        \"PASSWORD\" -> \"\")\n      )\n\n    EnvironmentFactory.environmentService = new Some(mockedEnvService)\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/storage/TestEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\nimport org.joda.time.DateTime\nimport org.joda.time.DateTimeZone\n\ntrait TestEvents {\n\n  val u1BaseTime = new DateTime(654321)\n  val u2BaseTime = new DateTime(6543210)\n  val u3BaseTime = new DateTime(6543410)\n\n  // u1 events\n  val u1e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u1\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 1,\n        \"b\" : \"value2\",\n        \"d\" : [1, 2, 3],\n      }\"\"\"),\n    eventTime = u1BaseTime\n  )\n\n  val u1e2 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"a\" : 2}\"\"\"),\n    eventTime = u1BaseTime.plusDays(1)\n  )\n\n  val u1e3 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value4\"}\"\"\"),\n    eventTime = u1BaseTime.plusDays(2)\n  )\n\n  val u1e4 = u1e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"b\" : null}\"\"\"),\n    eventTime = u1BaseTime.plusDays(3)\n  )\n\n  val u1e5 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"e\" : \"new\"}\"\"\"),\n    eventTime = u1BaseTime.plusDays(4)\n  )\n\n  
val u1LastTime = u1BaseTime.plusDays(4)\n  val u1 = \"\"\"{\"a\": 2, \"d\": [1, 2, 3], \"e\": \"new\"}\"\"\"\n\n  // delete event for u1\n  val u1ed = u1e1.copy(\n    event = \"$delete\",\n    properties = DataMap(),\n    eventTime = u1BaseTime.plusDays(5)\n  )\n\n  // u2 events\n  val u2e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u2\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 21,\n        \"b\" : \"value12\",\n        \"d\" : [7, 5, 6],\n      }\"\"\"),\n    eventTime = u2BaseTime\n  )\n\n  val u2e2 = u2e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u2BaseTime.plusDays(1)\n  )\n\n  val u2e3 = u2e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value9\", \"g\": \"new11\"}\"\"\"),\n    eventTime = u2BaseTime.plusDays(2)\n  )\n\n  val u2LastTime = u2BaseTime.plusDays(2)\n  val u2 = \"\"\"{\"b\": \"value9\", \"d\": [7, 5, 6], \"g\": \"new11\"}\"\"\"\n\n  // u3 events\n  val u3e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u3\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 22,\n        \"b\" : \"value13\",\n        \"d\" : [5, 6, 1],\n      }\"\"\"),\n    eventTime = u3BaseTime\n  )\n\n  val u3e2 = u3e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u3BaseTime.plusDays(1)\n  )\n\n  val u3e3 = u3e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value10\", \"f\": \"new12\", \"d\" : [1, 3, 2]}\"\"\"),\n    eventTime = u3BaseTime.plusDays(2)\n  )\n\n  val u3LastTime = u3BaseTime.plusDays(2)\n  val u3 = \"\"\"{\"b\": \"value10\", \"d\": [1, 3, 2], \"f\": \"new12\"}\"\"\"\n\n  // some random events\n  val r1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = 
Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now,\n    prId = Some(\"my_prid\")\n  )\n  val r2 = Event(\n    event = \"my_event2\",\n    entityType = \"my_entity_type2\",\n    entityId = \"my_entity_id2\"\n  )\n  val r3 = Event(\n    event = \"my_event3\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"propA\" : 1.2345,\n        \"propB\" : \"valueB\",\n      }\"\"\"\n    ),\n    prId = Some(\"my_prid\")\n  )\n  val r4 = Event(\n    event = \"my_event4\",\n    entityType = \"my_entity_type4\",\n    entityId = \"my_entity_id4\",\n    targetEntityType = Some(\"my_target_entity_type4\"),\n    targetEntityId = Some(\"my_target_entity_id4\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"),\n    eventTime = DateTime.now\n  )\n  val r5 = Event(\n    event = \"my_event5\",\n    entityType = \"my_entity_type5\",\n    entityId = \"my_entity_id5\",\n    targetEntityType = Some(\"my_target_entity_type5\"),\n    targetEntityId = Some(\"my_target_entity_id5\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n  val r6 = Event(\n    event = \"my_event6\",\n    entityType = \"my_entity_type6\",\n    entityId = 
\"my_entity_id6\",\n    targetEntityType = Some(\"my_target_entity_type6\"),\n    targetEntityId = Some(\"my_target_entity_id6\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 6,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [6, 7, 8],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n\n  // timezone\n  val tz1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id0\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"-08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz2 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id1\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+02:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz3 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id2\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/webhooks/ConnectorTestUtil.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks\n\nimport org.specs2.execute.Result\nimport org.specs2.mutable._\n\nimport org.json4s.JObject\nimport org.json4s.DefaultFormats\nimport org.json4s.native.JsonMethods.parse\nimport org.json4s.native.Serialization.write\n\n/** TestUtil for JsonConnector */\ntrait ConnectorTestUtil extends Specification {\n\n  implicit val formats = DefaultFormats\n\n  def check(connector: JsonConnector, original: String, event: String): Result = {\n    val originalJson = parse(original).asInstanceOf[JObject]\n    val eventJson = parse(event).asInstanceOf[JObject]\n    // write and parse back to discard any JNothing field\n    val result = parse(write(connector.toEventJson(originalJson))).asInstanceOf[JObject]\n    result.obj must containTheSameElementsAs(eventJson.obj)\n  }\n\n  def check(connector: FormConnector, original: Map[String, String], event: String) = {\n\n    val eventJson = parse(event).asInstanceOf[JObject]\n    // write and parse back to discard any JNothing field\n    val result = parse(write(connector.toEventJson(original))).asInstanceOf[JObject]\n\n    result.obj must 
containTheSameElementsAs(eventJson.obj)\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/webhooks/exampleform/ExampleFormConnectorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.exampleform\n\nimport org.apache.predictionio.data.webhooks.ConnectorTestUtil\n\nimport org.specs2.mutable._\n\n/** Test the ExampleFormConnector */\nclass ExampleFormConnectorSpec extends Specification with ConnectorTestUtil {\n\n  \"ExampleFormConnector\" should {\n\n    \"convert userAction to Event JSON\" in {\n      // webhooks input\n      val userAction = Map(\n        \"type\" -> \"userAction\",\n        \"userId\" -> \"as34smg4\",\n        \"event\" -> \"do_something\",\n        \"context[ip]\" -> \"24.5.68.47\", // optional\n        \"context[prop1]\" -> \"2.345\", // optional\n        \"context[prop2]\" -> \"value1\", // optional\n        \"anotherProperty1\" -> \"100\",\n        \"anotherProperty2\"-> \"optional1\", // optional\n        \"timestamp\" -> \"2015-01-02T00:30:12.984Z\"\n      )\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"properties\": {\n            \"context\": {\n              \"ip\": \"24.5.68.47\",\n    
          \"prop1\": 2.345\n              \"prop2\": \"value1\"\n            },\n            \"anotherProperty1\": 100,\n            \"anotherProperty2\": \"optional1\"\n          }\n          \"eventTime\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      check(ExampleFormConnector, userAction, expected)\n    }\n\n    \"convert userAction without optional fields to Event JSON\" in {\n      // webhooks input\n      val userAction = Map(\n        \"type\" -> \"userAction\",\n        \"userId\" -> \"as34smg4\",\n        \"event\" -> \"do_something\",\n        \"anotherProperty1\" -> \"100\",\n        \"timestamp\" -> \"2015-01-02T00:30:12.984Z\"\n      )\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"properties\": {\n            \"anotherProperty1\": 100,\n          }\n          \"eventTime\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      check(ExampleFormConnector, userAction, expected)\n    }\n\n    \"convert userActionItem to Event JSON\" in {\n      // webhooks input\n      val userActionItem = Map(\n        \"type\" -> \"userActionItem\",\n        \"userId\" -> \"as34smg4\",\n        \"event\" -> \"do_something_on\",\n        \"itemId\" -> \"kfjd312bc\",\n        \"context[ip]\" -> \"1.23.4.56\",\n        \"context[prop1]\" -> \"2.345\",\n        \"context[prop2]\" -> \"value1\",\n        \"anotherPropertyA\" -> \"4.567\", // optional\n        \"anotherPropertyB\" -> \"false\", // optional\n        \"timestamp\" -> \"2015-01-15T04:20:23.567Z\"\n      )\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something_on\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": \"kfjd312bc\"\n          \"properties\": {\n      
      \"context\": {\n              \"ip\": \"1.23.4.56\",\n              \"prop1\": 2.345\n              \"prop2\": \"value1\"\n            },\n            \"anotherPropertyA\": 4.567\n            \"anotherPropertyB\": false\n          }\n          \"eventTime\": \"2015-01-15T04:20:23.567Z\"\n        }\n      \"\"\"\n\n      check(ExampleFormConnector, userActionItem, expected)\n    }\n\n    \"convert userActionItem without optional fields to Event JSON\" in {\n      // webhooks input\n      val userActionItem = Map(\n        \"type\" -> \"userActionItem\",\n        \"userId\" -> \"as34smg4\",\n        \"event\" -> \"do_something_on\",\n        \"itemId\" -> \"kfjd312bc\",\n        \"context[ip]\" -> \"1.23.4.56\",\n        \"context[prop1]\" -> \"2.345\",\n        \"context[prop2]\" -> \"value1\",\n        \"timestamp\" -> \"2015-01-15T04:20:23.567Z\"\n      )\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something_on\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": \"kfjd312bc\"\n          \"properties\": {\n            \"context\": {\n              \"ip\": \"1.23.4.56\",\n              \"prop1\": 2.345\n              \"prop2\": \"value1\"\n            }\n          }\n          \"eventTime\": \"2015-01-15T04:20:23.567Z\"\n        }\n      \"\"\"\n\n      check(ExampleFormConnector, userActionItem, expected)\n    }\n\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/webhooks/examplejson/ExampleJsonConnectorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.examplejson\n\nimport org.apache.predictionio.data.webhooks.ConnectorTestUtil\n\nimport org.specs2.mutable._\n\n/** Test the ExampleJsonConnector */\nclass ExampleJsonConnectorSpec extends Specification with ConnectorTestUtil {\n\n  \"ExampleJsonConnector\" should {\n\n    \"convert userAction to Event JSON\" in {\n      // webhooks input\n      val userAction = \"\"\"\n        {\n          \"type\": \"userAction\"\n          \"userId\": \"as34smg4\",\n          \"event\": \"do_something\",\n          \"context\": {\n            \"ip\": \"24.5.68.47\",\n            \"prop1\": 2.345\n            \"prop2\": \"value1\"\n          },\n          \"anotherProperty1\": 100,\n          \"anotherProperty2\": \"optional1\",\n          \"timestamp\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"properties\": {\n            \"context\": {\n              \"ip\": \"24.5.68.47\",\n              
\"prop1\": 2.345\n              \"prop2\": \"value1\"\n            },\n            \"anotherProperty1\": 100,\n            \"anotherProperty2\": \"optional1\"\n          }\n          \"eventTime\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      check(ExampleJsonConnector, userAction, expected)\n    }\n\n    \"convert userAction without optional field to Event JSON\" in {\n      // webhooks input\n      val userAction = \"\"\"\n        {\n          \"type\": \"userAction\"\n          \"userId\": \"as34smg4\",\n          \"event\": \"do_something\",\n          \"anotherProperty1\": 100,\n          \"timestamp\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"properties\": {\n            \"anotherProperty1\": 100,\n          }\n          \"eventTime\": \"2015-01-02T00:30:12.984Z\"\n        }\n      \"\"\"\n\n      check(ExampleJsonConnector, userAction, expected)\n    }\n\n    \"convert userActionItem to Event JSON\" in {\n      // webhooks input\n      val userActionItem = \"\"\"\n        {\n          \"type\": \"userActionItem\"\n          \"userId\": \"as34smg4\",\n          \"event\": \"do_something_on\",\n          \"itemId\": \"kfjd312bc\",\n          \"context\": {\n            \"ip\": \"1.23.4.56\",\n            \"prop1\": 2.345\n            \"prop2\": \"value1\"\n          },\n          \"anotherPropertyA\": 4.567\n          \"anotherPropertyB\": false\n          \"timestamp\": \"2015-01-15T04:20:23.567Z\"\n      }\n      \"\"\"\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something_on\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": \"kfjd312bc\"\n          
\"properties\": {\n            \"context\": {\n              \"ip\": \"1.23.4.56\",\n              \"prop1\": 2.345\n              \"prop2\": \"value1\"\n            },\n            \"anotherPropertyA\": 4.567\n            \"anotherPropertyB\": false\n          }\n          \"eventTime\": \"2015-01-15T04:20:23.567Z\"\n        }\n      \"\"\"\n\n      check(ExampleJsonConnector, userActionItem, expected)\n    }\n\n    \"convert userActionItem without optional fields to Event JSON\" in {\n      // webhooks input\n      val userActionItem = \"\"\"\n        {\n          \"type\": \"userActionItem\"\n          \"userId\": \"as34smg4\",\n          \"event\": \"do_something_on\",\n          \"itemId\": \"kfjd312bc\",\n          \"context\": {\n            \"ip\": \"1.23.4.56\",\n            \"prop1\": 2.345\n            \"prop2\": \"value1\"\n          }\n          \"timestamp\": \"2015-01-15T04:20:23.567Z\"\n      }\n      \"\"\"\n\n      // expected converted Event JSON\n      val expected = \"\"\"\n        {\n          \"event\": \"do_something_on\",\n          \"entityType\": \"user\",\n          \"entityId\": \"as34smg4\",\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": \"kfjd312bc\"\n          \"properties\": {\n            \"context\": {\n              \"ip\": \"1.23.4.56\",\n              \"prop1\": 2.345\n              \"prop2\": \"value1\"\n            }\n          }\n          \"eventTime\": \"2015-01-15T04:20:23.567Z\"\n        }\n      \"\"\"\n\n      check(ExampleJsonConnector, userActionItem, expected)\n    }\n\n  }\n\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/webhooks/mailchimp/MailChimpConnectorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.mailchimp\n\nimport org.apache.predictionio.data.webhooks.ConnectorTestUtil\n\nimport org.specs2.mutable._\n\nclass MailChimpConnectorSpec extends Specification with ConnectorTestUtil {\n\n  // TODO: test other events\n  // TODO: test different optional fields\n\n  \"MailChimpConnector\" should {\n\n    \"convert subscribe to event JSON\" in {\n\n      val subscribe = Map(\n        \"type\" -> \"subscribe\",\n        \"fired_at\" -> \"2009-03-26 21:35:57\",\n        \"data[id]\" -> \"8a25ff1d98\",\n        \"data[list_id]\" -> \"a6b5da1054\",\n        \"data[email]\" -> \"api@mailchimp.com\",\n        \"data[email_type]\" -> \"html\",\n        \"data[merges][EMAIL]\" -> \"api@mailchimp.com\",\n        \"data[merges][FNAME]\" -> \"MailChimp\",\n        \"data[merges][LNAME]\" -> \"API\",\n        \"data[merges][INTERESTS]\" -> \"Group1,Group2\", //optional\n        \"data[ip_opt]\" -> \"10.20.10.30\",\n        \"data[ip_signup]\" -> \"10.20.10.30\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"subscribe\",\n          \"entityType\" : \"user\",\n          \"entityId\" 
: \"8a25ff1d98\",\n          \"targetEntityType\" : \"list\",\n          \"targetEntityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"email\" : \"api@mailchimp.com\",\n            \"email_type\" : \"html\",\n            \"merges\" : {\n              \"EMAIL\" : \"api@mailchimp.com\",\n              \"FNAME\" : \"MailChimp\",\n              \"LNAME\" : \"API\"\n              \"INTERESTS\" : \"Group1,Group2\"\n            },\n            \"ip_opt\" : \"10.20.10.30\",\n            \"ip_signup\" : \"10.20.10.30\"\n          },\n          \"eventTime\" : \"2009-03-26T21:35:57.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, subscribe, expected)\n    }\n\n    //check unsubscribe to event Json\n    \"convert unsubscribe to event JSON\" in {\n\n      val unsubscribe = Map(\n        \"type\" -> \"unsubscribe\",\n        \"fired_at\" -> \"2009-03-26 21:40:57\",\n        \"data[action]\" -> \"unsub\",\n        \"data[reason]\" -> \"manual\",\n        \"data[id]\" -> \"8a25ff1d98\",\n        \"data[list_id]\" -> \"a6b5da1054\",\n        \"data[email]\" -> \"api+unsub@mailchimp.com\",\n        \"data[email_type]\" -> \"html\",\n        \"data[merges][EMAIL]\" -> \"api+unsub@mailchimp.com\",\n        \"data[merges][FNAME]\" -> \"MailChimp\",\n        \"data[merges][LNAME]\" -> \"API\",\n        \"data[merges][INTERESTS]\" -> \"Group1,Group2\", //optional\n        \"data[ip_opt]\" -> \"10.20.10.30\",\n        \"data[campaign_id]\" -> \"cb398d21d2\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"unsubscribe\",\n          \"entityType\" : \"user\",\n          \"entityId\" : \"8a25ff1d98\",\n          \"targetEntityType\" : \"list\",\n          \"targetEntityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"action\" : \"unsub\",\n            \"reason\" : \"manual\",\n            \"email\" : \"api+unsub@mailchimp.com\",\n            \"email_type\" : \"html\",\n            \"merges\" : {\n            
  \"EMAIL\" : \"api+unsub@mailchimp.com\",\n              \"FNAME\" : \"MailChimp\",\n              \"LNAME\" : \"API\"\n              \"INTERESTS\" : \"Group1,Group2\"\n            },\n            \"ip_opt\" : \"10.20.10.30\",\n            \"campaign_id\" : \"cb398d21d2\"\n          },\n          \"eventTime\" : \"2009-03-26T21:40:57.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, unsubscribe, expected)\n    }\n\n    //check profile update to event Json\n    \"convert profile update to event JSON\" in {\n\n      val profileUpdate = Map(\n        \"type\" -> \"profile\",\n        \"fired_at\" -> \"2009-03-26 21:31:21\",\n        \"data[id]\" -> \"8a25ff1d98\",\n        \"data[list_id]\" -> \"a6b5da1054\",\n        \"data[email]\" -> \"api@mailchimp.com\",\n        \"data[email_type]\" -> \"html\",\n        \"data[merges][EMAIL]\" -> \"api@mailchimp.com\",\n        \"data[merges][FNAME]\" -> \"MailChimp\",\n        \"data[merges][LNAME]\" -> \"API\",\n        \"data[merges][INTERESTS]\" -> \"Group1,Group2\", //optional\n        \"data[ip_opt]\" -> \"10.20.10.30\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"profile\",\n          \"entityType\" : \"user\",\n          \"entityId\" : \"8a25ff1d98\",\n          \"targetEntityType\" : \"list\",\n          \"targetEntityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"email\" : \"api@mailchimp.com\",\n            \"email_type\" : \"html\",\n            \"merges\" : {\n              \"EMAIL\" : \"api@mailchimp.com\",\n              \"FNAME\" : \"MailChimp\",\n              \"LNAME\" : \"API\"\n              \"INTERESTS\" : \"Group1,Group2\"\n            },\n            \"ip_opt\" : \"10.20.10.30\"\n          },\n          \"eventTime\" : \"2009-03-26T21:31:21.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, profileUpdate, expected)\n    }\n\n    //check email update to event Json\n    \"convert email update to event JSON\" in {\n\n      
val emailUpdate = Map(\n        \"type\" -> \"upemail\",\n        \"fired_at\" -> \"2009-03-26 22:15:09\",\n        \"data[list_id]\" -> \"a6b5da1054\",\n        \"data[new_id]\" -> \"51da8c3259\",\n        \"data[new_email]\" -> \"api+new@mailchimp.com\",\n        \"data[old_email]\" -> \"api+old@mailchimp.com\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"upemail\",\n          \"entityType\" : \"user\",\n          \"entityId\" : \"51da8c3259\",\n          \"targetEntityType\" : \"list\",\n          \"targetEntityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"new_email\" : \"api+new@mailchimp.com\",\n            \"old_email\" : \"api+old@mailchimp.com\"\n          },\n          \"eventTime\" : \"2009-03-26T22:15:09.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, emailUpdate, expected)\n    }\n\n    //check cleaned email to event Json\n    \"convert cleaned email to event JSON\" in {\n\n      val cleanedEmail = Map(\n        \"type\" -> \"cleaned\",\n        \"fired_at\" -> \"2009-03-26 22:01:00\",\n        \"data[list_id]\" -> \"a6b5da1054\",\n        \"data[campaign_id]\" -> \"4fjk2ma9xd\",\n        \"data[reason]\" -> \"hard\",\n        \"data[email]\" -> \"api+cleaned@mailchimp.com\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"cleaned\",\n          \"entityType\" : \"list\",\n          \"entityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"campaignId\" : \"4fjk2ma9xd\",\n            \"reason\" : \"hard\",\n            \"email\" : \"api+cleaned@mailchimp.com\"\n          },\n          \"eventTime\" : \"2009-03-26T22:01:00.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, cleanedEmail, expected)\n    }\n\n    //check campaign sending status to event Json\n    \"convert campaign sending status to event JSON\" in {\n\n      val campaign = Map(\n        \"type\" -> \"campaign\",\n        \"fired_at\" -> \"2009-03-26 
22:15:09\",\n        \"data[id]\" -> \"5aa2102003\",\n        \"data[subject]\" -> \"Test Campaign Subject\",\n        \"data[status]\" -> \"sent\",\n        \"data[reason]\" -> \"\",\n        \"data[list_id]\" -> \"a6b5da1054\"\n      )\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"campaign\",\n          \"entityType\" : \"campaign\",\n          \"entityId\" : \"5aa2102003\",\n          \"targetEntityType\" : \"list\",\n          \"targetEntityId\" : \"a6b5da1054\",\n          \"properties\" : {\n            \"subject\" : \"Test Campaign Subject\",\n            \"status\" : \"sent\",\n            \"reason\" : \"\"\n          },\n          \"eventTime\" : \"2009-03-26T22:15:09.000Z\"\n        }\n      \"\"\"\n\n      check(MailChimpConnector, campaign, expected)\n    }\n\n  }\n}\n"
  },
  {
    "path": "data/src/test/scala/org/apache/predictionio/data/webhooks/segmentio/SegmentIOConnectorSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.webhooks.segmentio\n\nimport org.apache.predictionio.data.webhooks.ConnectorTestUtil\n\nimport org.specs2.mutable._\n\nclass SegmentIOConnectorSpec extends Specification with ConnectorTestUtil {\n\n  // TODO: test different optional fields\n\n  val commonFields =\n    s\"\"\"\n       |  \"anonymous_id\": \"id\",\n       |  \"sent_at\": \"sendAt\",\n       |  \"version\": \"2\",\n     \"\"\".stripMargin\n\n  \"SegmentIOConnector\" should {\n\n    \"convert group with context to event JSON\" in {\n      val context =\n        \"\"\"\n          |  \"context\": {\n          |    \"app\": {\n          |      \"name\": \"InitechGlobal\",\n          |      \"version\": \"545\",\n          |      \"build\": \"3.0.1.545\"\n          |    },\n          |    \"campaign\": {\n          |      \"name\": \"TPS Innovation Newsletter\",\n          |      \"source\": \"Newsletter\",\n          |      \"medium\": \"email\",\n          |      \"term\": \"tps reports\",\n          |      \"content\": \"image link\"\n          |    },\n          |    \"device\": {\n          |      \"id\": 
\"B5372DB0-C21E-11E4-8DFC-AA07A5B093DB\",\n          |      \"advertising_id\": \"7A3CBEA0-BDF5-11E4-8DFC-AA07A5B093DB\",\n          |      \"ad_tracking_enabled\": true,\n          |      \"manufacturer\": \"Apple\",\n          |      \"model\": \"iPhone7,2\",\n          |      \"name\": \"maguro\",\n          |      \"type\": \"ios\",\n          |      \"token\": \"ff15bc0c20c4aa6cd50854ff165fd265c838e5405bfeb9571066395b8c9da449\"\n          |    },\n          |    \"ip\": \"8.8.8.8\",\n          |    \"library\": {\n          |      \"name\": \"analytics-ios\",\n          |      \"version\": \"1.8.0\"\n          |    },\n          |    \"network\": {\n          |      \"bluetooth\": false,\n          |      \"carrier\": \"T-Mobile NL\",\n          |      \"cellular\": true,\n          |      \"wifi\": false\n          |    },\n          |    \"location\": {\n          |      \"city\": \"San Francisco\",\n          |      \"country\": \"United States\",\n          |      \"latitude\": 40.2964197,\n          |      \"longitude\": -76.9411617,\n          |      \"speed\": 0\n          |    },\n          |    \"os\": {\n          |      \"name\": \"iPhone OS\",\n          |      \"version\": \"8.1.3\"\n          |    },\n          |    \"referrer\": {\n          |      \"id\": \"ABCD582CDEFFFF01919\",\n          |      \"type\": \"dataxu\"\n          |    },\n          |    \"screen\": {\n          |      \"width\": 320,\n          |      \"height\": 568,\n          |      \"density\": 2\n          |    },\n          |    \"timezone\": \"Europe/Amsterdam\",\n          |    \"user_agent\": \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)\"\n          |  }\n        \"\"\".stripMargin\n\n      val group =\n        s\"\"\"\n           |{ $commonFields\n            |  \"type\": \"group\",\n            |  \"group_id\": \"groupId\",\n            |  \"user_id\": \"userIdValue\",\n            |  \"timestamp\" : \"2012-12-02T00:30:08.276Z\",\n            |  \"traits\": {\n    
        |    \"name\": \"groupName\",\n            |    \"employees\": 329,\n            |  },\n            |  $context\n            |}\n        \"\"\".stripMargin\n\n      val expected =\n        s\"\"\"\n          |{\n          |  \"event\": \"group\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"userIdValue\",\n          |  \"properties\": {\n          |    $context,\n          |    \"group_id\": \"groupId\",\n          |    \"traits\": {\n          |      \"name\": \"groupName\",\n          |      \"employees\": 329\n          |    },\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, group, expected)\n    }\n\n    \"convert group to event JSON\" in {\n      val group =\n        s\"\"\"\n          |{ $commonFields\n          |  \"type\": \"group\",\n          |  \"group_id\": \"groupId\",\n          |  \"user_id\": \"userIdValue\",\n          |  \"timestamp\" : \"2012-12-02T00:30:08.276Z\",\n          |  \"traits\": {\n          |    \"name\": \"groupName\",\n          |    \"employees\": 329,\n          |  }\n          |}\n        \"\"\".stripMargin\n\n      val expected =\n        \"\"\"\n          |{\n          |  \"event\": \"group\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"userIdValue\",\n          |  \"properties\": {\n          |    \"group_id\": \"groupId\",\n          |    \"traits\": {\n          |      \"name\": \"groupName\",\n          |      \"employees\": 329\n          |    }\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, group, expected)\n    }\n\n    \"convert screen to event JSON\" in {\n      val screen =\n        s\"\"\"\n          |{ $commonFields\n          |  \"type\": \"screen\",\n          |  \"name\": \"screenName\",\n          |  \"user_id\": \"userIdValue\",\n          |  
\"timestamp\" : \"2012-12-02T00:30:08.276Z\",\n          |  \"properties\": {\n          |    \"variation\": \"screenVariation\"\n          |  }\n          |}\n        \"\"\".stripMargin\n\n      val expected =\n        \"\"\"\n          |{\n          |  \"event\": \"screen\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"userIdValue\",\n          |  \"properties\": {\n          |    \"properties\": {\n          |      \"variation\": \"screenVariation\"\n          |    },\n          |    \"name\": \"screenName\"\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, screen, expected)\n    }\n\n    \"convert page to event JSON\" in {\n      val page =\n       s\"\"\"\n          |{ $commonFields\n          |  \"type\": \"page\",\n          |  \"name\": \"pageName\",\n          |  \"user_id\": \"userIdValue\",\n          |  \"timestamp\" : \"2012-12-02T00:30:08.276Z\",\n          |  \"properties\": {\n          |    \"title\": \"pageTitle\",\n          |    \"url\": \"pageUrl\"\n          |  }\n          |}\n        \"\"\".stripMargin\n\n      val expected =\n        \"\"\"\n          |{\n          |  \"event\": \"page\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"userIdValue\",\n          |  \"properties\": {\n          |    \"properties\": {\n          |      \"title\": \"pageTitle\",\n          |      \"url\": \"pageUrl\"\n          |    },\n          |    \"name\": \"pageName\"\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, page, expected)\n    }\n\n    \"convert alias to event JSON\" in {\n      val alias =\n        s\"\"\"\n          |{ $commonFields\n          |  \"type\": \"alias\",\n          |  \"previous_id\": \"previousIdValue\",\n          |  \"user_id\": \"userIdValue\",\n          |  \"timestamp\" : 
\"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      val expected =\n        \"\"\"\n          |{\n          |  \"event\": \"alias\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"userIdValue\",\n          |  \"properties\": {\n          |    \"previous_id\" : \"previousIdValue\"\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, alias, expected)\n    }\n\n    \"convert track to event JSON\" in {\n      val track =\n       s\"\"\"\n          |{ $commonFields\n          |  \"user_id\": \"some_user_id\",\n          |  \"type\": \"track\",\n          |  \"event\": \"Registered\",\n          |  \"timestamp\" : \"2012-12-02T00:30:08.276Z\",\n          |  \"properties\": {\n          |    \"plan\": \"Pro Annual\",\n          |    \"accountType\" : \"Facebook\"\n          |  }\n          |}\n        \"\"\".stripMargin\n\n      val expected =\n        \"\"\"\n          |{\n          |  \"event\": \"track\",\n          |  \"entityType\": \"user\",\n          |  \"entityId\": \"some_user_id\",\n          |  \"properties\": {\n          |    \"event\": \"Registered\",\n          |    \"properties\": {\n          |      \"plan\": \"Pro Annual\",\n          |      \"accountType\": \"Facebook\"\n          |    }\n          |  },\n          |  \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n          |}\n        \"\"\".stripMargin\n\n      check(SegmentIOConnector, track, expected)\n    }\n\n    \"convert identify to event JSON\" in {\n      val identify = s\"\"\"\n        { $commonFields\n          \"type\"      : \"identify\",\n          \"user_id\"    : \"019mr8mf4r\",\n          \"traits\"    : {\n              \"email\"            : \"achilles@segment.com\",\n              \"name\"             : \"Achilles\",\n              \"subscription_plan\" : \"Premium\",\n              \"friendCount\"      : 29\n          },\n          
\"timestamp\" : \"2012-12-02T00:30:08.276Z\"\n        }\n      \"\"\"\n\n      val expected = \"\"\"\n        {\n          \"event\" : \"identify\",\n          \"entityType\": \"user\",\n          \"entityId\" : \"019mr8mf4r\",\n          \"properties\" : {\n            \"traits\" : {\n              \"email\"            : \"achilles@segment.com\",\n              \"name\"             : \"Achilles\",\n              \"subscription_plan\" : \"Premium\",\n              \"friendCount\"      : 29\n            }\n          },\n          \"eventTime\" : \"2012-12-02T00:30:08.276Z\"\n        }\n      \"\"\"\n\n      check(SegmentIOConnector, identify, expected)\n    }\n\n  }\n\n}\n"
  },
  {
    "path": "data/test-form.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\naccessKey=$1\n\n# normal subscribe event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"subscribe\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:35:57\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n--data-urlencode \"data[ip_signup]\"=\"10.20.10.30\" \\\n-w %{time_total}\n\n# normal unsubscribe event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"unsubscribe\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:40:57\" 
\\\n--data-urlencode \"data[action]\"=\"unsub\" \\\n--data-urlencode \"data[reason]\"=\"manual\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api+unsub@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api+unsub@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n--data-urlencode \"data[campaign_id]\"=\"cb398d21d2\" \\\n-w %{time_total}\n\n# normal profile update event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"profile\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:31:21\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n-w %{time_total}\n\n# normal email update event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"upemail\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 22:15:09\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[new_id]\"=\"51da8c3259\" \\\n--data-urlencode \"data[new_email]\"=\"api+new@mailchimp.com\" \\\n--data-urlencode 
\"data[old_email]\"=\"api+old@mailchimp.com\" \\\n-w %{time_total}\n\n# normal cleaned email event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"cleaned\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 22:01:00\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[campaign_id]\"=\"4fjk2ma9xd\" \\\n--data-urlencode \"data[reason]\"=\"hard\" \\\n--data-urlencode \"data[email]\"=\"api+cleaned@mailchimp.com\" \\\n-w %{time_total}\n\n# normal campaign sending status event\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"campaign\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:31:21\" \\\n--data-urlencode \"data[id]\"=\"5aa2102003\" \\\n--data-urlencode \"data[subject]\"=\"Test Campaign Subject\" \\\n--data-urlencode \"data[status]\"=\"sent\" \\\n--data-urlencode \"data[reason]\"=\"\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n-w %{time_total}\n\n# invalid type\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"something_invalid\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:35:57\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n--data-urlencode \"data[ip_signup]\"=\"10.20.10.30\" \\\n-w 
%{time_total}\n\n# missing data (type)\ncurl -i -X POST http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:35:57\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n--data-urlencode \"data[ip_signup]\"=\"10.20.10.30\" \\\n-w %{time_total}\n\n# invalid webhooks path\ncurl -i -X POST http://localhost:7070/webhooks/invalid?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n--data-urlencode \"type\"=\"subscribe\" \\\n--data-urlencode \"fired_at\"=\"2009-03-26 21:35:57\" \\\n--data-urlencode \"data[id]\"=\"8a25ff1d98\" \\\n--data-urlencode \"data[list_id]\"=\"a6b5da1054\" \\\n--data-urlencode \"data[email]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[email_type]\"=\"html\" \\\n--data-urlencode \"data[merges][EMAIL]\"=\"api@mailchimp.com\" \\\n--data-urlencode \"data[merges][FNAME]\"=\"MailChimp\" \\\n--data-urlencode \"data[merges][LNAME]\"=\"API\" \\\n--data-urlencode \"data[merges][INTERESTS]\"=\"Group1,Group2\" \\\n--data-urlencode \"data[ip_opt]\"=\"10.20.10.30\" \\\n--data-urlencode \"data[ip_signup]\"=\"10.20.10.30\" \\\n-w %{time_total}\n\n# get normal\ncurl -i -X GET http://localhost:7070/webhooks/mailchimp?accessKey=$accessKey \\\n-H \"Content-type: application/x-www-form-urlencoded\" \\\n-w %{time_total}\n\n# get invalid\ncurl -i -X GET http://localhost:7070/webhooks/invalid?accessKey=$accessKey \\\n-H \"Content-type: 
application/x-www-form-urlencoded\" \\\n-w %{time_total}\n"
  },
  {
    "path": "data/test-normal.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\naccessKey=$1\n\ncurl -i -X POST http://localhost:7070/events.json?accessKey=$1 \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event1\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618-07:00\"\n}' \\\n-w %{time_total}\n"
  },
  {
    "path": "data/test-segmentio.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\naccessKey=$1\n\n# normal case\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/identify | \\\ncurl -X POST \\\n    -H \"Content-Type: application/json\" \\\n    -d @- \\\n    http://localhost:7070/webhooks/segmentio.json?accessKey=$accessKey\necho ''\n\n# normal case api key in header for identify event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/identify | \\\ncurl -X POST \\\n     --user \"$accessKey:\" \\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# normal case api key in header for track event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/track | \\\ncurl -X POST \\\n     --user \"$accessKey:\" \\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# normal case api key in header for page event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/page | \\\ncurl -X POST \\\n     --user \"$accessKey:\" 
\\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# normal case api key in header for screen event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/screen | \\\ncurl -X POST \\\n     --user \"$accessKey:\" \\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# normal case api key in header for group event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/group | \\\ncurl -X POST \\\n     --user \"$accessKey:\" \\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# normal case api key in header for alias event\ncurl -H \"Accept: application/json; version=2.0\" \\\n     http://spec.segment.com/generate/alias | \\\ncurl -X POST \\\n     --user \"$accessKey:\" \\\n     -H \"Content-Type: application/json\" \\\n     -d @- \\\n     http://localhost:7070/webhooks/segmentio.json\necho ''\n\n# invalid type\ncurl -i -X POST http://localhost:7070/webhooks/segmentio.json?accessKey=$accessKey \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"version\"   : 1,\n  \"type\"      : \"invalid_type\",\n  \"userId\"    : \"019mr8mf4r\",\n  \"sent_at\":\"2015-08-21T15:25:32.799Z\",\n  \"traits\"    : {\n      \"email\"            : \"achilles@segment.com\",\n      \"name\"             : \"Achilles\",\n      \"subscriptionPlan\" : \"Premium\",\n      \"friendCount\"      : 29\n  },\n  \"timestamp\" : \"2012-12-02T00:30:08.276Z\"\n}' \\\n-w %{time_total}\necho ''\n\n# invalid data format\ncurl -i -X POST http://localhost:7070/webhooks/segmentio.json?accessKey=$accessKey \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"version\"   : 1,\n  \"userId\"    : \"019mr8mf4r\",\n  \"sent_at\":\"2015-08-21T15:25:32.799Z\",\n  \"traits\"    : {\n      \"email\"            
: \"achilles@segment.com\",\n      \"name\"             : \"Achilles\",\n      \"subscriptionPlan\" : \"Premium\",\n      \"friendCount\"      : 29\n  },\n  \"timestamp\" : \"2012-12-02T00:30:08.276Z\"\n}' \\\n-w %{time_total}\necho ''\n\n# invalid webhooks path\ncurl -i -X POST http://localhost:7070/webhooks/invalidpath.json?accessKey=$accessKey \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"version\"   : 1,\n  \"type\"      : \"identify\",\n  \"userId\"    : \"019mr8mf4r\",\n  \"sent_at\":\"2015-08-21T15:25:32.799Z\",\n  \"traits\"    : {\n      \"email\"            : \"achilles@segment.com\",\n      \"name\"             : \"Achilles\",\n      \"subscriptionPlan\" : \"Premium\",\n      \"friendCount\"      : 29\n  },\n  \"timestamp\" : \"2012-12-02T00:30:08.276Z\"\n}' \\\n-w %{time_total}\necho ''\n\n# get request\ncurl -i -X GET http://localhost:7070/webhooks/segmentio.json?accessKey=$accessKey \\\n-H \"Content-Type: application/json\" \\\n-w %{time_total}\necho ''\n\n# get invalid\ncurl -i -X GET http://localhost:7070/webhooks/invalidpath.json?accessKey=$accessKey \\\n-H \"Content-Type: application/json\" \\\n-w %{time_total}\necho ''\n"
  },
  {
    "path": "data/test.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# simple test script for dataapi\naccessKey=$1\n\nfunction checkGET () {\n  resp=$( curl -i -s -X GET \"http://localhost:7070$1\" )\n  status=$( echo \"$resp\" | grep HTTP/1.1 )\n  exp=$2\n  if [[ $status =~ (.*HTTP/1.1 $exp [a-zA-Z]+) ]]; then\n  echo \"[pass] GET $1 $status\"\n  else\n  echo \"[fail] GET $1 $resp\"\n  echo \"expect $exp\"\n  exit -1\n  fi\n}\n\n\nfunction checkPOST () {\n  resp=$( curl -i -s -X POST http://localhost:7070$1 \\\n  -H \"Content-Type: application/json\" \\\n  -d \"$2\" )\n  status=$( echo \"$resp\" | grep HTTP/1.1 )\n  exp=$3\n  if [[ $status =~ (.*HTTP/1.1 $exp [a-zA-Z]+) ]]; then\n  #echo \"POST $1 $2 good $status\"\n  echo \"[pass] POST $1 $status\"\n  else\n  echo \"[fail] POST $1 $2 $resp\"\n  echo \"expect $exp\"\n  exit -1\n  fi\n}\n\n# ---------------\n# status\n# ----------------\n\ncheckGET \"/\" 200\n\n\n# -----------\n# reserved events\n# ------------\n\ntestdata='{\n  \"event\" : \"$set\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST 
\"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\ntestdata='{\n  \"event\" : \"$unset\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"properties\" : {\n    \"prop1\" : \"\",\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\ntestdata='{\n  \"event\" : \"$delete\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\ntestdata='{\n  \"event\" : \"$xxxx\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# -------------\n# create events\n# -------------\n\n\n# full\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : true,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# different time zone\ntestdata='{\n  \"event\" : \"my_event_tzone\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618-08:00\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\ntestdata='{\n  \"event\" : \"my_event_tzone\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : 
\"2004-12-13T21:39:45.618+02:00\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# invalid timezone\ntestdata='{\n  \"event\" : \"my_event_tzone\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618ABC\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n\n# invalid timezone\ntestdata='{\n  \"event\" : \"my_event_tzone\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618+1\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# no properties\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# no properties with $unset event\ntestdata='{\n  \"event\" : \"$unset\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"eventTime\" : \"2004-12-14T21:39:45.618Z\",\n  \"properties\": {}\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# no properties with $unset event\ntestdata='{\n  \"event\" : \"$unset\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"eventTime\" : \"2004-12-14T21:39:45.618Z\",\n  \"properties\": {}\n}'\n\ncheckPOST 
\"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# no tags\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : \"value1\",\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-15T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n## no eventTIme\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : \"value1\",\n    \"prop2\" : \"value2\"\n  }\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n## with prid\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : \"value1\",\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n  \"prId\" : \"asfasfdsafdcsdFDWd\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# minimum\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n\n# check accepting null for optional fields\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : null,\n  \"targetEntityId\" : null,\n  \"properties\" : null,\n  \"eventTime\" : null\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : 
\"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : null,\n  \"targetEntityId\" : null,\n  \"properties\" : {\n    \"prop1\": 1,\n    \"prop2\": null\n  },\n  \"eventTime\" : null\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# ----------------------------\n# create events error cases\n# ----------------------------\n\n# missing event\ntestdata='{\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : true,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\n# missing entityType\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : true,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\n# missing entityId\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"properties\" : {\n    \"prop1\" : \"value1\",\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# empty event string\ntestdata='{\n  \"event\" : \"\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : \"value1\",\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n\ncheckPOST 
\"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# empty\ntestdata='{}'\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# empty\ntestdata=''\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# invalid data\ntestdata='asfd'\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# invalid pio_ entityType\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"pio_xx\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# invalid pio_ targetEntityType\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"food\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"pio_xxx\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# invalid pio_ properties\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"food\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"food2\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"pio_aaa\" : 1,\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\ncheckPOST \"/events.json?accessKey=$accessKey\" \"$testdata\" 400\n\n# valid pio_pr entityType\ntestdata='{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"pio_pr\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : \"my_target_entity_type\",\n  \"targetEntityId\" : \"my_target_entity_id\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\"\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\ncheckPOST 
\"/events.json?accessKey=$accessKey\" \"$testdata\" 201\n\n# -----\n# get events\n# ----\n\ncheckGET \"/events.json?accessKey=$accessKey\" 200\n\n# invalid accessKey\ncheckGET \"/events.json?accessKey=999\" 401\n\ncheckGET \"/events.json?accessKey=$accessKey&startTime=abc\" 400\n\ncheckGET \"/events.json?accessKey=$accessKey&untilTime=abc\" 400\n\ncheckGET \"/events.json?accessKey=$accessKey&startTime=2004-12-13T21:39:45.618Z&untilTime=2004-12-15T21:39:45.618Z\" 200\n\n# -----\n# batch request\n# ----\n\n# normal request\ntestdata='[{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"iid\",\n  \"properties\" : {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\"\n  },\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}]'\n\ncheckPOST \"/batch/events.json?accessKey=$accessKey\" \"$testdata\" 200\n\n# request with a malformed event (2nd event)\n# the response code is successful but the error for individual event is reflected in the response's body.\ntestdata='[{\n  \"event\" : \"my_event_1\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}, {\n  \"eve\" : \"my_event_2\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2015-12-13T21:39:45.618Z\"\n}]'\n\ncheckPOST \"/batch/events.json?accessKey=$accessKey\" \"$testdata\" 200\n\n# request with too many events (more than 50)\ntestdata=`cat data/very_long_batch_request.txt`\ncheckPOST \"/batch/events.json?accessKey=$accessKey\" \"$testdata\" 400\n"
  },
  {
    "path": "data/test2.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# simple test script for dataapi\naccessKey=$1\n\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$delete\",\n  \"entityType\" : \"pio_user\",\n  \"entityId\" : \"123\"\n}'\n\n\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$delete\",\n  \"entityType\" : \"pio_item\",\n  \"entityId\" : \"174\"\n}'\n\n\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"pio_item\",\n  \"entityId\" : \"174\",\n  \"properties\" : {\n    \"piox_a\" : 1\n  }\n}'\n\n\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : null,\n  \"targetEntityId\" : null,\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : null,\n  }\n  \"eventTime\" : 
\"2004-12-13T21:39:45.618Z\"\n}'\n\n\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"my_entity_type\",\n  \"entityId\" : \"my_entity_id\",\n  \"targetEntityType\" : null,\n  \"targetEntityId\" : null,\n  \"properties\" : null,\n  \"eventTime\" : null\n}'\n\n\n## prId\ncurl -i -X POST \"http://localhost:7070/events.json?accessKey=$accessKey\" \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"some_event\",\n  \"entityType\" : \"pio_user\",\n  \"entityId\" : \"123\",\n  \"prId\" : \"AbcdefXXFFdsf1\"\n}'\n"
  },
  {
    "path": "data/test3.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\ncurl -i -X POST http://localhost:7070/events.json?accessKey=testingkeyasdfasdf \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event1\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618-07:00\"\n}'\n\ncurl -i -X POST http://localhost:7070/events.json?accessKey=yT8WHQMkQLBPxGdcGWstu6Z12XaNjANu7py98Ysve2NHwGNp825bkCt2G3LPU6aK \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event1\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618-07:00\"\n}'\n\n\ncurl -i -X POST http://localhost:7070/events.json \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event2\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-14T21:39:45.618-07:00\"\n}'\n\n\ncurl -i -X POST http://localhost:7070/events.json \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event3\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-11T21:39:45.618-07:00\"\n}'\n\ncurl -i -X POST http://localhost:7070/events.json 
\\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event4\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid2\",\n  \"eventTime\" : \"2004-12-11T22:39:45.618-07:00\"\n}'\n\ncurl -i -X POST http://localhost:7070/events.json \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event5\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid2\",\n  \"eventTime\" : \"2004-12-14T21:39:45.618-07:00\"\n}'\n\ncurl -i -X POST http://localhost:7070/events.json \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event1\",\n  \"entityType\" : \"item\",\n  \"entityId\" : \"uid\",\n  \"eventTime\" : \"2004-12-13T21:39:45.618-07:00\"\n}'\n"
  },
  {
    "path": "data/very_long_batch_request.txt",
    "content": "[{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  
\"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : 
\"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n},{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n}]"
  },
  {
    "path": "doap.rdf",
    "content": "<?xml version=\"1.0\"?>\n<?xml-stylesheet type=\"text/xsl\"?>\n<rdf:RDF xml:lang=\"en\"\n         xmlns=\"http://usefulinc.com/ns/doap#\"\n         xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n         xmlns:asfext=\"http://projects.apache.org/ns/asfext#\"\n         xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n<!--\n    Licensed to the Apache Software Foundation (ASF) under one or more\n    contributor license agreements.  See the NOTICE file distributed with\n    this work for additional information regarding copyright ownership.\n    The ASF licenses this file to You under the Apache License, Version 2.0\n    (the \"License\"); you may not use this file except in compliance with\n    the License.  You may obtain a copy of the License at\n\n         http://www.apache.org/licenses/LICENSE-2.0\n\n    Unless required by applicable law or agreed to in writing, software\n    distributed under the License is distributed on an \"AS IS\" BASIS,\n    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n    See the License for the specific language governing permissions and\n    limitations under the License.\n-->\n  <Project rdf:about=\"http://predictionio.apache.org/\">\n    <created>2016-05-26</created>\n    <license rdf:resource=\"http://spdx.org/licenses/Apache-2.0\" />\n    <name>Apache PredictionIO</name>\n    <homepage rdf:resource=\"http://predictionio.apache.org/\" />\n    <asfext:pmc rdf:resource=\"http://predictionio.apache.org\" />\n    <shortdesc>PredictionIO is an open source Machine Learning Server built on top of state-of-the-art open source stack, that enables developers to manage and deploy production-ready predictive services for various kinds of machine learning tasks.</shortdesc>\n    <description>PredictionIO is an open source Machine Learning Server built on top of state-of-the-art open source stack, that enables developers to manage and deploy production-ready predictive services for various kinds of 
machine learning tasks.</description>\n    <bug-database rdf:resource=\"https://issues.apache.org/jira/browse/PIO\" />\n    <mailing-list rdf:resource=\"http://predictionio.apache.org/support/\" />\n    <programming-language>Scala</programming-language>\n    <category rdf:resource=\"http://projects.apache.org/category/big-data\" />\n    <repository>\n      <GitRepository>\n        <location rdf:resource=\"https://gitbox.apache.org/repos/asf/predictionio.git\"/>\n        <browse rdf:resource=\"https://gitbox.apache.org/repos/asf/predictionio.git\"/>\n      </GitRepository>\n    </repository>\n    <maintainer>\n      <foaf:Person>\n        <foaf:name>Donald Szeto</foaf:name>\n          <foaf:mbox rdf:resource=\"mailto:donald@apache.org\"/>\n      </foaf:Person>\n    </maintainer>\n  </Project>\n</rdf:RDF>\n"
  },
  {
    "path": "docker/.ivy2/.keep",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n"
  },
  {
    "path": "docker/JUPYTER.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nJupyter With PredictionIO\n=========================\n\n## Overview\n\nUsing Jupyter based docker, you can use Jupyter Notebook with PredictionIO environment.\nIt helps you with your exploratory data analysis (EDA).\n\n## Run Jupyter Notebook\n\nFirst of all, start Jupyter container with PredictionIO environment:\n\n```\ndocker-compose -f docker-compose.jupyter.yml \\\n  -f pgsql/docker-compose.base.yml \\\n  -f pgsql/docker-compose.meta.yml \\\n  -f pgsql/docker-compose.event.yml \\\n  -f pgsql/docker-compose.model.yml \\\n  up\n```\n\nOpen `http://127.0.0.1:8888/` and then open a new terminal in Jupyter from `New` pulldown button.\n\n## Getting Started With Scala Based Template\n\n### Download Template\n\nClone a template using Git:\n\n```\ncd templates/\ngit clone https://github.com/apache/predictionio-template-recommender.git\ncd predictionio-template-recommender/\n```\n\nReplace a name with `MyApp1`.\n\n```\nsed -i \"s/INVALID_APP_NAME/MyApp1/\" engine.json\n```\n\n### Register New Application\n\nUsing pio command, register a new application as `MyApp1`.\n\n```\npio app new MyApp1\n```\n\nThis command prints an access key as below.\n\n```\n[INFO] [Pio$] Access Key: 
bbe8xRHN1j3Sa8WeAT8TSxt5op3lUqhvXmKY1gLRjg70K-DUhHIJJ0-UzgKumxGm\n```\n\nSet it to an environment variable `ACCESS_KEY`.\n\n```\nACCESS_KEY=bbe8xRHN1j3Sa8WeAT8TSxt5op3lUqhvXmKY1gLRjg70K-DUhHIJJ0-UzgKumxGm\n```\n\n### Import Training Data\n\nDownload training data and import it to PredictionIO Event server.\n\n```\ncurl https://raw.githubusercontent.com/apache/spark/master/data/mllib/sample_movielens_data.txt --create-dirs -o data/sample_movielens_data.txt\npython data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\n### Build Template\n\nBuild your template by the following command:\n\n```\npio build --verbose\n```\n\n### Create Model\n\nTo create a model, run:\n\n```\npio train\n```\n\n## Getting Started With Python Based Template\n\n### Download Template\n\nClone a template using Git:\n\n```\ncd templates/\ngit clone https://github.com/jpioug/predictionio-template-iris.git\ncd predictionio-template-iris/\n```\n\n### Register New Application\n\nUsing pio command, register a new application as `IrisApp`.\n\n```\npio app new --access-key IRIS_TOKEN IrisApp\n```\n\n### Import Training Data\n\nDownload training data and import it to PredictionIO Event server.\n\n```\npython data/import_eventserver.py\n```\n\n### Build Template\n\nBuild your template by the following command:\n\n```\npio build --verbose\n```\n\n### EDA\n\nTo do data analysis, open `templates/predictionio-template-iris/eda.ipynb` on Jupyter.\n\n### Create Model\n\nYou need to clear the following environment variables in the terminal before executing `pio train`.\n\n```\nunset PYSPARK_PYTHON\nunset PYSPARK_DRIVER_PYTHON\nunset PYSPARK_DRIVER_PYTHON_OPTS\n```\n\nTo create a model, run:\n\n```\npio train --main-py-file train.py\n```\n\n\n"
  },
  {
    "path": "docker/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nApache PredictionIO Docker\n==========================\n\n## Overview\n\nPredictionIO Docker provides Docker image for use in development and production environment.\n\n\n## Usage\n\n### Run PredictionIO with Selectable docker-compose Files\n\nYou can choose storages for event/meta/model to select docker-compose.yml.\n\n```\ndocker-compose -f docker-compose.yml -f ... 
up\n```\n\nSupported storages are as below:\n\n| Type  | Storage                          |\n|:-----:|:---------------------------------|\n| Event | Postgresql, MySQL, Elasticsearch |\n| Meta  | Postgresql, MySQL, Elasticsearch |\n| Model | Postgresql, MySQL, LocalFS       |\n\nIf you run PredictionIO with Postgresql, run as below:\n\n```\ndocker-compose -f docker-compose.yml \\\n  -f pgsql/docker-compose.base.yml \\\n  -f pgsql/docker-compose.meta.yml \\\n  -f pgsql/docker-compose.event.yml \\\n  -f pgsql/docker-compose.model.yml \\\n  up\n```\n\nTo use localfs as model storage, change as below:\n\n```\ndocker-compose -f docker-compose.yml \\\n  -f pgsql/docker-compose.base.yml \\\n  -f pgsql/docker-compose.meta.yml \\\n  -f pgsql/docker-compose.event.yml \\\n  -f localfs/docker-compose.model.yml \\\n  up\n```\n\n## Tutorial\n\nIn this demo, we will show you how to build a recommendation template.\n\n### Run PredictionIO environment\n\nThe following command starts PredictionIO with an event server.\nPredictionIO docker image mounts ./templates directory to /templates.\n\n```\n$ docker-compose -f docker-compose.yml \\\n    -f pgsql/docker-compose.base.yml \\\n    -f pgsql/docker-compose.meta.yml \\\n    -f pgsql/docker-compose.event.yml \\\n    -f pgsql/docker-compose.model.yml \\\n    up\n```\n\nWe provide `pio-docker` command as an utility for `pio` command.\n`pio-docker` invokes `pio` command in PredictionIO container.\n\n```\n$ export PATH=`pwd`/bin:$PATH\n$ pio-docker status\n...\n[INFO] [Management$] Your system is all ready to go.\n```\n\n### Download Recommendation Template\n\nThis demo uses [predictionio-template-recommender](https://github.com/apache/predictionio-template-recommender).\n\n```\n$ cd templates\n$ git clone https://github.com/apache/predictionio-template-recommender.git MyRecommendation\n$ cd MyRecommendation\n```\n\n### Register Application\n\nYou need to register this application to PredictionIO:\n\n```\n$ pio-docker app new MyApp1\n[INFO] 
[App$] Initialized Event Store for this app ID: 1.\n[INFO] [Pio$] Created a new app:\n[INFO] [Pio$]       Name: MyApp1\n[INFO] [Pio$]         ID: 1\n[INFO] [Pio$] Access Key: i-zc4EleEM577EJhx3CzQhZZ0NnjBKKdSbp3MiR5JDb2zdTKKzH9nF6KLqjlMnvl\n```\n\nSince an access key is required in subsequent steps, set it to ACCESS_KEY.\n\n```\n$ ACCESS_KEY=i-zc4EleEM577EJhx3CzQhZZ0NnjBKKdSbp3MiR5JDb2zdTKKzH9nF6KLqjlMnvl\n```\n\n`engine.json` contains an application name, so replace `INVALID_APP_NAME` with `MyApp1`.\n\n```\n...\n\"datasource\": {\n  \"params\" : {\n    \"appName\": \"MyApp1\"\n  }\n},\n...\n```\n\n### Import Data\n\nTo import training data to Event server for PredictionIO, this template provides an import tool.\nThe tool depends on PredictionIO Python SDK and install as below:\n\n```\n$ pip install predictionio\n```\nand then import data:\n```\n$ curl https://raw.githubusercontent.com/apache/spark/master/data/mllib/sample_movielens_data.txt --create-dirs -o data/sample_movielens_data.txt\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\n### Build Template\n\nThis is Scala based template.\nSo, you need to build this template by `pio` command.\n\n```\n$ pio-docker build --verbose\n```\n\n### Train and Create Model\n\nTo train a recommendation model, run `train` sub-command:\n\n```\n$ pio-docker train\n```\n\n### Deploy Model\n\nIf a recommendation model is created successfully, deploy it to Prediction server for PredictionIO.\n\n```\n$ pio-docker deploy\n\n```\nYou can check predictions as below:\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"1\", \"num\": 4 }' http://localhost:8000/queries.json\n```\n\n## Advanced Topics\n\n### Run with Elasticsearch\n\nFor Elasticsearch, Meta and Event storage are available.\nTo start PredictionIO with Elasticsearch,\n\n```\ndocker-compose -f docker-compose.yml \\\n  -f elasticsearch/docker-compose.base.yml \\\n  -f elasticsearch/docker-compose.meta.yml \\\n  -f 
elasticsearch/docker-compose.event.yml \\\n  -f localfs/docker-compose.model.yml \\\n  up\n```\n\n### Run with Spark Cluster\n\nBy adding `docker-compose.spark.yml`, you can use a Spark cluster for `pio train`.\n\n```\ndocker-compose -f docker-compose.yml \\\n  -f docker-compose.spark.yml \\\n  -f elasticsearch/docker-compose.base.yml \\\n  -f elasticsearch/docker-compose.meta.yml \\\n  -f elasticsearch/docker-compose.event.yml \\\n  -f localfs/docker-compose.model.yml \\\n  up\n```\n\nTo submit a training task to the Spark cluster, run `pio-docker train` with the `--master` option:\n\n```\npio-docker train -- --master spark://spark-master:7077\n```\n\nSee `docker-compose.spark.yml` to change the settings for the Spark cluster.\n\n### Run Engine Server\n\nTo deploy your engine and start an engine server, run Docker with `docker-compose.deploy.yml`.\n\n```\ndocker-compose -f docker-compose.yml \\\n  -f pgsql/docker-compose.base.yml \\\n  -f pgsql/docker-compose.meta.yml \\\n  -f pgsql/docker-compose.event.yml \\\n  -f pgsql/docker-compose.model.yml \\\n  -f docker-compose.deploy.yml \\\n  up\n```\n\nSee `deploy/run.sh` and `docker-compose.deploy.yml` if changing a deployment.\n\n### Run with Jupyter\n\nYou can launch PredictionIO with Jupyter.\n\n```\ndocker-compose -f docker-compose.jupyter.yml \\\n  -f pgsql/docker-compose.base.yml \\\n  -f pgsql/docker-compose.meta.yml \\\n  -f pgsql/docker-compose.event.yml \\\n  -f pgsql/docker-compose.model.yml \\\n  up\n```\n\nFor more information, see [JUPYTER.md](./JUPYTER.md).\n\n## Development\n\n### Build Base Docker Image\n\n```\ndocker build -t predictionio/pio pio\n```\n\n### Build Jupyter Docker Image\n\n```\ndocker build -t predictionio/pio-jupyter jupyter\n```\n\n### Push Docker Image\n\n```\ndocker push predictionio/pio:latest\ndocker tag predictionio/pio:latest predictionio/pio:$PIO_VERSION\ndocker push predictionio/pio:$PIO_VERSION\n```\n"
  },
  {
    "path": "docker/bin/pio-docker",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nBASE_WORK_DIR=/templates\nCURRENT_DIR=`pwd`\n\nget_container_id() {\n  if [ x\"$PIO_CONTAINER_ID\" != \"x\" ] ; then\n    echo $PIO_CONTAINER_ID\n    return\n  fi\n  for i in `docker ps -f \"name=pio\" -q` ; do\n    echo $i\n    return\n  done\n}\n\nget_current_dir() {\n  if [ x\"$PIO_CURRENT_DIR\" != \"x\" ] ; then\n    echo $PIO_CURRENT_DIR\n    return\n  fi\n  D=`echo $CURRENT_DIR | sed -e \"s,.*$BASE_WORK_DIR,$BASE_WORK_DIR,\"`\n  if [[ $D = $BASE_WORK_DIR* ]] ; then\n    echo $D\n  else\n    echo $BASE_WORK_DIR\n  fi\n}\n\ncid=`get_container_id`\nif [ x\"$cid\" = \"x\" ] ; then\n  echo \"Docker Container is not found.\"\n  exit 1\nfi\n\nwdir=`get_current_dir`\n\ndocker exec -w $wdir -it $cid pio $@\n\n"
  },
  {
    "path": "docker/charts/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHelm Charts for Apache PredictionIO\n============================\n\n## Overview\n\nHelm Charts are packages of pre-configured Kubernetes resources.\nUsing charts, you can install and manage PredictionIO in the Kubernetes.\n\n## Usage\n\n### Install PredictionIO with PostgreSQL\n\nTo install PostgreSQL and PredictionIO, run `helm install` command:\n\n```\nhelm install --name my-postgresql stable/postgresql -f postgresql.yaml\nhelm install --name my-pio ./predictionio -f predictionio_postgresql.yaml\n```\n\n`postgresql.yaml` and `predictionio_postgresql.yaml` are configuration files for charts.\nTo access Jupyter for PredictionIO, run `kubectl port-forward` and then open `http://localhost:8888/`.\n\n```\nexport POD_NAME=$(kubectl get pods --namespace default -l \"app.kubernetes.io/name=predictionio,app.kubernetes.io/instance=my-pio\" -o jsonpath=\"{.items[0].metadata.name}\")\nkubectl port-forward $POD_NAME 8888:8888\n```\n\n\n### Install Spark Cluster\n\nTo install Spark cluster, run the following command:\n\n```\nhelm install --name my-spark ./spark\n```\n\nTo train a model, run `pio train` as below:\n\n```\npio train -- --master spark://my-spark-master:7077\n```\n\n"
  },
  {
    "path": "docker/charts/postgresql.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\npostgresqlUsername: pio\npostgresqlPassword: pio\npostgresqlDatabase: pio\n\n# for testing\npersistence:\n  enabled: false\n"
  },
  {
    "path": "docker/charts/predictionio/.helmignore",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation (prefixed with !). Only one pattern per line.\n.DS_Store\n# Common VCS dirs\n.git/\n.gitignore\n.bzr/\n.bzrignore\n.hg/\n.hgignore\n.svn/\n# Common backup files\n*.swp\n*.bak\n*.tmp\n*~\n# Various IDEs\n.project\n.idea/\n*.tmproj\n"
  },
  {
    "path": "docker/charts/predictionio/Chart.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\nname: predictionio\nversion: 0.1.0\nappVersion: 0.13.0\ndescription: Machine learning server\nhome: http://predictionio.apache.org\nicon: http://predictionio.apache.org/images/logos/logo-ee2b9bb3.png\nsources:\n  - https://github.com/apache/predictionio\nmaintainers:\n  - name: Shinsuke Sugaya\n    email: shinsuke@apache.org\n"
  },
  {
    "path": "docker/charts/predictionio/templates/NOTES.txt",
    "content": "{{/*\n   * Licensed to the Apache Software Foundation (ASF) under one or more\n   * contributor license agreements.  See the NOTICE file distributed with\n   * this work for additional information regarding copyright ownership.\n   * The ASF licenses this file to You under the Apache License, Version 2.0\n   * (the \"License\"); you may not use this file except in compliance with\n   * the License.  You may obtain a copy of the License at\n   *\n   *    http://www.apache.org/licenses/LICENSE-2.0\n   *\n   * Unless required by applicable law or agreed to in writing, software\n   * distributed under the License is distributed on an \"AS IS\" BASIS,\n   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   * See the License for the specific language governing permissions and\n   * limitations under the License.\n   */}}\n1. Get the application URL by running these commands:\n{{- if contains \"NodePort\" .Values.pio.service.type }}\n  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath=\"{.spec.ports[0].nodePort}\" services {{ include \"predictionio.fullname\" . }})\n  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath=\"{.items[0].status.addresses[0].address}\")\n  echo http://$NODE_IP:$NODE_PORT\n{{- else if contains \"LoadBalancer\" .Values.pio.service.type }}\n     NOTE: It may take a few minutes for the LoadBalancer IP to be available.\n           You can watch the status of by running 'kubectl get svc -w {{ include \"predictionio.fullname\" . }}'\n  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include \"predictionio.fullname\" . 
}} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')\n  echo http://$SERVICE_IP:{{ .Values.pio.service.port }}\n{{- else if contains \"ClusterIP\" .Values.pio.service.type }}\n  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l \"app.kubernetes.io/name={{ include \"predictionio.name\" . }},app.kubernetes.io/instance={{ .Release.Name }}\" -o jsonpath=\"{.items[0].metadata.name}\")\n  echo \"Visit http://127.0.0.1:8888 to use your application\"\n  kubectl port-forward $POD_NAME 8888:8888\n{{- end }}\n"
  },
  {
    "path": "docker/charts/predictionio/templates/_helpers.tpl",
    "content": "{{/*\n   * Licensed to the Apache Software Foundation (ASF) under one or more\n   * contributor license agreements.  See the NOTICE file distributed with\n   * this work for additional information regarding copyright ownership.\n   * The ASF licenses this file to You under the Apache License, Version 2.0\n   * (the \"License\"); you may not use this file except in compliance with\n   * the License.  You may obtain a copy of the License at\n   *\n   *    http://www.apache.org/licenses/LICENSE-2.0\n   *\n   * Unless required by applicable law or agreed to in writing, software\n   * distributed under the License is distributed on an \"AS IS\" BASIS,\n   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   * See the License for the specific language governing permissions and\n   * limitations under the License.\n   */}}\n{{- define \"predictionio.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n\n{{- define \"predictionio.fullname\" -}}\n{{- if .Values.fullnameOverride -}}\n{{- .Values.fullnameOverride | trunc 63 | trimSuffix \"-\" -}}\n{{- else -}}\n{{- $name := default .Chart.Name .Values.nameOverride -}}\n{{- if contains $name .Release.Name -}}\n{{- .Release.Name | trunc 63 | trimSuffix \"-\" -}}\n{{- else -}}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n{{- end -}}\n{{- end -}}\n\n{{- define \"predictionio.chart\" -}}\n{{- printf \"%s-%s\" .Chart.Name .Chart.Version | replace \"+\" \"_\" | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n"
  },
  {
    "path": "docker/charts/predictionio/templates/pio-deployment.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\napiVersion: apps/v1beta2\nkind: Deployment\nmetadata:\n  name: {{ include \"predictionio.fullname\" . }}\n  labels:\n    app.kubernetes.io/name: {{ include \"predictionio.name\" . }}\n    helm.sh/chart: {{ include \"predictionio.chart\" . }}\n    app.kubernetes.io/instance: {{ .Release.Name }}\n    app.kubernetes.io/managed-by: {{ .Release.Service }}\nspec:\n  replicas: {{ .Values.pio.replicas }}\n  selector:\n    matchLabels:\n      app.kubernetes.io/name: {{ include \"predictionio.name\" . }}\n      app.kubernetes.io/instance: {{ .Release.Name }}\n  template:\n    metadata:\n      labels:\n        app.kubernetes.io/name: {{ include \"predictionio.name\" . 
}}\n        app.kubernetes.io/instance: {{ .Release.Name }}\n    spec:\n      containers:\n        - name: {{ .Chart.Name }}\n          image: \"{{ .Values.pio.image.repository }}:{{ .Values.pio.image.tag }}\"\n          imagePullPolicy: {{ .Values.pio.image.pullPolicy }}\n          env:\n{{ toYaml .Values.pio.env | indent 12 }}\n          ports:\n            - name: event\n              containerPort: 7070\n              protocol: TCP\n            - name: predict\n              containerPort: 8000\n              protocol: TCP\n            - name: jupyter\n              containerPort: 8888\n              protocol: TCP\n          livenessProbe:\n            httpGet:\n              path: /\n              port: 7070\n          readinessProbe:\n            httpGet:\n              path: /\n              port: 7070\n          resources:\n{{ toYaml .Values.pio.resources | indent 12 }}\n    {{- with .Values.pio.nodeSelector }}\n      nodeSelector:\n{{ toYaml . | indent 8 }}\n    {{- end }}\n    {{- with .Values.pio.affinity }}\n      affinity:\n{{ toYaml . | indent 8 }}\n    {{- end }}\n    {{- with .Values.pio.tolerations }}\n      tolerations:\n{{ toYaml . | indent 8 }}\n    {{- end }}\n"
  },
  {
    "path": "docker/charts/predictionio/templates/pio-service.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"predictionio.fullname\" . }}\n  labels:\n    app.kubernetes.io/name: {{ include \"predictionio.name\" . }}\n    helm.sh/chart: {{ include \"predictionio.chart\" . }}\n    app.kubernetes.io/instance: {{ .Release.Name }}\n    app.kubernetes.io/managed-by: {{ .Release.Service }}\nspec:\n  type: {{ .Values.pio.service.type }}\n  ports:\n    - port: {{ .Values.pio.service.port }}\n      targetPort: 8888\n      protocol: TCP\n      name: jupyter\n  selector:\n    app.kubernetes.io/name: {{ include \"predictionio.name\" . }}\n    app.kubernetes.io/instance: {{ .Release.Name }}\n"
  },
  {
    "path": "docker/charts/predictionio/values.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\npio:\n  replicas: 1\n  image:\n    repository: predictionio/pio-jupyter\n    tag: latest\n    pullPolicy: IfNotPresent\n  service:\n    type: ClusterIP\n    port: 8888\n  env:\n    - name: PIO_STORAGE_SOURCES_PGSQL_TYPE\n      value: jdbc\n    - name: PIO_STORAGE_SOURCES_PGSQL_URL\n      value: \"jdbc:postgresql://postgresql/pio\"\n    - name: PIO_STORAGE_SOURCES_PGSQL_USERNAME\n      value: pio\n    - name: PIO_STORAGE_SOURCES_PGSQL_PASSWORD\n      value: pio\n    - name: PIO_STORAGE_REPOSITORIES_MODELDATA_NAME\n      value: pio_model\n    - name: PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE\n      value: PGSQL\n    - name: PIO_STORAGE_REPOSITORIES_METADATA_NAME\n      value: pio_meta\n    - name: PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\n      value: PGSQL\n    - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME\n      value: pio_event\n    - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\n      value: PGSQL\n    - name: PYSPARK_DRIVER_PYTHON_OPTS\n      value: \"notebook --NotebookApp.token=''\"\n  resources: {}\n  nodeSelector: {}\n  tolerations: []\n  affinity: {}\n\n"
  },
  {
    "path": "docker/charts/predictionio_postgresql.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\npio:\n  env:\n    - name: PIO_STORAGE_SOURCES_PGSQL_TYPE\n      value: jdbc\n    - name: PIO_STORAGE_SOURCES_PGSQL_URL\n      value: \"jdbc:postgresql://my-postgresql-postgresql:5432/pio\"\n    - name: PIO_STORAGE_SOURCES_PGSQL_USERNAME\n      value: pio\n    - name: PIO_STORAGE_SOURCES_PGSQL_PASSWORD\n      value: pio\n    - name: PIO_STORAGE_REPOSITORIES_MODELDATA_NAME\n      value: pio_model\n    - name: PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE\n      value: PGSQL\n    - name: PIO_STORAGE_REPOSITORIES_METADATA_NAME\n      value: pio_meta\n    - name: PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\n      value: PGSQL\n    - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME\n      value: pio_event\n    - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\n      value: PGSQL\n    - name: PYSPARK_DRIVER_PYTHON_OPTS\n      value: \"notebook --NotebookApp.token=''\"\n\n"
  },
  {
    "path": "docker/charts/spark/.helmignore",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation (prefixed with !). Only one pattern per line.\n.DS_Store\n# Common VCS dirs\n.git/\n.gitignore\n.bzr/\n.bzrignore\n.hg/\n.hgignore\n.svn/\n# Common backup files\n*.swp\n*.bak\n*.tmp\n*~\n# Various IDEs\n.project\n.idea/\n*.tmproj\n"
  },
  {
    "path": "docker/charts/spark/Chart.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\nname: spark\nversion: 0.3.0\nappVersion: 2.3.2\ndescription: Fast and general-purpose cluster computing system.\nhome: http://spark.apache.org\nicon: http://spark.apache.org/images/spark-logo-trademark.png\nsources:\n  - https://github.com/kubernetes/kubernetes/tree/master/examples/spark\n  - https://github.com/apache/spark\nmaintainers:\n  - name: lachie83\n    email: lachlan.evenson@gmail.com\n  - name: Shinsuke Sugaya\n    email: shinsuke@apache.org\n"
  },
  {
    "path": "docker/charts/spark/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n# Apache Spark Helm Chart\n\nApache Spark is a fast and general-purpose cluster computing system.\n\n* http://spark.apache.org/\n\nThis chart is based on stable/spark in [Helm Charts](https://github.com/helm/charts).\n\n## Chart Details\nThis chart will do the following:\n\n* 1 x Spark Master with port 8080 exposed on an external LoadBalancer\n* 3 x Spark Workers with HorizontalPodAutoscaler to scale to max 10 pods when CPU hits 50% of 100m\n* All using Kubernetes Deployments\n\n## Prerequisites\n\n* Assumes that serviceAccount tokens are available under hostname metadata. 
(Works on GKE by default) URL -- http://metadata/computeMetadata/v1/instance/service-accounts/default/token\n\n## Installing the Chart\n\nTo install the chart with the release name `my-release`:\n\n```bash\n$ helm install --name my-release stable/spark\n```\n\n## Configuration\n\nThe following table lists the configurable parameters of the Spark chart and their default values.\n\n### Spark Master\n\n| Parameter               | Description                        | Default                                                    |\n| ----------------------- | ---------------------------------- | ---------------------------------------------------------- |\n| `Master.Name`           | Spark master name                  | `spark-master`                                             |\n| `Master.Image`          | Container image name               | `bde2020/spark-master`                                     |\n| `Master.ImageTag`       | Container image tag                | `2.2.2-hadoop2.7`                                          |\n| `Master.Replicas`       | k8s deployment replicas            | `1`                                                        |\n| `Master.Component`      | k8s selector key                   | `spark-master`                                             |\n| `Master.Cpu`            | container requested cpu            | `100m`                                                     |\n| `Master.Memory`         | container requested memory         | `512Mi`                                                    |\n| `Master.ServicePort`    | k8s service port                   | `7077`                                                     |\n| `Master.ContainerPort`  | Container listening port           | `7077`                                                     |\n| `Master.DaemonMemory`   | Master JVM Xms and Xmx option      | `1g`                                                       |\n| `Master.ServiceType `   | Kubernetes Service type            | 
`LoadBalancer`                                             |\n\n### Spark WebUi\n\n|       Parameter       |           Description            |                         Default                          |\n|-----------------------|----------------------------------|----------------------------------------------------------|\n| `WebUi.Name`          | Spark webui name                 | `spark-webui`                                            |\n| `WebUi.ServicePort`   | k8s service port                 | `8080`                                                   |\n| `WebUi.ContainerPort` | Container listening port         | `8080`                                                   |\n\n### Spark Worker\n\n| Parameter                    | Description                          | Default                                                    |\n| -----------------------      | ------------------------------------ | ---------------------------------------------------------- |\n| `Worker.Name`                | Spark worker name                    | `spark-worker`                                             |\n| `Worker.Image`               | Container image name                 | `bde2020/spark-worker`                                     |\n| `Worker.ImageTag`            | Container image tag                  | `2.2.2-hadoop2.7`                                          |\n| `Worker.Replicas`            | k8s hpa and deployment replicas      | `3`                                                        |\n| `Worker.ReplicasMax`         | k8s hpa max replicas                 | `10`                                                       |\n| `Worker.Component`           | k8s selector key                     | `spark-worker`                                             |\n| `Worker.Cpu`                 | container requested cpu              | `100m`                                                     |\n| `Worker.Memory`              | container requested memory           | `512Mi`     
                                               |\n| `Worker.ContainerPort`       | Container listening port             | `7077`                                                     |\n| `Worker.CpuTargetPercentage` | k8s hpa cpu targetPercentage         | `50`                                                       |\n| `Worker.DaemonMemory`        | Worker JVM Xms and Xmx setting       | `1g`                                                       |\n| `Worker.ExecutorMemory`      | Worker memory available for executor | `1g`                                                       |\n| `Worker.Autoscaling`         | Enable horizontal pod autoscaling    | `false`                                                    |\n\n\nSpecify each parameter using the `--set key=value[,key=value]` argument to `helm install`.\n\nAlternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example,\n\n```bash\n$ helm install --name my-release -f values.yaml stable/spark\n```\n\n> **Tip**: You can use the default [values.yaml](values.yaml)\n"
  },
  {
    "path": "docker/charts/spark/templates/NOTES.txt",
    "content": "{{/*\n   * Licensed to the Apache Software Foundation (ASF) under one or more\n   * contributor license agreements.  See the NOTICE file distributed with\n   * this work for additional information regarding copyright ownership.\n   * The ASF licenses this file to You under the Apache License, Version 2.0\n   * (the \"License\"); you may not use this file except in compliance with\n   * the License.  You may obtain a copy of the License at\n   *\n   *    http://www.apache.org/licenses/LICENSE-2.0\n   *\n   * Unless required by applicable law or agreed to in writing, software\n   * distributed under the License is distributed on an \"AS IS\" BASIS,\n   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   * See the License for the specific language governing permissions and\n   * limitations under the License.\n   */}}\n1. Get the Spark URL to visit by running these commands in the same shell:\n\n  NOTE: It may take a few minutes for the LoadBalancer IP to be available.\n  You can watch its status by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template \"webui-fullname\" . }}'\n\n  export SPARK_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template \"webui-fullname\" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')\n  echo http://$SPARK_SERVICE_IP:{{ .Values.WebUi.ServicePort }}\n\n"
  },
  {
    "path": "docker/charts/spark/templates/_helpers.tpl",
    "content": "{{/*\n   * Licensed to the Apache Software Foundation (ASF) under one or more\n   * contributor license agreements.  See the NOTICE file distributed with\n   * this work for additional information regarding copyright ownership.\n   * The ASF licenses this file to You under the Apache License, Version 2.0\n   * (the \"License\"); you may not use this file except in compliance with\n   * the License.  You may obtain a copy of the License at\n   *\n   *    http://www.apache.org/licenses/LICENSE-2.0\n   *\n   * Unless required by applicable law or agreed to in writing, software\n   * distributed under the License is distributed on an \"AS IS\" BASIS,\n   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   * See the License for the specific language governing permissions and\n   * limitations under the License.\n   */}}\n{{/* vim: set filetype=mustache: */}}\n{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n\n{{/*\nCreate fully qualified names.\nWe truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).\n*/}}\n{{- define \"master-fullname\" -}}\n{{- $name := default .Chart.Name .Values.Master.Name -}}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n\n{{- define \"webui-fullname\" -}}\n{{- $name := default .Chart.Name .Values.WebUi.Name -}}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n\n{{- define \"worker-fullname\" -}}\n{{- $name := default .Chart.Name .Values.Worker.Name -}}\n{{- printf \"%s-%s\" .Release.Name $name | trunc 63 | trimSuffix \"-\" -}}\n{{- end -}}\n\n"
  },
  {
    "path": "docker/charts/spark/templates/spark-master-deployment.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ template \"master-fullname\" . }}\n  labels:\n    heritage: {{ .Release.Service | quote }}\n    release: {{ .Release.Name | quote }}\n    chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n    component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\nspec:\n  ports:\n    - port: {{ .Values.Master.ServicePort }}\n      targetPort: {{ .Values.Master.ContainerPort }}\n  selector:\n    component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\n  type: {{ .Values.Master.ServiceType }}\n---\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ template \"webui-fullname\" . 
}}\n  labels:\n    heritage: {{ .Release.Service | quote }}\n    release: {{ .Release.Name | quote }}\n    chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n    component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\nspec:\n  ports:\n    - port: {{ .Values.WebUi.ServicePort }}\n      targetPort: {{ .Values.WebUi.ContainerPort }}\n  selector:\n    component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\n  type: {{ .Values.WebUi.ServiceType }}\n---\napiVersion: extensions/v1beta1\nkind: Deployment\nmetadata:\n  name: {{ template \"master-fullname\" . }}\n  labels:\n    heritage: {{ .Release.Service | quote }}\n    release: {{ .Release.Name | quote }}\n    chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n    component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\nspec:\n  replicas: {{ default 1 .Values.Master.Replicas }}\n  strategy:\n    type: RollingUpdate\n  selector:\n    matchLabels:\n      component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\n  template:\n    metadata:\n      labels:\n        heritage: {{ .Release.Service | quote }}\n        release: {{ .Release.Name | quote }}\n        chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n        component: \"{{ .Release.Name }}-{{ .Values.Master.Component }}\"\n    spec:\n      containers:\n        - name: {{ template \"master-fullname\" . }}\n          image: \"{{ .Values.Master.Image }}:{{ .Values.Master.ImageTag }}\"\n          command: [\"/bin/sh\",\"-c\"]\n          args: [\"echo $(hostname -i) {{ template \"master-fullname\" . 
}} >> /etc/hosts; {{ .Values.Spark.Path }}/bin/spark-class org.apache.spark.deploy.master.Master\"]\n          ports:\n            - containerPort: {{ .Values.Master.ContainerPort }}\n            - containerPort: {{ .Values.WebUi.ContainerPort }}\n          resources:\n            requests:\n              cpu: \"{{ .Values.Master.Cpu }}\"\n              memory: \"{{ .Values.Master.Memory }}\"\n          env:\n          - name: SPARK_DAEMON_MEMORY\n            value: {{ default \"1g\" .Values.Master.DaemonMemory | quote }}\n          - name: SPARK_MASTER_HOST\n            value: {{ template \"master-fullname\" . }}\n          - name: SPARK_MASTER_PORT\n            value: {{ .Values.Master.ServicePort | quote }}\n          - name: SPARK_MASTER_WEBUI_PORT\n            value: {{ .Values.WebUi.ContainerPort | quote }}\n"
  },
  {
    "path": "docker/charts/spark/templates/spark-sql-test.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\napiVersion: v1\nkind: Pod\nmetadata:\n  name: \"{{ .Release.Name }}-sql-test-{{ randAlphaNum 5 | lower }}\"\n  annotations:\n    \"helm.sh/hook\": test-success\nspec:\n  containers:\n    - name: {{ .Release.Name }}-sql-test\n      image: {{ .Values.Master.Image }}:{{ .Values.Master.ImageTag }}\n      command: [\"{{ .Values.Spark.Path }}/bin/spark-sql\", \"--master\",\n                \"spark://{{ .Release.Name }}-master:{{ .Values.Master.ServicePort }}\", \"-e\",\n                \"show databases;\"]\n  restartPolicy: Never\n"
  },
  {
    "path": "docker/charts/spark/templates/spark-worker-deployment.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\napiVersion: extensions/v1beta1\nkind: Deployment\nmetadata:\n  name: {{ template \"worker-fullname\" . }}\n  labels:\n    heritage: {{ .Release.Service | quote }}\n    release: {{ .Release.Name | quote }}\n    chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n    component: \"{{ .Release.Name }}-{{ .Values.Worker.Component }}\"\nspec:\n  replicas: {{ default 1 .Values.Worker.Replicas }}\n  strategy:\n    type: RollingUpdate\n  selector:\n    matchLabels:\n      component: \"{{ .Release.Name }}-{{ .Values.Worker.Component }}\"\n  template:\n    metadata:\n      labels:\n        heritage: {{ .Release.Service | quote }}\n        release: {{ .Release.Name | quote }}\n        chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n        component: \"{{ .Release.Name }}-{{ .Values.Worker.Component }}\"\n    spec:\n      containers:\n        - name: {{ template \"worker-fullname\" . }}\n          image: \"{{ .Values.Worker.Image }}:{{ .Values.Worker.ImageTag }}\"\n          command: [\"{{ .Values.Spark.Path }}/bin/spark-class\", \"org.apache.spark.deploy.worker.Worker\", \"spark://{{ template \"master-fullname\" . 
}}:{{ .Values.Master.ServicePort }}\"]\n          ports:\n            - containerPort: {{ .Values.Worker.ContainerPort }}\n          resources:\n            requests:\n              cpu: \"{{ .Values.Worker.Cpu }}\"\n              memory: \"{{ .Values.Worker.Memory }}\"\n          env:\n          - name: SPARK_DAEMON_MEMORY\n            value: {{ default \"1g\" .Values.Worker.DaemonMemory | quote }}\n          - name: SPARK_WORKER_MEMORY\n            value: {{ default \"1g\" .Values.Worker.ExecutorMemory | quote }}\n          - name: SPARK_WORKER_WEBUI_PORT\n            value: {{ .Values.WebUi.ContainerPort | quote }}\n"
  },
  {
    "path": "docker/charts/spark/templates/spark-worker-hpa.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n{{- if .Values.Worker.Autoscaling.Enabled }}\napiVersion: autoscaling/v2beta1\nkind: HorizontalPodAutoscaler\nmetadata:\n  labels:\n    heritage: {{ .Release.Service | quote }}\n    release: {{ .Release.Name | quote }}\n    chart: \"{{ .Chart.Name }}-{{ .Chart.Version }}\"\n    component: \"{{ .Release.Name }}-{{ .Values.Worker.Component }}\"\n  name: {{ template \"worker-fullname\" . }}\nspec:\n  scaleTargetRef:\n    apiVersion: apps/v1beta1\n    kind: Deployment\n    name: {{ template \"worker-fullname\" . }}\n  minReplicas: {{ .Values.Worker.Replicas }}\n  maxReplicas: {{ .Values.Worker.ReplicasMax }}\n  metrics:\n  - type: Resource\n    resource:\n      name: cpu\n      targetAverageUtilization: {{ .Values.Worker.CpuTargetPercentage }}\n{{- end }}\n"
  },
  {
    "path": "docker/charts/spark/values.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Default values for spark.\n# This is a YAML-formatted file.\n# Declare name/value pairs to be passed into your templates.\n# name: value\n\nSpark:\n  Path: \"/spark\"\n\nMaster:\n  Name: master\n  Image: \"bde2020/spark-master\"\n  ImageTag: \"2.2.2-hadoop2.7\"\n  Replicas: 1\n  Component: \"spark-master\"\n  Cpu: \"100m\"\n  Memory: \"512Mi\"\n  ServicePort: 7077\n  ContainerPort: 7077\n  # Set Master JVM memory. Default 1g\n  # DaemonMemory: 1g\n  ServiceType: LoadBalancer\n\nWebUi:\n  Name: webui\n  ServicePort: 8080\n  ContainerPort: 8080\n  ServiceType: LoadBalancer\n\nWorker:\n  Name: worker\n  Image: \"bde2020/spark-worker\"\n  ImageTag: \"2.2.2-hadoop2.7\"\n  Replicas: 3\n  Component: \"spark-worker\"\n  Cpu: \"100m\"\n  Memory: \"512Mi\"\n  ContainerPort: 8081\n  # Set Worker JVM memory. Default 1g\n  # DaemonMemory: 1g\n  # Set how much total memory workers have to give executors\n  # ExecutorMemory: 1g\n  Autoscaling:\n    Enabled: false\n  ReplicasMax: 10\n  CpuTargetPercentage: 50\n\n"
  },
  {
    "path": "docker/deploy/run.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\ncd /templates/$PIO_TEMPLATE_NAME\npio deploy\n"
  },
  {
    "path": "docker/docker-compose.deploy.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      - \"PIO_TEMPLATE_NAME=MyRecommendation\"\n      - \"PIO_RUN_FILE=/deploy/run.sh\"\n    volumes:\n      - ./deploy:/deploy\n"
  },
  {
    "path": "docker/docker-compose.jupyter.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    image: predictionio/pio-jupyter:latest\n    ports:\n      - 7070:7070\n      - 8000:8000\n      - 8888:8888\n    volumes:\n      - ./templates:/home/jovyan/templates\n      - ./.ivy2:/home/jovyan/.ivy2\n    environment:\n      - CHOWN_HOME=yes\n      - GRANT_SUDO=yes\n      - VOLUME_UID=yes\n      - \"PYSPARK_DRIVER_PYTHON_OPTS=notebook --NotebookApp.token=''\"\n    dns: 8.8.8.8\n"
  },
  {
    "path": "docker/docker-compose.spark.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  spark-master:\n    image: bde2020/spark-master:2.2.2-hadoop2.7\n    container_name: spark-master\n    ports:\n      - \"8080:8080\"\n      - \"7077:7077\"\n    environment:\n      - INIT_DAEMON_STEP=setup_spark\n  spark-worker-1:\n    image: bde2020/spark-worker:2.2.2-hadoop2.7\n    container_name: spark-worker-1\n    depends_on:\n      - spark-master\n    ports:\n      - \"8081:8081\"\n    environment:\n      - \"SPARK_MASTER=spark://spark-master:7077\"\n"
  },
  {
    "path": "docker/docker-compose.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    image: predictionio/pio:latest\n    ports:\n      - 7070:7070\n      - 8000:8000\n    volumes:\n      - ./templates:/templates\n    dns: 8.8.8.8\n"
  },
  {
    "path": "docker/elasticsearch/docker-compose.base.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  elasticsearch:\n    image: docker.elastic.co/elasticsearch/elasticsearch:5.6.4\n    environment:\n      - xpack.graph.enabled=false\n      - xpack.ml.enabled=false\n      - xpack.monitoring.enabled=false\n      - xpack.security.enabled=false\n      - xpack.watcher.enabled=false\n      - cluster.name=predictionio\n      - bootstrap.memory_lock=true\n      - \"ES_JAVA_OPTS=-Xms1g -Xmx1g\"\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n  pio:\n    depends_on:\n      - elasticsearch\n    environment:\n      PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE: elasticsearch\n      PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS: elasticsearch\n      PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS: 9200\n      PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES: http\n\n"
  },
  {
    "path": "docker/elasticsearch/docker-compose.event.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: ELASTICSEARCH\n\n"
  },
  {
    "path": "docker/elasticsearch/docker-compose.meta.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_METADATA_NAME: pio_meta\n      PIO_STORAGE_REPOSITORIES_METADATA_SOURCE: ELASTICSEARCH\n\n"
  },
  {
    "path": "docker/jupyter/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nFROM predictionio/pio:latest\n\nENV DEBIAN_FRONTEND noninteractive\n\nRUN apt-get update \\\n    && apt install -y build-essential curl git gcc make openssl libssl-dev libbz2-dev \\\n    apt-transport-https ca-certificates g++ gnupg graphviz lsb-release openssh-client zip \\\n    libreadline-dev libsqlite3-dev cmake libxml2-dev wget bzip2 sudo vim unzip locales \\\n    && apt-get clean \\\n    && rm -rf /var/lib/apt/lists/*\n\nRUN echo \"en_US.UTF-8 UTF-8\" > /etc/locale.gen && locale-gen\n\nENV LC_ALL=en_US.UTF-8 \\\n    LANG=en_US.UTF-8 \\\n    LANGUAGE=en_US.UTF-8 \\\n    NB_USER=jovyan \\\n    NB_UID=1000 \\\n    NB_GID=100 \\\n    CONDA_DIR=/opt/conda \\\n    PIP_DEFAULT_TIMEOUT=180\nENV PATH=$CONDA_DIR/bin:$PATH \\\n    HOME=/home/$NB_USER\n\nADD fix-permissions /usr/local/bin/fix-permissions\nRUN chmod +x /usr/local/bin/fix-permissions \\\n    && groupadd wheel -g 11 \\\n    && echo \"auth required pam_wheel.so use_uid\" >> /etc/pam.d/su \\\n    && useradd -m -s /bin/bash -N -u $NB_UID $NB_USER \\\n    && mkdir -p $CONDA_DIR \\\n    && chmod g+w /etc/passwd \\\n    && fix-permissions $HOME \\\n    && fix-permissions $CONDA_DIR\n\nUSER $NB_USER\n\nENV 
MINICONDA_VERSION 4.4.10\nRUN wget -q https://repo.continuum.io/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh -O /tmp/miniconda.sh \\\n    && echo 'bec6203dbb2f53011e974e9bf4d46e93 */tmp/miniconda.sh' | md5sum -c - \\\n    && bash /tmp/miniconda.sh -f -b -p $CONDA_DIR \\\n    && rm /tmp/miniconda.sh \\\n    && conda config --system --prepend channels conda-forge \\\n    && conda config --system --set auto_update_conda false \\\n    && conda config --system --set show_channel_urls true \\\n    && conda install --quiet --yes conda=\"${MINICONDA_VERSION%.*}.*\" \\\n    && conda update --all --quiet --yes \\\n    && conda clean -tipsy \\\n    && rm -rf /home/$NB_USER/.cache/yarn \\\n    && fix-permissions $CONDA_DIR \\\n    && fix-permissions /home/$NB_USER\n\nRUN conda install --quiet --yes 'tini=0.18.0' \\\n    && conda list tini | grep tini | tr -s ' ' | cut -d ' ' -f 1,2 >> $CONDA_DIR/conda-meta/pinned \\\n    && conda clean -tipsy \\\n    && fix-permissions $CONDA_DIR \\\n    && fix-permissions /home/$NB_USER\n\nRUN conda install --quiet --yes 'notebook=5.6.*' 'jupyterlab=0.34.*' nodejs\\\n    && jupyter labextension install @jupyterlab/hub-extension@^0.11.0 \\\n    && jupyter notebook --generate-config \\\n    && conda clean -tipsy \\\n    && npm cache clean --force \\\n    && rm -rf $CONDA_DIR/share/jupyter/lab/staging \\\n    && rm -rf /home/$NB_USER/.cache/yarn \\\n    && fix-permissions $CONDA_DIR \\\n    && fix-permissions /home/$NB_USER\n\nADD requirements.txt /tmp/requirements.txt\nRUN pip --no-cache-dir install -r /tmp/requirements.txt \\\n    && fix-permissions $CONDA_DIR \\\n    && fix-permissions /home/$NB_USER\n\nCOPY jupyter_notebook_config.py /home/$NB_USER/.jupyter/\nCOPY start*.sh /usr/local/bin/\n\nUSER root\nRUN chmod +x /usr/local/bin/*.sh\n\nEXPOSE 8888\nWORKDIR $HOME\nENTRYPOINT [\"tini\", \"--\"]\nCMD [\"/usr/local/bin/start-jupyter.sh\"]\n\n"
  },
  {
    "path": "docker/jupyter/fix-permissions",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nset -e\n\nfor d in $@; do\n  find \"$d\" \\\n    ! \\( \\\n      -group $NB_GID \\\n      -a -perm -g+rwX  \\\n    \\) \\\n    -exec chgrp $NB_GID {} \\; \\\n    -exec chmod g+rwX {} \\;\n  find \"$d\" \\\n    \\( \\\n      -type d \\\n      -a ! -perm -6000  \\\n    \\) \\\n    -exec chmod +6000 {} \\;\ndone\n"
  },
  {
    "path": "docker/jupyter/jupyter_notebook_config.py",
    "content": "# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n\nc = get_config()\nc.NotebookApp.ip = '*'\nc.NotebookApp.port = 8888\nc.NotebookApp.open_browser = False\n\n"
  },
  {
    "path": "docker/jupyter/requirements.txt",
    "content": "cython\ngoogle-cloud\nh5py\nipywidgets\njupyter_contrib_nbextensions\nkeras\nmatplotlib\npandas\npandas-gbq\npredictionio\nsklearn\ntensor2tensor\ntensorflow\nwidgetsnbextension\n"
  },
  {
    "path": "docker/jupyter/start-jupyter.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nset -e\n\n# store PIO environment to pio-env.sh\nPIO_ENV_FILE=/etc/predictionio/pio-env.sh\nenv | grep ^PIO_ >> $PIO_ENV_FILE\nif [ $(grep _MYSQL_ $PIO_ENV_FILE | wc -l) = 0 ] ; then\n  sed -i \"s/^MYSQL/#MYSQL/\" $PIO_ENV_FILE\nfi\n\n# start event server\nsh /usr/bin/pio_run &\n\nexport PYSPARK_PYTHON=$CONDA_DIR/bin/python\nif [ x\"$PYSPARK_DRIVER_PYTHON\" = \"x\" ] ; then\n  export PYSPARK_DRIVER_PYTHON=$CONDA_DIR/bin/jupyter\nfi\nif [ x\"$PYSPARK_DRIVER_PYTHON_OPTS\" = \"x\" ] ; then\n  export PYSPARK_DRIVER_PYTHON_OPTS=notebook\nfi\n\n. /usr/local/bin/start.sh $PIO_HOME/bin/pio-shell --with-pyspark\n\n"
  },
  {
    "path": "docker/jupyter/start.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nset -e\n\nif [[ \"$VOLUME_UID\" == \"1\" || \"$VOLUME_UID\" == 'yes' ]]; then\n  DIR_UID=`ls -lnd /home/jovyan/templates | awk '{print $3}'`\n  if [ x\"$DIR_UID\" != \"x\" -a x\"$DIR_UID\" != \"x0\" ] ; then\n    NB_UID=$DIR_UID\n  fi\nfi\n\nif [ $(id -u) == 0 ] ; then\n  if id jovyan &> /dev/null ; then\n    echo \"Set username to $NB_USER\"\n    usermod -d /home/$NB_USER -l $NB_USER jovyan\n  fi\n\n  if [[ \"$CHOWN_HOME\" == \"1\" || \"$CHOWN_HOME\" == 'yes' ]]; then\n    echo \"Change ownership of /home/$NB_USER to $NB_UID\"\n    chown -R $NB_UID /home/$NB_USER\n  fi\n  if [ ! -z \"$CHOWN_EXTRA\" ]; then\n    for extra_dir in $(echo $CHOWN_EXTRA | tr ',' ' '); do\n      chown -R $NB_UID $extra_dir\n    done\n  fi\n\n  if [[ \"$NB_USER\" != \"jovyan\" ]]; then\n    if [[ ! 
-e \"/home/$NB_USER\" ]]; then\n      echo \"Move home dir to /home/$NB_USER\"\n      mv /home/jovyan \"/home/$NB_USER\"\n    fi\n    if [[ \"$PWD/\" == \"/home/jovyan/\"* ]]; then\n      newcwd=\"/home/$NB_USER/${PWD:13}\"\n      echo \"Set CWD to $newcwd\"\n      cd \"$newcwd\"\n    fi\n  fi\n\n  if [ \"$NB_UID\" != $(id -u $NB_USER) ] ; then\n    echo \"Set $NB_USER to uid:$NB_UID\"\n    usermod -u $NB_UID $NB_USER\n  fi\n\n  if [ \"$NB_GID\" != $(id -g $NB_USER) ] ; then\n    echo \"Add $NB_USER to gid:$NB_GID\"\n    groupadd -g $NB_GID -o ${NB_GROUP:-${NB_USER}}\n    usermod -g $NB_GID -a -G $NB_GID,100 $NB_USER\n  fi\n\n  if [[ \"$GRANT_SUDO\" == \"1\" || \"$GRANT_SUDO\" == 'yes' ]]; then\n    echo \"Set sudo access to $NB_USER\"\n    echo \"$NB_USER ALL=(ALL) NOPASSWD:ALL\" > /etc/sudoers.d/notebook\n  fi\n\n  echo \"Execute command as $NB_USER\"\n  exec su $NB_USER -c \"env PATH=$PATH $*\"\n\nelse\n  echo \"Execute command\"\n  exec $*\n\nfi\n\n"
  },
  {
    "path": "docker/localfs/docker-compose.model.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_MODELDATA_NAME: pio_model\n      PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE: LOCALFS\n      PIO_FS_BASEDIR: /work/pio_store\n      PIO_FS_ENGINESDIR: /work/pio_store/engines\n      PIO_FS_TMPDIR: /work/pio_store/tmp\n      PIO_STORAGE_SOURCES_LOCALFS_TYPE: localfs\n      PIO_STORAGE_SOURCES_LOCALFS_PATH: /work/pio_store/models\n\n"
  },
  {
    "path": "docker/mysql/docker-compose.base.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  mysql:\n    image: mysql:8\n    command: mysqld --character-set-server=utf8mb4 --collation-server=utf8mb4_unicode_ci\n    environment:\n      MYSQL_ROOT_PASSWORD: root\n      MYSQL_USER: pio\n      MYSQL_PASSWORD: pio\n      MYSQL_DATABASE: pio\n  pio:\n    depends_on:\n      - mysql\n    environment:\n      PIO_STORAGE_SOURCES_MYSQL_TYPE: jdbc\n      PIO_STORAGE_SOURCES_MYSQL_URL: \"jdbc:mysql://mysql/pio\"\n      PIO_STORAGE_SOURCES_MYSQL_USERNAME: pio\n      PIO_STORAGE_SOURCES_MYSQL_PASSWORD: pio\n\n"
  },
  {
    "path": "docker/mysql/docker-compose.event.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: MYSQL\n\n"
  },
  {
    "path": "docker/mysql/docker-compose.meta.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_METADATA_NAME: pio_meta\n      PIO_STORAGE_REPOSITORIES_METADATA_SOURCE: MYSQL\n\n"
  },
  {
    "path": "docker/mysql/docker-compose.model.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_MODELDATA_NAME: pio_model\n      PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE: MYSQL\n\n"
  },
  {
    "path": "docker/pgsql/docker-compose.base.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  postgres:\n    image: postgres:9\n    environment:\n      POSTGRES_USER: pio\n      POSTGRES_PASSWORD: pio\n      POSTGRES_INITDB_ARGS: --encoding=UTF8\n  pio:\n    depends_on:\n      - postgres\n    environment:\n      PIO_STORAGE_SOURCES_PGSQL_TYPE: jdbc\n      PIO_STORAGE_SOURCES_PGSQL_URL: \"jdbc:postgresql://postgres/pio\"\n      PIO_STORAGE_SOURCES_PGSQL_USERNAME: pio\n      PIO_STORAGE_SOURCES_PGSQL_PASSWORD: pio\n\n"
  },
  {
    "path": "docker/pgsql/docker-compose.event.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event\n      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: PGSQL\n\n"
  },
  {
    "path": "docker/pgsql/docker-compose.meta.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_METADATA_NAME: pio_meta\n      PIO_STORAGE_REPOSITORIES_METADATA_SOURCE: PGSQL\n\n"
  },
  {
    "path": "docker/pgsql/docker-compose.model.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"3\"\nservices:\n  pio:\n    environment:\n      PIO_STORAGE_REPOSITORIES_MODELDATA_NAME: pio_model\n      PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE: PGSQL\n\n"
  },
  {
    "path": "docker/pio/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nFROM openjdk:8\n\nARG PIO_GIT_URL=https://github.com/apache/predictionio.git\nARG PIO_TAG=v0.13.0\nENV SCALA_VERSION=2.11.12\nENV SPARK_VERSION=2.2.3\nENV HADOOP_VERSION=2.7.7\nENV ELASTICSEARCH_VERSION=5.5.3\nENV PGSQL_VERSION=42.2.4\nENV MYSQL_VERSION=8.0.12\nENV PIO_HOME=/usr/share/predictionio\n\nRUN apt-get update && \\\n    apt-get install -y dpkg-dev fakeroot && \\\n    apt-get clean && \\\n    rm -rf /var/lib/apt/lists/*\n\nWORKDIR /opt/src\nRUN git clone -b $PIO_TAG $PIO_GIT_URL\nWORKDIR /opt/src/predictionio\nRUN bash ./make-distribution.sh \\\n      -Dscala.version=$SCALA_VERSION \\\n      -Dspark.version=$SPARK_VERSION \\\n      -Dhadoop.version=$HADOOP_VERSION \\\n      -Delasticsearch.version=$ELASTICSEARCH_VERSION \\\n      --with-deb && \\\n    dpkg -i ./assembly/target/predictionio_*.deb && \\\n    cp -r ./python /usr/share/predictionio && \\\n    mkdir /var/log/predictionio && \\\n    rm -rf /opt/src/predictionio/*\n\n\nRUN cp /etc/predictionio/pio-env.sh /etc/predictionio/pio-env.sh.orig && \\\n    echo \"#!/usr/bin/env bash\" > /etc/predictionio/pio-env.sh\nRUN curl -o $PIO_HOME/lib/postgresql-$PGSQL_VERSION.jar \\\n    
http://central.maven.org/maven2/org/postgresql/postgresql/$PGSQL_VERSION/postgresql-$PGSQL_VERSION.jar && \\\n    echo \"POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-$PGSQL_VERSION.jar\" >> /etc/predictionio/pio-env.sh && \\\n    echo \"MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-$MYSQL_VERSION.jar\" >> /etc/predictionio/pio-env.sh\n\nWORKDIR /usr/share\nRUN curl -o /opt/src/spark-$SPARK_VERSION.tgz \\\n    http://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \\\n    tar zxvf /opt/src/spark-$SPARK_VERSION.tgz && \\\n    echo \"SPARK_HOME=\"`pwd`/`ls -d spark*` >> /etc/predictionio/pio-env.sh && \\\n    rm -rf /opt/src\n\nWORKDIR /templates\nADD pio_run /usr/bin/pio_run\n\nEXPOSE 7070\nEXPOSE 8000\n\nCMD [\"sh\", \"/usr/bin/pio_run\"]\n\n"
  },
  {
    "path": "docker/pio/pio_run",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n. /etc/predictionio/pio-env.sh\n\n# check elasticsearch status\nif [ x\"$PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE\" != \"x\" ] ; then\n  RET=-1\n  COUNT=0\n  ES_HOST=`echo $PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS | sed -e \"s/,.*//\"`\n  ES_PORT=`echo $PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS | sed -e \"s/,.*//\"`\n  # Wait for elasticsearch startup\n  while [ $RET != 0 -a $COUNT -lt 10 ] ; do\n    echo \"Waiting for ${ES_HOST}...\"\n    curl --connect-timeout 60 --retry 10 -s \"$ES_HOST:$ES_PORT/_cluster/health?wait_for_status=green&timeout=1m\"\n    RET=$?\n    COUNT=`expr $COUNT + 1`\n    sleep 1\n  done\nfi\n\n# check mysql jar file\nif [ x\"$PIO_STORAGE_SOURCES_MYSQL_TYPE\" != \"x\" ] ; then\n  MYSQL_JAR_FILE=$PIO_HOME/lib/mysql-connector-java-$MYSQL_VERSION.jar\n  if [ ! 
-f $MYSQL_JAR_FILE ] ; then\n    curl -o $MYSQL_JAR_FILE http://central.maven.org/maven2/mysql/mysql-connector-java/$MYSQL_VERSION/mysql-connector-java-$MYSQL_VERSION.jar\n  fi\nfi\n\n# Check PIO status\nRET=-1\nCOUNT=0\nwhile [ $RET != 0 -a $COUNT -lt 10 ] ; do\n  echo \"Waiting for PredictionIO...\"\n  $PIO_HOME/bin/pio status\n  RET=$?\n  COUNT=`expr $COUNT + 1`\n  sleep 1\ndone\n\n\nif [ x\"$PIO_RUN_FILE\" != \"x\" ] ; then\n  sh $PIO_RUN_FILE\nelse\n  # Start PIO Event Server\n  $PIO_HOME/bin/pio eventserver\nfi\n\n"
  },
  {
    "path": "docker/templates/.keep",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n"
  },
  {
    "path": "docs/javadoc/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nJava API Documentation\n======================\n\n1.  Run this command at the project's root.\n    ```\n    $ sbt/sbt unidoc\n    ```\n\n2.  Point your web browser at `target/javaunidoc/index.html`.\n"
  },
  {
    "path": "docs/javadoc/javadoc-overview.html",
    "content": "<!--\n  Licensed to the Apache Software Foundation (ASF) under one or more\n  contributor license agreements.  See the NOTICE file distributed with\n  this work for additional information regarding copyright ownership.\n  The ASF licenses this file to You under the Apache License, Version 2.0\n  (the \"License\"); you may not use this file except in compliance with\n  the License.  You may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  See the License for the specific language governing permissions and\n  limitations under the License.\n-->\n<body>\n  <h1>\n    PredictionIO API Documentation\n  </h1>\n  <p>\n    If you are building a prediction engine, the most interesting package would\n    be <a\n    href=\"{@docRoot}/org.apache.predictionio/controller/java/package-summary.html\">org.apache.predictionio.controller.java</a>\n    and <a\n    href=\"{@docRoot}/org.apache.predictionio/data/store/java/package-summary.html\">org.apache.predictionio.data.store.java</a>\n    You may also want to look at <a\n    href=\"{@docRoot}/org.apache.predictionio/controller/package-summary.html\">org.apache.predictionio.controller</a>,\n    as some functionality, such as custom model persistence {@link\n    org.apache.predictionio.controller.PersistentModel}, are provided directly by that\n    package.\n  </p>\n</body>\n"
  },
  {
    "path": "docs/manual/.gitignore",
    "content": "# Bower\n/bower_components\n\n# Bundler\n/.bundle\n\n# Build Directory\n/build\n\n# Cache\n/.sass-cache\n/.cache\n\n# Sanity\n/sanity.html\n\n# OS Files\n.DS_Store\n.DS_Store?\n._*\n.Spotlight-V100\n.Trashes\n.AppleDouble\n.LSOverride\nIcon\nDesktop.ini\nIcon?\nehthumbs.db\nThumbs.db\n*~\n\n.project\n_site\n"
  },
  {
    "path": "docs/manual/Gemfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nsource 'https://rubygems.org'\n\ngem 'middleman', '~> 3.3.10'\ngem 'middleman-livereload', '~> 3.4.2'\ngem 'middleman-autoprefixer'\ngem 'middleman-minify-html'\ngem 'middleman-syntax'\ngem 'middleman-s3_sync'\ngem 'middleman-search_engine_sitemap'\n\ngem 'slim'\ngem 'therubyracer'\ngem 'oj'\ngem 'redcarpet', '>= 3.2.3'\ngem 'travis'\ngem 'nokogiri'\ngem 'rainbow'\ngem \"bootstrap-sass\", require: false\n\nplatforms :mswin, :mingw do\n  gem 'tzinfo-data'\n  gem 'wdm', '~> 0.1.0'\nend\n"
  },
  {
    "path": "docs/manual/Rakefile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire 'middleman'\nrequire 'nokogiri'\nrequire 'rainbow/ext/string'\nrequire 'uri'\nrequire 'net/http'\nrequire 'redcarpet'\nrequire File.join(File.dirname(__FILE__), 'lib', 'custom_renderer')\n\n\ntask :test do\n\nHTML = <<EOT\n<div class=\"tabs\">\n  <div data-tab=\"PHP\" data-lang=\"php\">\n```\nTest 0 <>\nTest 1 >\nTest 3 <\nTest 4 ><\nTest 5 =>\nTest 6 <=\nTest 7 <>\n<p><b>Test 8</b></p>\n```\n  </div>\n</div>\nEOT\n\nHTML2 = <<EOT\n<div class=\"tabs\">\n  <div data-tab=\"Ruby\" data-lang=\"ruby\">\n```ruby\nTest 0 <>\nTest 1 >\nTest 3 <\nTest 4 ><\nTest 5 =>\nTest 6 <=\nTest 7 <>\n<p><b>Test</b></p>\n\n# This is a ruby file.\nclass MyClass\n  def foo\n    'bar'\n  end\nend\n```\n  </div>\n  <div data-tab=\"Plain\">\nThis is a test of **markdown** inside a tab!\n\n```\n// This tab does not have the data-lang attribute set!\n$ cd path/to/your/file\n```\n  </div>\n  <div data-tab=\"HTML\" data-lang=\"html\">\n```html\n<p>Yes you can still use HTML in code blocks!</p>\n```\n  </div>\n</div>\nEOT\n\n  def block_html(raw_html)\n    done = raw_html.gsub(/(```.*?```)/m) do |match|\n      markdown = Redcarpet::Markdown.new(CustomRenderer, fenced_code_blocks: 
true)\n      markdown.render(match)\n    end\n\n    doc = Nokogiri::HTML::DocumentFragment.parse(done)\n    nodes = doc.css('div.tabs > div')\n\n    if nodes.empty?\n      raw_html\n    else\n      ul = Nokogiri::XML::Node.new('ul', doc)\n      ul['class'] = 'control'\n\n      nodes.each do |node|\n        title = node.attribute('data-tab').to_s\n        lang = node.attribute('data-lang').to_s\n\n        uuid = SecureRandom.uuid\n        id = \"tab-#{uuid}\"\n\n        li = Nokogiri::XML::Node.new('li', doc)\n        li['data-lang'] = lang\n        li.inner_html = %Q(<a href=\"##{id}\">#{title}</a>)\n\n        ul.add_child(li)\n\n        node['id'] = id\n      end\n\n      nodes.first.before(ul)\n\n      doc.to_html\n    end\n  end\n\n\n  puts 'start block'\n  puts block_html(HTML2)\n  puts 'end block'\nend\n\n\ndesc 'Check site for broken links'\ntask :check do\n\n  sets = []\n  cache = Sanity::Cache.new\n\n  Dir[\"build/**/*.html\"].each do |filename|\n    p = Sanity::Page.new(filename, cache)\n\n    sets << p.check_links\n  end\n\n  html = Sanity::Report::HTML.new(sets.map{ |s| s.to_html })\n  File.open('sanity.html', 'w') { |file| file.write(html) }\nend\n\n\nmodule Sanity\n  module Report\n\n    class HTML\n      include Padrino::Helpers\n\n      HEADER = <<EOT\n<!DOCTYPE html>\n<html lang=\"en\">\n  <head>\n    <title>Sanity Report</title>\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n    <link href=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css\" rel=\"stylesheet\">\n\n  </head>\n  <body>\n    <div class=\"jumbotron\">\n      <div class=\"container-fluid\">\n        <h1>Sanity Report</h1>\n      </div>\n    </div>\n    <div class=\"container-fluid\">\n\nEOT\n      FOOTER = <<EOT\n    </div>\n  </body>\n</html>\nEOT\n\n\n\n\n      def initialize(content)\n        if content.respond_to?(:to_html)\n          html = content.to_html\n        else\n          html = content\n        end\n\n        @content = 
content_tag(:div, html, id: 'content')\n      end\n\n      def to_html\n        \"#{HEADER}#{@content}#{FOOTER}\"\n      end\n\n      def to_s\n        to_html\n      end\n    end\n  end\n\n  class ResultSet\n    include Padrino::Helpers\n    attr_accessor :set\n    def initialize(path, set = [])\n      @path = path\n      @set = set\n    end\n\n    def push(item)\n      @set.push(item)\n    end\n\n    def to_html\n      content_tag(:h2, @path) <<\n      content_tag(:table, class: 'table table-striped') do\n        content_tag(:thead) do\n          content_tag(:tr) do\n            content_tag(:th, 'Type') <<\n            content_tag(:th, 'Status') <<\n            content_tag(:th, 'URI') <<\n            content_tag(:th, 'Message') <<\n            content_tag(:th, 'Path')\n          end\n        end <<\n        content_tag(:tbody) do\n          @set.map do |item|\n            item.to_html\n          end\n        end\n      end\n    end\n  end\n\n  class Result\n    include Padrino::Helpers\n    attr_accessor :type, :status, :path, :uri, :message, :cache, :backtrace\n\n    def initialize\n    end\n\n    def exception=(e)\n      @status = :exception\n      @message = e.message\n      @backtrace = e.backtrace\n    end\n\n    def to_s\n      \"#{@type} [#{@status}] #{@uri} #{@message} #{@path}\".color(terminal_color)\n    end\n\n    def to_html\n      content_tag(:tr, class: bootstrap_css_class) do\n        content_tag(:td, @type) <<\n        content_tag(:td, @status) <<\n        content_tag(:td, @uri) <<\n        content_tag(:td, @message) <<\n        content_tag(:td, @path)\n      end\n    end\n\n\n\n    private\n\n    def bootstrap_css_class\n      case @status\n      when :success\n        'success'\n      when :info\n        'info'\n      when :warning\n        'warning'\n      when :error\n        'danger'\n      when :exception\n        'danger'\n      else\n        raise ArgumentError, \"Status `#{@status}` is not a valid type\"\n      end\n    end\n\n    def 
terminal_color\n      case @status\n      when :success\n        :green\n      when :info\n       :blue\n      when :warning\n        :yellow\n      when :error\n        :red\n      when :exception\n        :red\n      else\n        raise ArgumentError, \"Status `#{@status}` is not a valid type\"\n      end\n    end\n  end\n\n  class Cache\n    def initialize(store = {})\n      @store = store\n    end\n\n    def read(uri)\n      @store[uri]\n    end\n\n    def write(uri, value)\n      @store[uri] = value\n    end\n\n    def fetch(uri)\n      if block_given?\n        if exists?(uri)\n          read(uri)\n        else\n          write(uri, yield)\n        end\n      else\n        read(uri)\n      end\n    end\n\n    def exists?(uri)\n      @store.has_key?(uri)\n    end\n  end\n\n  class Page\n    INDEX_FILE = 'index.html'\n\n    def initialize(filename, cache = Sanity::Cache.new)\n      @filename = filename\n      @cache = cache\n      f = File.open(@filename)\n      @doc = Nokogiri::HTML(f)\n\n      @build_path = File.join(Middleman::Application.root, 'build')\n      f.close\n    end\n\n    def check_links\n      rs = Sanity::ResultSet.new(@filename)\n      @doc.css('a').each do |link|\n        uri = link['href']\n        r = check_href(uri)\n        r.path = @filename\n        puts r\n        rs.push(r)\n      end\n      rs\n    end\n\n    def check_href(href)\n      # TODO: add trailing slash, relative url, and in page anchor links.\n      # TODO: Test for missing titles!\n      # TODO: Test for unneeded .html extension!\n      # TODO: Switch from Nokogir to raw ID checker\n      case href\n      when /\\A\\s*\\z/, nil\n        check_empty_href(href)\n      when /\\A(https?):\\/\\/.+\\z/\n        check_external_href(href)\n      when /\\A#.+\\z/\n        check_anchor_href(href)\n      when /\\A#\\z/\n        check_empty_anchor_href(href)\n      when /\\A\\/\\z/\n        check_root_href(href)\n      when /\\Amailto:.+\\z/\n        check_mailto_href(href)\n      
else\n        check_internal_href(href)\n      end\n    end\n\n    def check_external_href(href)\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :external_uri\n      begin\n        response = @cache.fetch(href) do\n          r.cache = :miss\n          uri = URI(href)\n          Net::HTTP.get_response(uri)\n        end\n\n        case response\n        when Net::HTTPSuccess\n          r.status = :success\n        when Net::HTTPNotFound\n          r.status = :error\n        when Net::HTTPRedirection\n          location = response['location']\n          r.status = :info\n          r.message = \"Redirect: #{location}\"\n        else\n          r.status = :warning\n          r.message = \"Response: #{response.class}\"\n        end\n      rescue => e\n        r.exception = e\n      end\n      r\n    end\n\n    def check_anchor_href(href)\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :anchor\n      begin\n        result = @doc.css(href)\n\n        if result.count > 0\n          r.status = :success\n        else\n          r.status = :error\n        end\n\n      rescue => e\n        r.exception = e\n      end\n      r\n    end\n\n    def check_empty_anchor_href(href)\n\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :empty_anchor\n      r.status = :info\n      r\n    end\n\n    def check_root_href(href)\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :root_path\n\n      filename = File.join(@build_path, INDEX_FILE)\n      if File.exist?(filename)\n        r.status = :success\n      else\n        r.status = :error\n        r.message \"Not found: #{filename}\"\n      end\n\n      r\n    end\n\n    def check_mailto_href(href)\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :mailto\n\n      uri = URI.parse(href)\n      if uri.is_a?(URI::MailTo)\n        r.status = :success\n\n      else\n        r.status = :error\n      end\n\n      r\n    end\n\n    def check_empty_href(href)\n      r 
= Sanity::Result.new\n      r.uri = href\n      r.type = :empty_uri\n      r.status = :error\n      r\n    end\n\n    def check_internal_href(href)\n\n      r = Sanity::Result.new\n      r.uri = href\n      r.type = :internal_uri\n\n      filename = File.join(@build_path, href.gsub('/', File::SEPARATOR))\n      if File.directory?(filename)\n        filename = File.join(filename, INDEX_FILE)\n      end\n\n      if File.exist?(filename)\n\n        r.status = :success\n      else\n\n        r.status = :error\n      end\n\n      r\n    end\n  end\nend\n\n\n"
  },
  {
    "path": "docs/manual/bower.json",
    "content": "{\n  \"name\": \"predictionio.apache.org\",\n  \"description\": \"Apache PredictionIO Documentation\",\n  \"license\": \"Apache-2.0\",\n  \"homepage\": \"predictionio.apache.org\",\n  \"ignore\": [\n    \"**/.*\",\n    \"node_modules\",\n    \"bower_components\"\n  ],\n  \"dependencies\": {\n    \"jquery\": \"~2.1.1\",\n    \"normalize.css\": \"~3.0.2\",\n    \"Slidebars\": \"~0.10.2\",\n    \"Tabslet\": \"~1.4.8\",\n    \"jcarousel\": \"~0.3.3\"\n  }\n}\n"
  },
  {
    "path": "docs/manual/config.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire 'lib/custom_renderer'\nrequire 'lib/gallery_generator'\n\n# General Settings\nset :css_dir,       'stylesheets'\nset :js_dir,        'javascripts'\nset :images_dir,    'images'\nset :partials_dir,  'partials'\n\nactivate :directory_indexes\nactivate :syntax, line_numbers: true\nactivate :autoprefixer\n\n# Markdown\nset :markdown_engine, :redcarpet\nset :markdown,\n    renderer: ::CustomRenderer,\n    fenced_code_blocks: true,\n    no_intra_emphasis: true,\n    autolink: true,\n    strikethrough: true,\n    superscript: true,\n    highlight: true,\n    underline: true,\n    tables: true\n\n# Sprockets\nsprockets.append_path File.join root, 'bower_components'\n\n# Sitemap\nset :url_root, '//predictionio.apache.org'\nactivate :search_engine_sitemap, exclude_attr: 'hidden'\n\n# Development Settings\nconfigure :development do\n  set :scheme, 'http'\n  set :host, Middleman::PreviewServer.host rescue 'localhost'\n  set :port, Middleman::PreviewServer.port rescue 80\n  Slim::Engine.set_options pretty: false, sort_attrs: false\n  set :debug_assets, true\nend\n\n# Build Settings\nconfigure :build do\n  set :scheme, 'https'\n  set :host, 
'predictionio.apache.org'\n  set :port, 80\n  Slim::Engine.set_options pretty: false, sort_attrs: false\n  activate :asset_hash\n  activate :minify_css\n  activate :minify_javascript\n  activate :minify_html do |html|\n    html.remove_multi_spaces        = true\n    html.remove_comments            = true\n    html.remove_intertag_spaces     = false\n    html.remove_quotes              = false\n    html.simple_doctype             = false\n    html.remove_script_attributes   = true\n    html.remove_style_attributes    = false\n    html.remove_link_attributes     = false\n    html.remove_form_attributes     = false\n    html.remove_input_attributes    = false\n    html.remove_javascript_protocol = true\n    html.remove_http_protocol       = false\n    html.remove_https_protocol      = false\n    html.preserve_line_breaks       = false\n    html.simple_boolean_attributes  = false\n  end\nend\n\n# Hacks\n\n# Engine Template Gallery generation\ncurrent_dir = File.dirname(__FILE__)\nyaml_file_path = \"#{current_dir}/source/gallery/templates.yaml\"\nout_file_path = \"#{current_dir}/source/gallery/template-gallery.html.md\"\nGallery.generate_md(yaml_file_path, out_file_path)\n\n# https://github.com/middleman/middleman/issues/612\nSlim::Engine.disable_option_validator!\n\n# https://github.com/Aupajo/middleman-search_engine_sitemap/issues/2\nset :file_watcher_ignore, [\n  /^bin(\\/|$)/,\n  /^\\.bundle(\\/|$)/,\n  # /^vendor(\\/|$)/, # Keep this commented out!\n  /^node_modules(\\/|$)/,\n  /^\\.sass-cache(\\/|$)/,\n  /^\\.cache(\\/|$)/,\n  /^\\.git(\\/|$)/,\n  /^\\.gitignore$/,\n  /\\.DS_Store/,\n  /^\\.rbenv-.*$/,\n  /^Gemfile$/,\n  /^Gemfile\\.lock$/,\n  /~$/,\n  /(^|\\/)\\.?#/,\n  /^tmp\\//\n]\n"
  },
  {
    "path": "docs/manual/data/nav/build.yml",
    "content": "root:\n  -\n    body: 'Samples'\n    url: '#'\n    children:\n      -\n        body: 'Typography'\n        url: '/samples/'\n      -\n        body: 'Sizing'\n        url: '/samples/sizing/'\n      -\n        body: 'Tabs'\n        url: '/samples/tabs/'\n      -\n        body: 'Languages'\n        url: '/samples/languages/'\n\n  -\n    body: 'Menu'\n    url: '/samples/level-1/'\n    children:\n      -\n        body: 'Level 2.1'\n        url: '/samples/level-2-1/'\n      -\n        body: 'Level 2.2'\n        url: '/samples/level-2-2/'\n        children:\n          -\n            body: 'Level 3.1 This Title Is Very Long To Test Line Wrap'\n            url: '/samples/level-3-1/'\n            children:\n              -\n                body: 'Level 4.1'\n                url: '/samples/level-4-1/'\n              -\n                body: 'Level 4.2'\n                url: '/samples/level-4-2/'\n              -\n                body: 'Level 4.3'\n                url: '/samples/level-4-3/'\n          -\n            body: 'Level 3.2'\n            url: '/samples/level-3-2/'\n"
  },
  {
    "path": "docs/manual/data/nav/main.yml",
    "content": "root:\n  -\n    body: 'Apache PredictionIO® Documentation'\n    url: '/'\n    children:\n      -\n        body: 'Welcome to Apache PredictionIO®'\n        url: '/'\n  -\n    body: 'Getting Started'\n    url: '#'\n    children:\n      -\n        body: 'A Quick Intro'\n        url: '/start/'\n      -\n        body: 'Installing Apache PredictionIO'\n        url: '/install/'\n      -\n        body: 'Downloading an Engine Template'\n        url: '/start/download/'\n      -\n        body: 'Deploying Your First Engine'\n        url: '/start/deploy/'\n      -\n        body: 'Customizing the Engine'\n        url: '/start/customize/'\n  -\n    body: 'Integrating with Your App'\n    url: '#'\n    children:\n      -\n        body: 'App Integration Overview'\n        url: '/appintegration/'\n      -\n        body: 'List of SDKs'\n        url: '/sdk/'\n        children:\n          -\n            body: 'Java & Android SDK'\n            url: '/sdk/java/'\n          -\n            body: 'PHP SDK'\n            url: '/sdk/php/'\n          -\n            body: 'Python SDK'\n            url: '/sdk/python/'\n          -\n            body: 'Ruby SDK'\n            url: '/sdk/ruby/'\n          -\n            body: 'Community Powered SDKs'\n            url: '/community/projects.html#sdks'\n  -\n    body: 'Deploying an Engine'\n    url: '#'\n    children:\n      -\n        body: 'Deploying as a Web Service'\n        url: '/deploy/'\n      -\n        body: 'Batch Predictions'\n        url: '/batchpredict/'\n      -\n        body: 'Monitoring Engine'\n        url: '/deploy/monitoring/'\n      -\n        body: 'Setting Engine Parameters'\n        url: '/deploy/engineparams/'\n      -\n        body: 'Deploying Multiple Engine Variants'\n        url: '/deploy/enginevariants/'\n      -\n        body: 'Engine Server Plugin'\n        url: '/deploy/plugin/'\n  -\n    body: 'Customizing an Engine'\n    url: '#'\n    children:\n      -\n        body: 'Learning DASE'\n        url: 
'/customize/'\n      -\n        body: 'Implement DASE'\n        url: '/customize/dase/'\n      -\n        body: 'Troubleshooting Engine Development'\n        url: '/customize/troubleshooting/'\n      -\n        body: 'Engine Scala APIs'\n        url: '/api/current/#package'\n  -\n    body: 'Collecting and Analyzing Data'\n    url: '#'\n    children:\n      -\n        body: 'Event Server Overview'\n        url: '/datacollection/'\n      -\n        body: 'Collecting Data with REST/SDKs'\n        url: '/datacollection/eventapi/'\n      -\n        body: 'Events Modeling'\n        url: '/datacollection/eventmodel/'\n      -\n        body: 'Unifying Multichannel Data with Webhooks'\n        url: '/datacollection/webhooks/'\n      -\n        body: 'Channel'\n        url: '/datacollection/channel/'\n      -\n        body: 'Importing Data in Batch'\n        url: '/datacollection/batchimport/'\n      -\n        body: 'Using Analytics Tools'\n        url: '/datacollection/analytics/'\n      -\n        body: 'Event Server Plugin'\n        url: '/datacollection/plugin/'\n  -\n    body: 'Choosing an Algorithm'\n    url: '#'\n    children:\n      -\n        body: 'Built-in Algorithm Libraries'\n        url: '/algorithm/'\n      -\n        body: 'Switching to Another Algorithm'\n        url: '/algorithm/switch/'\n      -\n        body: 'Combining Multiple Algorithms'\n        url: '/algorithm/multiple/'\n      -\n        body: 'Adding Your Own Algorithms'\n        url: '/algorithm/custom/'\n  -\n    body: 'Tuning and Evaluation'\n    url: '#'\n    children:\n      -\n        body: 'Overview'\n        url: '/evaluation/'\n      -\n        body: 'Hyperparameter Tuning'\n        url: '/evaluation/paramtuning/'\n      -\n        body: 'Evaluation Dashboard'\n        url: '/evaluation/evaluationdashboard/'\n      -\n        body: 'Choosing Evaluation Metrics'\n        url: '/evaluation/metricchoose/'\n      -\n        body: 'Building Evaluation Metrics'\n        url: 
'/evaluation/metricbuild/'\n  -\n    body: 'System Architecture'\n    url: '#'\n    children:\n      -\n        body: 'Architecture Overview'\n        url: '/system/'\n      -\n        body: 'Using Another Data Store'\n        url: '/system/anotherdatastore/'\n  -\n    body: 'PredictionIO® Official Templates'\n    url: '#'\n    children:\n      -\n        body: 'Intro'\n        url: '/templates/'\n      -\n        body: 'Recommendation'\n        children:\n          -\n            body: 'Quick Start'\n            url: '/templates/recommendation/quickstart/'\n          -\n            body: 'DASE'\n            url: '/templates/recommendation/dase/'\n          -\n            body: 'Evaluation Explained'\n            url: '/templates/recommendation/evaluation/'\n          -\n            body: 'How-To'\n            url: '/templates/recommendation/how-to/'\n          -\n            body: 'Read Custom Events'\n            url: '/templates/recommendation/reading-custom-events/'\n          -\n            body: 'Customize Data Preparator'\n            url: '/templates/recommendation/customize-data-prep/'\n          -\n            body: 'Customize Serving'\n            url: '/templates/recommendation/customize-serving/'\n          -\n            body: 'Train with Implicit Preference'\n            url: '/templates/recommendation/training-with-implicit-preference/'\n          -\n            body: 'Filter Recommended Items by Blacklist in Query'\n            url: '/templates/recommendation/blacklist-items/'\n          -\n            body: 'Batch Persistable Evaluator'\n            url: '/templates/recommendation/batch-evaluator/'\n      -\n        body: 'E-Commerce Recommendation'\n        children:\n          -\n            body: 'Quick Start'\n            url: '/templates/ecommercerecommendation/quickstart/'\n          -\n            body: 'DASE'\n            url: '/templates/ecommercerecommendation/dase/'\n          -\n            body: 'How-To'\n            url: 
'/templates/ecommercerecommendation/how-to/'\n          -\n            body: 'Train with Rate Event'\n            url: '/templates/ecommercerecommendation/train-with-rate-event/'\n          -\n            body: 'Adjust Score'\n            url: '/templates/ecommercerecommendation/adjust-score/'\n      -\n        body: 'Similar Product'\n        children:\n          -\n            body: 'Quick Start'\n            url: '/templates/similarproduct/quickstart/'\n          -\n            body: 'DASE'\n            url: '/templates/similarproduct/dase/'\n          -\n            body: 'How-To'\n            url: '/templates/similarproduct/how-to/'\n          -\n            body: 'Multiple Events and Multiple Algorithms'\n            url: '/templates/similarproduct/multi-events-multi-algos/'\n          -\n            body: 'Returns Item Properties'\n            url: '/templates/similarproduct/return-item-properties/'\n          -\n            body: 'Train with Rate Event'\n            url: '/templates/similarproduct/train-with-rate-event/'\n          -\n            body: 'Get Rid of Events for Users'\n            url: '/templates/similarproduct/rid-user-set-event/'\n          -\n            body: 'Recommend Users'\n            url: '/templates/similarproduct/recommended-user/'\n      -\n        body: 'Classification'\n        children:\n          -\n            body: 'Quick Start'\n            url: '/templates/classification/quickstart/'\n          -\n            body: 'DASE'\n            url: '/templates/classification/dase/'\n          -\n            body: 'How-To'\n            url: '/templates/classification/how-to/'\n          -\n            body: 'Use Alternative Algorithm'\n            url: '/templates/classification/add-algorithm/'\n          -\n            body: 'Read Custom Properties'\n            url: '/templates/classification/reading-custom-properties/'\n  -\n    body: 'Engine Template Gallery'\n    url: '#'\n    children:\n      -\n        body: 'Browse'\n       
 url: '/gallery/template-gallery/'\n      -\n        body: 'Submit your Engine as a Template'\n        url: '/community/submit-template/'\n  -\n    body: 'Demo Tutorials'\n    url: '#'\n    children:\n      -\n        body: 'Community Contributed Demo'\n        url: '/community/projects.html#demos'\n      -\n        body: 'Text Classification Engine Tutorial'\n        url: '/demo/textclassification/'\n  -\n    body: 'Getting Involved'\n    url: '/community/'\n    children:\n      -\n        body: 'Contribute Code'\n        url: '/community/contribute-code/'\n      -\n        body: 'Contribute Documentation'\n        url: '/community/contribute-documentation/'\n      -\n        body: 'Contribute a SDK'\n        url: '/community/contribute-sdk/'\n      -\n        body: 'Contribute a Webhook'\n        url: '/community/contribute-webhook/'\n      -\n        body: 'Community Projects'\n        url: '/community/projects/'\n  -\n    body: 'Getting Help'\n    url: '#'\n    children:\n      -\n        body: 'FAQs'\n        url: '/resources/faq/'\n      -\n        body: 'Support'\n        url: '/support/'\n  -\n    body: 'Resources'\n    url: '#'\n    children:\n      -\n        body: 'Command-line Interface'\n        url: '/cli/'\n      -\n        body: 'Release Cadence'\n        url: '/resources/release/'\n      -\n        body: 'Developing Engines with IntelliJ IDEA'\n        url: '/resources/intellij/'\n      -\n        body: 'Upgrade Instructions'\n        url: '/resources/upgrade/'\n      -\n        body: 'Glossary'\n        url: '/resources/glossary/'\n  -\n    body: 'Apache Software Foundation'\n    url: '#'\n    children:\n      -\n        body: 'Apache Homepage'\n        url: 'https://www.apache.org/'\n      -\n        body: 'License'\n        url: 'https://www.apache.org/licenses/'\n      -\n        body: 'Sponsorship'\n        url: 'https://www.apache.org/foundation/sponsorship.html'\n      -\n        body: 'Thanks'\n        url: 
'https://www.apache.org/foundation/thanks.html'\n      -\n        body: 'Security'\n        url: 'https://www.apache.org/security/'\n"
  },
  {
    "path": "docs/manual/data/versions.yml",
    "content": "pio: 0.14.0\nspark: 2.4.0\nspark_download_filename: spark-2.4.0-bin-hadoop2.7\nelasticsearch_download_filename: elasticsearch-6.8.1\nhbase_version: 1.2.6\nhbase_basename: hbase-1.2.6\nhbase_variant: bin\n"
  },
  {
    "path": "docs/manual/helpers/application_helpers.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nmodule ApplicationHelpers\n  def page_title\n    if current_page.data.title\n      content_tag :h1 do\n        rendered_title\n      end\n    else\n      content_tag :h1, class: 'missing' do\n        'Missing Title'\n      end\n    end\n  end\n\n  def rendered_title\n    return unless current_page.data.title\n    title = current_page.data.title\n    template = Tilt['erb'].new { title }\n    template.render(self, current_page.data)\n  end\n\n  def github_url\n    base = 'https://github.com/apache/predictionio/tree/livedoc/docs/manual'\n    path = current_page.source_file.sub(Middleman::Application.root_path.to_s, '')\n    base + path\n  end\n\n  def page_title_in_nav_menu(nodes)\n    def is_current_page(node)\n      if node.url == current_page.url\n        return true\n      else\n        return false\n      end\n    end\n\n    if nodes\n      result = \"\"\n      nodes.each do |node|\n        if node.children\n          node.children.each do |child|\n            if is_current_page(child)\n              result = child\n            end\n          end\n        else\n          if is_current_page(node)\n            result = node\n          end\n        end\n      
end\n      if result != \"\"\n        return result.body\n      else\n        return current_page.data.title\n      end\n    else\n      return \"Welcome to Apache PredictionIO Documentation!\"\n    end\n\n  end\n\n  def link_to_with_active(body, url, options = {})\n    if url == current_page.url\n      link_to body, url, options.merge(class: [options[:class], 'active'].join(' '))\n    else\n      link_to body, url, options\n    end\n  end\nend\n"
  },
  {
    "path": "docs/manual/helpers/breadcrumb_helpers.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire 'rainbow/ext/string'\n\nmodule BreadcrumbHelpers\n\n  def breadcrumbs\n    result = false\n    data.nav.main.root.each do |node|\n      result = breadcrumb_search(current_page.url, node)\n      break if result\n    end\n\n    partial 'nav/breadcrumbs', locals: { crumbs: result }\n\n  end\n\n\n  def breadcrumb_search(path, node, depth = 0, crumb = [])\n    crumb[depth] = node\n    return crumb if node.url == path\n\n    if node.children\n      node.children.each do |child|\n        result = breadcrumb_search(path, child, depth + 1, crumb)\n        return result if result\n      end\n    end\n\n    false\n  end\nend\n"
  },
  {
    "path": "docs/manual/helpers/icon_helpers.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nmodule IconHelpers\n  def icon(name)\n    if name.nil?\n      %Q{<i class=\"fa\"></i>}\n    else\n      %Q{<i class=\"fa fa-#{name}\"></i>}\n    end\n  end\nend\n"
  },
  {
    "path": "docs/manual/helpers/table_of_contents_helpers.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nmodule TableOfContentsHelpers\n  def table_of_contents(resource)\n    content = remove_front_matter_data(File.read(resource.source_file))\n    extension = File.extname(resource.source_file)[1..-1] # Trim the first dot.\n\n    if extension != 'md'\n      # Render other extensions first if they exist.\n      template = Tilt[extension].new { content }\n      content = template.render(self, resource.data)\n    end\n\n    # Now the custom Markdown TOC.\n    markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML_TOC.new(nesting_level: 2))\n    # TOC gets confused with Ruby comments inside code blocks so we removed them.\n    content_without_code = content.gsub(/(```[\\s\\S]*?```)/, '')\n    output = markdown.render(content_without_code)\n\n    if output.length == 0\n      return\n    else\n      content_tag :aside, output, id: 'table-of-contents'\n    end\n  end\n\n  private\n\n  def remove_front_matter_data(content)\n    yaml_regex = /\\A(---\\s*\\n.*?\\n?)^((---|\\.\\.\\.)\\s*$\\n?)/m\n    if content =~ yaml_regex\n      content = content.sub(yaml_regex, '')\n    end\n\n    json_regex = /\\A(;;;\\s*\\n.*?\\n?)^(;;;\\s*$\\n?)/m\n    if content =~ json_regex\n  
    content = content.sub(json_regex, '')\n    end\n\n    content\n  end\nend\n"
  },
  {
    "path": "docs/manual/helpers/url_helpers.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nmodule UrlHelpers\n  def absolute_url(path)\n    URI::Generic.build(\n      scheme: 'https',\n      host: host,\n      path: path\n    ).to_s\n  end\nend\n"
  },
  {
    "path": "docs/manual/lib/custom_renderer.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire 'middleman-core/renderers/redcarpet'\n\nclass CustomRenderer < Middleman::Renderers::MiddlemanRedcarpetHTML\n  def initialize(options = {})\n    defaults = { with_toc_data: true }\n    super(defaults.merge(options))\n  end\n\n  def paragraph(text)\n    case text\n    when/\\A(INFO|SUCCESS|WARNING|DANGER|NOTE|TODO):/\n      convert_alerts(text)\n    else\n      %Q(<p>#{text}</p>)\n    end\n  end\n\n  def header(text, level)\n    id = text.downcase.tr(\" \", \"-\")\n    id = \"'\" + id + \"'\"\n    #the anchor before the headings are there to provide proper jumping points.\n    \"<h#{level} id=#{id} class='header-anchors' >#{text}</h#{level}>\"\n  end\n\n  def block_code(code, language)\n    language = language ? 
language : 'bash'\n    super\n  end\n\n  def block_html(raw_html)\n    # Render fenced code blocks first!\n    replace = raw_html.gsub(/(```.*?```)/m) do |match|\n      markdown = Redcarpet::Markdown.new(CustomRenderer, fenced_code_blocks: true)\n      markdown.render(match)\n    end\n\n    doc = Nokogiri::HTML::DocumentFragment.parse(replace)\n    nodes = doc.css('div.tabs > div')\n\n    if nodes.empty?\n      raw_html\n    else\n      ul = Nokogiri::XML::Node.new('ul', doc)\n      ul['class'] = 'control'\n\n      nodes.each do |node|\n        title = node.attribute('data-tab').to_s\n        lang = node.attribute('data-lang').to_s\n\n        uuid = SecureRandom.uuid\n        id = \"tab-#{uuid}\"\n\n        li = Nokogiri::XML::Node.new('li', doc)\n        li['data-lang'] = lang\n        li.inner_html = %Q(<a href=\"##{id}\">#{title}</a>)\n\n        ul.add_child(li)\n\n        node['id'] = id\n      end\n\n      nodes.first.before(ul)\n\n      doc.to_html\n    end\n  end\n\n  private\n\n  def convert_alerts(text)\n    text.gsub(/\\A(INFO|SUCCESS|WARNING|DANGER|NOTE|TODO):(.*?)(\\n(?=\\n)|\\z)/m) do\n      css_class = $1.downcase\n      content = $2.strip\n      %Q(<div class=\"alert-message #{css_class}\"><p>#{content}</p></div>)\n    end\n  end\nend"
  },
  {
    "path": "docs/manual/lib/gallery_generator.rb",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nrequire 'yaml'\nrequire 'uri'\n\nmodule Gallery\n\n  private\n\n  INTRO = '---\ntitle: Engine Template Gallery\n---\n\nPick a tab for the type of template you are looking for. Some still need to be ported (a simple process) to Apache PIO and these are marked. 
Also see each Template description for special support instructions.\n\n'\n\n  BEGIN_TABS = '<div class=\"tabs\">'\n\n  RECOMMENDER_SYSTEMS = '<div data-tab=\"Recommenders\">'\n\n  CLASSIFICATION = '<div data-tab=\"Classification\">'\n\n  REGRESSION = '<div data-tab=\"Regression\">'\n\n  CLUSTERING = '<div data-tab=\"Clustering\">'\n\n  NLP = '<div data-tab=\"NLP\">'\n\n  SIMILARITY = '<div data-tab=\"Similarity\">'\n\n  OTHER = '<div data-tab=\"Other\">'\n\n  TEMPLATE_INTRO = '<h3><a href=\"%{repo}\">%{name}</a></h3>'\n\n  STAR_BUTTON ='<iframe src=\"https://ghbtns.com/github-btn.html?user=%{user}&repo=%{repo}&type=star&count=true\" frameborder=\"0\" align=\"middle\" scrolling=\"0\" width=\"170px\" height=\"20px\"></iframe>'\n\n  TEMPLATE_DETAILS =\n'\n<p>\n%{description}\n</p>\n<p>Support: %{support}</p>\n<br/>\n<table>\n<tr><th>Type</th><th>Language</th><th>License</th><th>Status</th><th>PIO min version</th><th>Apache PIO Convesion Required</th</tr>\n<tr><td>%{type}</td><td>%{language}</td><td>%{license}</td><td>%{status}</td><td>%{pio_min_version}</td><td>%{apache_pio_convesion_required}</td></tr>\n</table>\n<br/>\n'\n\n  SECTION_SEPARATOR ='</div>'\n\n  END_TABS ='</div>'\n\n  class Template\n    public\n    attr_accessor :has_github, :github_repo, :github_user\n\n    def initialize(engine)\n      engine.each do |key, val|\n        self.instance_variable_set(\"@#{key}\", val)\n        self.class.send :define_method, key, lambda { self.instance_variable_get(\"@#{key}\") }\n      end\n\n      @tags = @tags.map{ |s| s.downcase }\n\n      @has_github = parse_github\n    end\n\n    private\n    def parse_github\n      uri = URI.parse(@repo)\n      if uri.host == 'github.com'\n        path = uri.path.split('/')\n        raise \"Wrong github repo url\" unless path.length >= 3\n        @github_user = path[1]\n        @github_repo = path[2]\n        return true\n      else\n        return false\n      end\n    end\n  end\n\n  def self.write_template(mdfile, template)\n 
   intro = TEMPLATE_INTRO % {\n      name: template.name,\n      repo: template.repo }\n\n    if template.has_github\n        intro += STAR_BUTTON % {\n                    user: template.github_user,\n                    repo: template.github_repo}\n    end\n\n    mdfile.write(intro)\n    mdfile.write(TEMPLATE_DETAILS % {\n      description: template.description,\n      type: template.type,\n      language: template.language,\n      license: template.license,\n      status: template.status,\n      support: template.support_link,\n      pio_min_version: template.pio_min_version,\n      apache_pio_convesion_required: template.apache_pio_convesion_required\n    })\n  end\n\n  def self.write_templates(mdfile, templates)\n    templates.each do |t|\n      write_template(mdfile, t)\n    end\n  end\n\n  def self.write_markdown(mdfile, templates)\n    recommenders   = templates.select{ |engine| engine.tags.include? 'recommender' }\n    classification = templates.select{ |engine| engine.tags.include? 'classification' }\n    regression     = templates.select{ |engine| engine.tags.include? 'regression' }\n    similarity     = templates.select{ |engine| engine.tags.include? 'similarity' }\n    nlps           = templates.select{ |engine| engine.tags.include? 'nlp' }\n    clustering   = templates.select{ |engine| engine.tags.include? 'clustering' }\n    others         = templates.select{ |engine| engine.tags.include? 
'other' }\n\n    mdfile.write(INTRO)\n\n    mdfile.write(BEGIN_TABS)\n\n    mdfile.write(RECOMMENDER_SYSTEMS)\n    write_templates(mdfile, recommenders)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(CLASSIFICATION)\n    write_templates(mdfile, classification)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(REGRESSION)\n    write_templates(mdfile, regression)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(NLP)\n    write_templates(mdfile, nlps)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(CLUSTERING)\n    write_templates(mdfile, clustering)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(SIMILARITY)\n    write_templates(mdfile, similarity)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(OTHER)\n    write_templates(mdfile, others)\n\n    mdfile.write(SECTION_SEPARATOR)\n\n    mdfile.write(END_TABS)\n\n end\n\n\n  public\n  def self.generate_md(yaml_file_path, out_file_path)\n\n    File.open(yaml_file_path) do |in_file|\n      File.open(out_file_path, 'w') do |out_file|\n\n        templates = YAML.load(in_file)\n        parsed = templates.map{ |t| Template.new(t[\"template\"]) }\n\n        write_markdown(out_file, parsed)\n      end\n    end\n  end\nend\n"
  },
  {
    "path": "docs/manual/source/404.html.md",
    "content": "---\ntitle: Error 404\ndescription: Page not found!\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# Page Not Found\n\nSorry the page you were looking for was not found :(\n"
  },
  {
    "path": "docs/manual/source/algorithm/custom.html.md",
    "content": "---\ntitle: Adding your own Algorithms\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n(Coming soon)\n"
  },
  {
    "path": "docs/manual/source/algorithm/index.html.md",
    "content": "---\ntitle: Built-in Algorithm Libraries\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nAn engine can call virtually any algorithm in the Algorithm class. Apache\nPredictionIO currently offers native support for the [Spark\nMLlib](http://spark.apache.org/docs/latest/mllib-guide.html) machine learning\nlibrary. It is being used by some of the engine templates in the [template\ngallery](/gallery/template-gallery).\n\nMore library support will be added soon.\n"
  },
  {
    "path": "docs/manual/source/algorithm/multiple.html.md",
    "content": "---\ntitle: Combining Multiple Algorithms\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n\nYou can use more than one algorithm to build multiple models in an engine. The predicted results can be combined in the Serving class.\n\nHere are some How-to examples:\n\n* [Similar Product template - Multiple Events and Multiple Algorithms](/templates/similarproduct/multi-events-multi-algos/)\n"
  },
  {
    "path": "docs/manual/source/algorithm/switch.html.md",
    "content": "---\ntitle: Switching to Another Algorithm\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nEvery engine template comes with default algorithm(s). To switch to another algorithm, you simply need to modify the Algorithm class.\n\nHere are some How-to examples:\n\n* [Classification template - switching from NaiveBayes to Random Forests](/templates/classification/add-algorithm/)\n"
  },
  {
    "path": "docs/manual/source/appintegration/index.html.md",
    "content": "---\ntitle: App Integration Overview\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nApache PredictionIO is designed as a machine learning server that\nintegrates with your applications on production environments.\n\nA web or mobile app normally:\n\n1.  Send event data to Apache PredictionIO's Event Server for model\n    training\n2.  Send dynamic queries to deployed engine(s) to retrieve predicted results\n\n![Apache PredictionIO Single Engine\nOverview](/images/overview-singleengine.png)\n\n## Sending Event Data\n\nApache PredictionIO's Event Server receives event data from your\napplication. The data can be used by engines as training data to build predictive\nmodels.\n\nEvent Server listens to port 7070 by default. You can change the port with the\n[--port arg](/cli/#event-server-commands) when you launch the Event Server.\n\nFor further information, please read:\n\n* [Event Server Overview](/datacollection/)\n* [Collecting Data with REST/SDKs](/datacollection/eventapi)\n\n## Sending Query\n\nAfter you deploy an engine as a web service, it will wait for queries from your\napplication and return predicted results in JSON format.  An engine listens to\nport 8000 by default. 
If you want to deploy multiple engines, you can specify a\ndifferent port for each of them.\n\nFor further information, please read:\n\n* [Deploying an Engine as a Web Service](/deploy/)\n"
  },
  {
    "path": "docs/manual/source/archived/community.html.md",
    "content": "---\ntitle: Archived Community Projects\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## SDKs\n\n### Node.js SDK for PredictionIO\n\nURL: https://github.com/asafyish/predictionio-driver and\nhttps://www.npmjs.org/package/predictionio-driver\n\nNode.js PredictionIO 0.8+ client supporting both callback syntax and promise\nsyntax.\n\n- Core Author: Asaf Yishai\n\n- Status: It works with PredictionIO v0.8 - Under active development\n\n### C#/.NET SDK for PredictionIO\n\nURL: https://github.com/orbyone/Sensible.PredictionIO.NET\n\nC#/.NET library for PredictionIO 0.9.4, supporting both synchronous and\nasynchronous calls, for item recommendation and item ranking algorithms. 
Loosely\nbased on the PredictionIO Java SDK API.\n\n- Core Author: Themos Piperakis\n\n- Status: It works with PredictionIO v0.9.4 - Under active development\n\n### .NET SDK for PredictionIO\n\nURL: https://github.com/ibrahimozgon/PredictionIO-.Net-SDK\n\n.NET SDK for PredictionIO\n\n- Core Author: Ibrahim Özgön\n\n- Status: It works with PredictionIO v0.9 - Under active development\n\n\n## Installations\n\n### Vagrant Installation for Apache PredictionIO®\n\nURL: https://github.com/PredictionIO/PredictionIO-Vagrant\n\nBring Up PredictionIO 0.9.x VM with Vagrant.\n\n- Core Author: Raphael Mäder\n\n- Status: It works with PredictionIO v0.8 - Under active development\n\n### Another Docker Installation for Apache PredictionIO®\n\nURL: https://github.com/sphereio/docker-predictionio\n\nDocker container for PredictionIO-based machine learning services.\n\n- Core Author: Fabian M. Borschel\n\n- Status: It works with PredictionIO v0.9.3 - Under active development\n\n\n## Extensions\n\n### GraphX Parallel SimRank Algorithm\n\nURL: https://github.com/ZhouYii/PIO-Parallel-Simrank-Engine\n\nImplementation of Delta-Simrank algorithm using Spark's GraphX framework.\n\n- Core Author: Joey Zhou\n\n- Status: It works with PredictionIO v0.8 - Under active development\n\n### Magento Similar Products Extension\n\nURL: https://github.com/magento-hackathon/Predictionio\n\nSimilar Products is a Magento extension that utilizes PredictionIO to create a more personalized suggestion of up-sell products on the Magento product page.\n\n- Core Author: Steven Richardson, Raphael Mäder & Damian Luszczymak\n\n- Status: It works with PredictionIO v0.8 - Under active development\n\n\n## Wrappers\n\n### Lavarel Wrapper for PredictionIO\n\nURL: https://github.com/michael-hopkins/PredictionIO-Laravel-Wrapper and https://packagist.org/packages/hopkins/predictionio-laravel-wrapper\n\nA Laravel wrapper for PredictionIO v0.8.\n\n- Core Author: Bruno Cabral & Michael Hopkins\n\n- Status: It works with 
PredictionIO v0.8 - Under active development\n\n### Magento 2 Personalised Products Module\n\nURL: https://github.com/richdynamix/personalised-products\n\nPersonalised Products is a Magento 2 module that will serve realtime predicted suggestions for product upsells on the product page and complementary suggestions for cross sells on the basket page. All powered by PredictionIO using the [Similar Product](/gallery/template-gallery/#recommender-systems \"Similar Product\") engine and the [Complementary Purchase](/gallery/template-gallery/#unsupervised-learning \"Complementary Purchase\") engine.\n\n- Core Author: Steven Richardson\n\n- Status: It works with PredictionIO v0.9.5 - Under active development\n\n\n## Demos\n\n### NoGoodGamez\n\n<img src=\"/images/showcase/nogoodgamez-158x70.png\" alt=\"NoGoodGamez\" class=\"static\" />\n\nPS3/PS4 game Recommendation built by [pashadude](https://github.com/pashadude/)\n\nURL: http://nogoodgamez.com\n\n### OnTapp\n\n<img src=\"/images/showcase/on-tapp-70x70.png\" alt=\"OnTapp\" class=\"static\" />\n\nBeer recommendation app built by [Victor Leung](https://twitter.com/victorleungtw).\n\nWriteup: https://medium.com/@victorleungtw/beer-recommendation-engine-using-predictionio-36488ea0c50d\n\n### Yelpio\n\n<img src=\"/images/showcase/yelpio-70x70.png\" alt=\"Yelpio\" class=\"static\" />\n\nBusiness Recommendation built by [TRAN QUOC HOAN](https://twitter.com/k09ht), [Inhwan Lee](https://github.com/ihlee01), and 山本直人.\n\nURL: http://yelpio.hongo.wide.ad.jp/"
  },
  {
    "path": "docs/manual/source/archived/index.html.md",
    "content": "---\ntitle: List of Archived Pages\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Archived Contribution\n\nPlease help to move pages here if they are no-longer maintained\n"
  },
  {
    "path": "docs/manual/source/archived/install-linux.html.md.erb",
    "content": "---\ntitle: Installing Apache PredictionIO on Linux / Mac OS X\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nFollow the steps below to setup Apache PredictionIO and its\ndependencies. In these instructions we will assume you are in your home\ndirectory. Wherever you see `/home/abc`, replace it with your own home\ndirectory.\n\n### Java\n\nEnsure you have an appropriate Java version installed. For example:\n\n```\n$ java -version\njava version \"1.8.0_40\"\nJava(TM) SE Runtime Environment (build 1.8.0_40-b25)\nJava HotSpot(TM) 64-Bit Server VM (build 25.40-b25, mixed mode)\n```\n\n\n### Download Apache PredictionIO\n\nDownload Apache PredictionIO and extract it.\n\n```\n$ cd\n$ pwd\n/home/abc\n$ wget http://download.prediction.io/PredictionIO-<%= data.versions.pio %>.tar.gz\n$ tar zxvf PredictionIO-<%= data.versions.pio %>.tar.gz\n```\n\nNOTE: Download instructions above apply to previous non-Apache releases only.\nOnce we have made an Apache release, new instructions will be provided.\n\n### Installing Dependencies\n\nLet us install dependencies inside a subdirectory of the Apache PredictionIO\ninstallation. 
By following this convention, you can use\nPredictionIO's default configuration as is.\n\n```\n$ mkdir PredictionIO-<%= data.versions.pio %>/vendors\n```\n\n\n#### Spark Setup\n\n<%= partial 'shared/install/spark' %>\n\n\n#### Elasticsearch Setup\n\nWARNING: You may skip this section if you are using PostgreSQL or MySQL.\n\n<%= partial 'shared/install/elasticsearch' %>\n\n\n#### HBase Setup<a class=\"anchor\" name=\"hbase\">&nbsp;</a>\n\nWARNING: You may skip this section if you are using PostgreSQL or MySQL.\n\n<%= partial 'shared/install/hbase' %>\n\n\nIn addition, you must set your environment variable `JAVA_HOME`. For example, in\n`/home/abc/.bashrc` add the following line:\n\n```\nexport JAVA_HOME=/usr/lib/jvm/java-8-oracle\n```\n\n\n<%= partial 'shared/install/dependent_services' %>\n\nNow you have installed everything you need!\n\n<%= partial 'shared/install/proceed_template' %>\n"
  },
  {
    "path": "docs/manual/source/archived/install-vagrant.html.md.erb",
    "content": "---\ntitle: Installing PredictionIO with Vagrant (VirtualBox)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nWARNING: Running PredictionIO with Vagrant is intended for the purposes of\nsimple tests in an isolated environment. Due to resource limitation and overhead\nof virtual machine (VM), it runs much more slowly or may encounter memory issue.\nWe recommend using Linux or Mac machine for serious usage.\n\n## Install VirtualBox\n\nIf you don't have VirtualBox installed, please follow the instructions in the\n[VirtualBox site](https://www.virtualbox.org/wiki/Downloads) to download and\ninstall it. After installation is done, you don't need to setup anything in Virtual Box. 
Vagrant will do it for you later.\n\n## Install Vagrant\n\nIf you don't have Vagrant installed, please follow the instructions in the\n[Vagrant site](https://www.vagrantup.com/downloads.html) to download and install\nit.\n\n## Bring up PredictionIO VM with Vagrant\n\nGet the latest Vagrant setup from GitHub and make sure you are on the master branch:\n\n```\n$ git clone https://github.com/PredictionIO/PredictionIO-Vagrant.git\n$ cd PredictionIO-Vagrant/\n$ git checkout master\n```\n\nInside the directory `PredictionIO-Vagrant/`, you will find a file named `Vagrantfile` which is the configuration file used by Vagrant to set up the VM. You may modify this file if you want to change the VM configuration.\n\nFor example, if you want to change the memory of the VM, you can locate the following line in the `Vagrantfile` and change the value passed to the `memory` parameter (default is 2048MB):\n\n```\n  v.customize [\"modifyvm\", :id, \"--cpuexecutioncap\", \"90\", \"--memory\", \"2048\"]\n```\n\nIn the directory `PredictionIO-Vagrant/`, bring up PredictionIO VM by running:\n\n```\n$ vagrant up\n```\n\nINFO: When you run `vagrant up` for the first time, it will download the base\nbox ubuntu/trusty64 if you don't have it. 
Then it will also install all\nnecessary libraries and set up PredictionIO in the virtual machine.\n\nWhen it finishes successfully, you should see something like the following:\n\n```\n==> default: Installation done!\n==> default: --------------------------------------------------------------------------------\n==> default: Installation of PredictionIO <%= data.versions.pio %> complete!\n==> default: IMPORTANT: You still have to start PredictionIO and dependencies manually:\n==> default: Run: 'pio-start-all'\n==> default: Check the status with: 'pio status'\n==> default: Use: 'pio [train|deploy|...]' commands\n==> default: Please report any problems to: dev@predictionio.apache.org\n==> default: Documentation at: http://predictionio.apache.org\n==> default: --------------------------------------------------------------------------------\n==> default: Finish PredictionIO installation.\n```\n\nThat's it! Now you have a PredictionIO VM running!\n\nPlease see the following notes regarding how to use PredictionIO VM with vagrant.\n\n## Using the PredictionIO VM\n\n### Login to the VM\n\nYou could ssh to the VM by running the following from your host machine in the\nsame directory where you run `vagrant up` (i.e. PredictionIO-Vagrant/)\n\n```\n$ vagrant ssh\n```\n\nThen your console prompt becomes something like the following, which means you have logged into the VM:\n\n```\nvagrant@vagrant-ubuntu-trusty-64:~$\n```\n\nOnce you've logged into the VM, you can proceed to [Choosing an Engine Template](/start/download) or continue the QuickStart of the Engine template you have chosen.\n\n### Shutdown and bring up PredictionIO VM again\n\nWhen you are not using PredictionIO VM, you should shut down VM properly, by running the following **in the host machine** (not inside VM):\n\n```\n$ vagrant halt\n```\n\nWARNING: If you didn't shut down VM properly or you ran `vagrant suspend`, the\nVM may go to suspend state. 
HBase may not be running properly the next time\nyou run `vagrant up`. In this case, you can always run `vagrant halt` to do a\nclean shutdown first before running `vagrant up` again.\n\nThen you can run `vagrant up` again later to bring up the PredictionIO VM again.\n\n```\n$ vagrant up\n```\n\nWhen it's ready, you should see the following:\n\n```\n==> default: --------------------------------------------------------------------------------\n==> default: PredictionIO VM is up!\n==> default: You could run 'pio status' inside VM ('vagrant ssh' to VM first) to confirm if PredictionIO is ready.\n==> default: IMPORTANT: You still have to start the eventserver manually (inside VM):\n==> default: Run: 'pio eventserver'\n==> default: --------------------------------------------------------------------------------\n```\n"
  },
  {
    "path": "docs/manual/source/archived/launch-aws.html.md.erb",
    "content": "---\ntitle: Launching PredictionIO on AWS\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nDeploying PredictionIO on Amazon Web Services is extremely easy thanks\nto AWS Marketplace. As long as you have access to AWS, you can launch a\nready-to-use PredictionIO Amazon EC2 instance with a single click.\n\n## Prerequisites\n\n* Amazon Web Services account\n* Amazon EC2\n\n## Access AWS Marketplace\n\nVisit [PredictionIO product's page on AWS\nMarketplace](https://aws.amazon.com/marketplace/pp/B00RPIFSYS/) and sign in with\nyour AWS account.\n\n## Using 1-Click Launch\n\nYou should see the following screen after you have logged in.\n\n![alt text](../images/awsm-product.png)\n\nUnder the big yellow \"Continue\" button, select the region where you want to\nlaunch the PredictionIO EC2 instance, then click \"Continue\".\n\n![alt text](../images/awsm-1click.png)\n\nReview your instance's settings before launching. 
For quick prototyping work,\nwe recommend using the \"memory optimized\" instances, which offer the cheapest memory configurations: at least the\n\"Memory Optimized R3 (r3.large)\" or, for larger datasets, the \"(r3.xlarge)\".\n\n## Setting Security Group\n\nThe default security group, marked by \"AutogenByAWSMP\", has the following ports\nopen to the public:\n\n* 22 (SSH)\n* 7070 (PredictionIO Event Server)\n* 8000 (PredictionIO Server)\n* 8080 (Spark Master)\n* 9200 (Elasticsearch)\n\n## Start Using PredictionIO\n\nIt may take a few minutes after the EC2 instance has launched for all\nPredictionIO components to become ready. When they are ready, you may connect to\nyour instance; see [AWS\ndocumentation](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-connect-to-instance-linux.html)\nfor more details.\n\nOnce you connect to your instance, you can find PredictionIO at\n`/opt/PredictionIO` and the binary command path is `/opt/PredictionIO/bin`.\n\n<%= partial 'shared/install/proceed_template' %>\n\nNOTE: The AWS instance will have all PredictionIO components automatically started for\nyou, so you could safely skip the **pio-start-all** command as described in QuickStart.\n"
  },
  {
    "path": "docs/manual/source/archived/supervisedlearning.html.md",
    "content": "---\ntitle: Machine Learning With PredictionIO\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis guide is designed to give developers a brief introduction to fundamental concepts in machine learning, as well as an explanation of how these concept tie into PredictionIO's engine development platform. This particular guide will largely deal with giving some\n\n## Introduction to Supervised Learning\n\nThe first question we must ask is: what is machine learning? **Machine learning** is the field of study at the intersection of computer science, engineering, mathematics, and statistics which seeks to discover or infer patterns hidden within a set of observations, which we call our data. Some examples of problems that machine learning seeks to solve are:\n\n\n\n- Predict whether a patient has breast cancer based on their mammogram results.\n- Predict whether an e-mail is spam or not based on the e-mail's content.\n- Predict today's temperature based on climate variables collected for the previous week.\n\n### Thinking About Data\n\nIn the latter examples, we are trying to predict an outcome \\\\(Y\\\\), or **response**, based on some recorded or observed variables \\\\(X\\\\), or **features**. 
For example: in the first problem each observation is a patient, the response variable \\(Y\\) is equal to 1 if this patient has breast cancer and 0 otherwise, and \\(X\\) represents the mammogram results.\n\nWhen we say we want to predict \\(Y\\) using \\(X\\), we are trying to answer the question: how does the response \\(Y\\) depend on a set of features \\(X\\)? To do this we need a set of observations, which we call our **training data**, consisting of observations for which we have observed both \\(Y\\) and \\(X\\), in order to make inferences about this relationship.\n\n### Different Types of Supervised Learning Problems\n\nNote that in the first two examples, the outcome \\(Y\\) can only take on two values (1: cancer/spam, 0: no cancer/no spam). Whenever the outcome variable \\(Y\\) denotes a label associated to a particular group of observations (e.g. the cancer group), the **supervised learning** problem is also called a **classification** problem. In the third example, however, \\(Y\\) can take on any numerical value since it denotes some temperature reading (e.g. 25.143, 25.14233, 32.0). These types of supervised learning problems are also called **regression** problems.\n\n### Training a Predictive Model\n\nA predictive model should be thought of as a function \\(f\\) that takes as input a set of features, and outputs a predicted outcome (i.e. \\(f(X) = Y\\)). The phrase **training a model** simply refers to the process of using the training data to estimate such a function.  \n\n## PredictionIO and Supervised Learning\n\nMachine learning methods generally assume that our observation responses and features are numeric vectors. We will say that observations in this format are in **standard form**. However, when you are working with real-life data this will often not be the case. The data will often be formatted in a manner that is specific to the application's needs. 
As an example, let's suppose our application is [StackOverFlow](http://stackoverflow.com). The data we want to analyze are questions, and we want to predict based on a question's content whether or not it is related to Scala.\n\n\n**Self-check:**   Is this a classification or regression problem?\n\n### Thinking About Data With PredictionIO\n\nPredictionIO's predictive engine development platform allows you to easily incorporate observations that are not in standard form. Continuing with our example, we can import the observations, or StackOverFlow questions, into [PredictionIO's Event Server](/datacollection/) as events with the following properties:\n\n\n`properties = {question : String, topic : String}`\n\nThe value `question` is the actual question stored as a `String`, and topic is also a string equal to either `\"Scala\"` or `\"Other\"`. Our outcome here is `topic`, and `question` will provide a source for extracting features. That is, we will be using `question` to predict the outcome `topic`.\n\nOnce the observations are loaded as events into the Event Server, the engine's [Data Source](/customize/) component is able to read them,  which allows you to treat them as objects in a Scala project. The engine's Preparator component is in charge of converting these observations into standard form. To do this, we can first map the topic values as follows:\n\n\n\n`Map(\"Other\" -> 0, \"Scala\" -> 1)`.\n\n\nWe can then vectorize the observation's associated question text to obtain a numeric feature vector for each of our observations. This text vectorization procedure is an example of a general concept in machine learning called **feature extraction**. 
After performing these transformations of our observations, they are now in standard form and can be used for training a large quantity of machine learning models.\n\n### Training the Model With PredictionIO\n\nThe Algorithm engine component serves two purposes: outputting a predictive model \\\\(f\\\\) and using this to predict the outcome variable. Here \\\\(f\\\\) takes as input a vectorized question and outputs either 0 or 1. However, our `Query` input will be again a question, and our `PredictedResult` the topic associated to the predicted label (0 or 1):\n\n\n`Query = {question : String}`\n`PredictedResult = {topic : String}`\n\n\nWith PredictionIO's engine development platform, you can easily automate the vectorization of the Query question, as well as mapping the predicted label to the appropriate topic output format.\n"
  },
  {
    "path": "docs/manual/source/archived/tapster.html.md",
    "content": "---\ntitle: Comics Recommendation Demo\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Introduction\n\nIn this demo, we will show you how to build a Tinder-style web application (named \"Tapster\") recommending comics to users based on their likes/dislikes of episodes interactively.\n\nThe demo will use [Similar Product Template](https://predictionio.apache.org/templates/similarproduct/quickstart/). Similar Product Template is a great choice if you want to make recommendations based on immediate user activities or for new users with limited history. It uses MLLib Alternating Least Squares (ALS) recommendation algorithm, a [Collaborative filtering](http://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) (CF) algorithm commonly used for recommender systems. These techniques aim to fill in the missing entries of a user-item association matrix. Users and products are described by a small set of latent factors that can be used to predict missing entries. 
A layman's interpretation of Collaborative Filtering is \"People who like this comic, also like these comics.\"\n\nAll the code and data is on GitHub at: [github.com/PredictionIO/Demo-Tapster](https://github.com/PredictionIO/Demo-Tapster).\n\n### Data\nThe source of the data is from [Tapastic](http://tapastic.com/). You can find the data files [here](https://github.com/PredictionIO/Demo-Tapster/tree/master/data).\n\nThe data structure looks like this:\n\n[Episode List](https://github.com/PredictionIO/Demo-Tapster/blob/master/data/episode_list.csv) `data/episode_list.csv`\n\n**Fields:** episodeId | episodeTitle | episodeCategories | episodeUrl | episodeImageUrls\n\n1,000 rows. Each row represents one episode.\n\n[User Like Event List](https://github.com/PredictionIO/Demo-Tapster/blob/master/data/user_list.csv) `data/user_list.csv`\n\n**Fields:** userId | episodeId | likedTimestamp\n\n192,587 rows. Each row represents one user like for the given episode.\n\nThe tutorial has four major steps:\n\n- Demo application setup\n- PredictionIO installation and setup\n- Import data into database and PredictionIO\n- Integrate demo application with PredictionIO\n\n## Tapster Demo Application\nThe demo application is built using Rails.\n\nYou can clone the existing application with:\n\n```\n$ git clone  https://github.com/PredictionIO/Demo-Tapster.git\n$ cd Demo-Tapster\n$ bundle install\n```\nYou will need to edit `config/database.yml` to match your local database settings. We have provided some sensible defaults for PostgreSQL, MySQL, and SQLite.\n\nSetup the database with:\n\n```\n$ rake db:create\n$ rake db:migrate\n```\n\nAt this point, you should have the demo application ready but with an empty database. Lets import the episodes data into our database. We will do this with: `$ rake import:episodes`. 
An \"Episode\" is a single [comic strip](http://en.wikipedia.org/wiki/Comic_strip).\n\n[View on GitHub](https://github.com/PredictionIO/Demo-Tapster/blob/master/lib/tasks/import/episodes.rake)\n\nThis script is pretty simple. It loops through the CSV file and creates a new episode for each line in the file in our local database.\n\nYou can start the app and point your browser to [http://localhost:3000](http://localhost:3000)\n\n```\n$rails server\n```\n\n![Rails Server](/images/demo/tapster/rails-server.png)\n\n## Apache PredictionIO Setup\n\n### Install Apache PredictionIO\nFollow the installation instructions [here](http://predictionio.apache.org/install/) or simply run:\n\n```\n$ bash -c \"$(curl -s https://raw.githubusercontent.com/apache/predictionio/master/bin/install.sh)\"\n```\n\n![PIO Install](/images/demo/tapster/pio-install.png)\n\n\n### Create a New App\nYou will need to create a new app on Apache PredictionIO to house\nthe Tapster demo. You can do this with:\n\n```\n$ pio app new tapster\n```\n\nTake note of the App ID and Access Key.\n\n![PIO App New](/images/demo/tapster/pio-app-new.png)\n\n### Setup Engine\n\nWe are going to copy the Similar Product Template into the PIO directory.\n\n```\n$ cd PredictionIO\n$ git clone https://github.com/apache/predictionio-template-similar-product.git tapster-episode-similar\n```\n\nNext we are going to update the App ID in the ‘engine.json’ file to match the App ID we just created.\n\n```\n$ cd tapster-episode-similar\n$ nano engine.json\n$ cd ..\n```\n\n![Engine Setup](/images/demo/tapster/pio-engine-setup.png)\n\n\n### Modify  Engine Template\n\nBy the default, the engine template reads the “view” events. 
We can easily to change it to read “like” events.\n\n<!-- For more advanced example of how-to combine view and like/dislike events in one recommender, please see the multi-events-multi-algos.html -->\n\nModify `readTraining()` in DataSource.scala:\n\n```scala\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    ...\n\n    val viewEventsRDD: RDD[ViewEvent] = eventsDb.find(\n      appId = dsp.appId,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"like\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val viewEvent = try {\n          event.event match {\n            case \"like\" => ViewEvent( // MODIFIED\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        viewEvent\n      }\n\n    ...\n  }\n}\n\n```\n\nFinally to build the engine we will run:\n\n```\n$ cd tapster-episode-similar\n$ pio build\n$ cd ..\n```\n\n![PIO Build](/images/demo/tapster/pio-build.png)\n\n## Import Data\n\nOnce everything is installed, start the event server by running: `$ pio eventserver`\n\n![Event Server](/images/demo/tapster/pio-eventserver.png)\n\nINFO: You can check the status of Apache PredictionIO at any time\nby running: `$ pio status`\n\nALERT: If your laptop goes to sleep you might manually need to restart HBase with:\n\n```\n$ cd PredictionIO/venders/hbase-0.98.6/bin\n$ ./stop-hbase.sh\n$ ./start-hbase.sh\n```\n\nThe key event we are importing into Apache PredictionIO event\nserver is the \"Like\" event 
(for example, user X likes episode Y).\n\nWe will send this data to Apache PredictionIO by executing `$ rake\nimport:predictionio` command.\n\n[View on GitHub](https://github.com/PredictionIO/Demo-Tapster/blob/master/lib/tasks/import/predictionio.rake)\n\nThis script is a little more complex. First we need to connect to the Event Server.\n\n```\nclient = PredictionIO::EventClient.new(ENV['PIO_ACCESS_KEY'], ENV['PIO_EVENT_SERVER_URL'], THREADS)\n```\n\nYou will need to create the environmental variables `PIO_ACCESS_KEY` and `PIO_EVENT_SERVER_URL`. The default Event Server URL is: http://localhost:7070.\n\nINFO: If you forget your **Access Key** you can always run: `$ pio app list`\n\nYou can set these values in the `.env` file located in the application root directory and it will be automatically loaded into your environment each time Rails is run.\n\nThe next part of the script loops through each line of the `data/user_list.csv`\nfile and returns an array of unique user and episode IDs. Once we have those we\ncan send the data to Apache PredictionIO like this.\n\nFirst the users:\n\n```\nuser_ids.each_with_index do |id, i|\n  # Send unique user IDs to PredictionIO.\n  client.aset_user(id)\n  puts \"Sent user ID #{id} to PredictionIO. Action #{i + 1} of #{user_count}\"\nend\n```\n\nAnd now the episodes:\n\n```\nepisode_ids.each_with_index do |id, i|\n  # Load episode from database - we will need this to include the categories!\n  episode = Episode.where(episode_id: id).take\n\n  if episode\n    # Send unique episode IDs to PredictionIO.\n    client.acreate_event(\n      '$set',\n      'item',\n      id,\n      properties: { categories: episode.categories }\n    )\n    puts \"Sent episode ID #{id} to PredictionIO. Action #{i + 1} of #{episode_count}\"\n  else\n    puts \"Episode ID #{id} not found in database! 
Skipping!\".color(:red)\n  end\nend\n```\n\nFinally we loop through the `data/user_list.csv` file a final time to send the like events:\n\n```\nCSV.foreach(USER_LIST, headers: true) do |row|\n  user_id = row[0] # userId\n  episode_id = row[1] # episodeId\n\n  # Send like to PredictionIO.\n  client.acreate_event(\n    'like',\n    'user',\n    user_id,\n    { 'targetEntityType' => 'item', 'targetEntityId' => episode_id }\n  )\n\n  puts \"Sent user ID #{user_id} liked episode ID #{episode_id} to PredictionIO. Action #{$INPUT_LINE_NUMBER} of #{line_count}.\"\nend\n```\n\nIn total the script takes about 4 minutes to run on a basic laptop. At this\npoint all the data is now imported to Apache PredictionIO.\n\n![Import](/images/demo/tapster/pio-import-predictionio.png)\n\n### Engine Training\n\nWe train the engine with the following command:\n\n```\n$ cd tapster-episode-similar\n$ pio train -- --driver-memory 4g\n```\n\n![PIO Train](/images/demo/tapster/pio-train.png)\n\nUsing the --driver-memory option to limit the memory used by Apache PredictionIO.\nWithout this Apache PredictionIO can consume too much memory leading to a crash.\nYou can adjust the 4g up or down depending on your system specs.\n\nYou can set up a job to periodically retrain the engine so the model is updated with the latest dataset.\n\n\n### Deploy Model\n\nYou can deploy the model with: `$ pio deploy` from the `tapster-episode-similar` directory.\n\nAt this point, you have an demo app with data and a Apache PredictionIO\nserver with a trained model all setup. Next, we will connect the\ntwo so you can log the live interaction (likes) events into Apache PredictionIO\nevent server and query the engine server for recommendation.\n\n\n## Connect Demo app with Apache PredictionIO\n\n### Overview\nOn a high level the application keeps a record of each like and dislike. It uses jQuery to send an array of both likes and dislikes to the server on each click. 
The server then queries Apache PredictionIO for a similar episode which is relayed to jQuery and displayed to the user.\n\nData flow:\n\n- The user likes an episode.\n- Tapster sends the \"Like\" event to Apache PredictionIO event\n  server.\n- Tapster queries Apache PredictionIO engine with all the episodes\n  the user has rated (likes and dislikes) in this session.\n- Apache PredictionIO returns 1 recommended episode.\n\n### JavaScript\nAll the important code lives in `app/assets/javascripts/application.js` [View on GitHub](https://github.com/PredictionIO/Demo-Tapster/blob/master/app/assets/javascripts/application.js)\n\nMost of this file is just handlers for click things, displaying the loading dialog and other such things.\n\nThe most important function is to query the Rails server for results from Apache\nPredictionIO.\n\n```\n// Query the server for a comic based on previous likes. See episodes#query.\nqueryPIO: function() {\n  var _this = this; // For closure.\n  $.ajax({\n    url: '/episodes/query',\n    type: 'POST',\n    data: {\n      likes: JSON.stringify(_this.likes),\n      dislikes: JSON.stringify(_this.dislikes),\n    }\n  }).done(function(data) {\n    _this.setComic(data);\n  });\n}\n```\n### Rails\n\nOn the Rails side all the fun things happen in the episodes controller located at: `app/controllers/episodes_controller` [View on GitHub](https://github.com/PredictionIO/Demo-Tapster/blob/master/app/controllers/episodes_controller.rb).\n\n```\ndef query\n  # Create PredictionIO client.\n  client = PredictionIO::EngineClient.new(ENV['PIO_ENGINE_URL'])\n\n  # Get posted likes and dislikes.\n  likes = ActiveSupport::JSON.decode(params[:likes])\n  dislikes = ActiveSupport::JSON.decode(params[:dislikes])\n\n  if likes.empty?\n    # We can't query PredictionIO with no likes so\n    # we will return a random comic instead.\n    @episode = random_episode\n\n    render json: @episode\n    return\n  end\n\n  # Query PredictionIO.\n  # Here we black list the 
disliked items so they are not shown again!\n  response = client.send_query(items: likes, blackList: dislikes,  num: 1)\n\n  # With a real application you would want to do some\n  # better sanity checking of the response here!\n\n  # Get ID of response.\n  id = response['itemScores'][0]['item']\n\n  # Find episode in database.\n  @episode = Episode.where(episode_id: id).take\n\n  render json: @episode\nend\n```\n\nOn the first line we make a connection to Apache PredictionIO. You\nwill need to set the `PIO_ENGINE_URL`. This can be done in the `.env` file. The\ndefault URL is: http://localhost:8000.\n\nNext we decode the JSON sent from the browser.\n\nAfter that we check to see if the user has liked anything yet. If not we just return a random episode.\n\nIf the user has likes then we can send that data to the Apache PredictionIO engine server as a query.\n\nWe also blacklist the dislikes so that they are not returned.\n\nWith our response from Apache PredictionIO it’s just a matter of\nlooking it up in the database and rendering that object as JSON.\n\nOnce the response is sent to the browser JavaScript is used to replace the existing comic and hide the loading message.\n\nThat’s it. You’re done! If Ruby is not your language of choice check out our other [SDKs](http://predictionio.apache.org/sdk/) and remember you can always interact with the Event Server through its native JSON API.\n\n## Links\nSource code is on GitHub at: [github.com/PredictionIO/Demo-Tapster](https://github.com/PredictionIO/Demo-Tapster)\n\n## Conclusion\n\nLove this tutorial and Apache PredictionIO? Both are open source\n(Apache 2 License). [Fork](https://github.com/PredictionIO/Demo-Tapster) this\ndemo and build upon it. If you produce something cool shoot us an email and we\nwill link to it from here.\n\nFound a typo? Think something should be explained better? 
This tutorial (and all\nour other documentation) lives in the main repo\n[here](https://github.com/apache/predictionio/blob/livedoc/docs/manual/source/demo/tapster.html.md).\nOur documentation is in the `livedoc` branch. Find out how to contribute\ndocumentation at\nhttp://predictionio.apache.org/community/contribute-documentation/.\n\nWe &hearts; pull requests!\n"
  },
  {
    "path": "docs/manual/source/batchpredict/index.html.md",
    "content": "---\ntitle: Batch Predictions\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n##Overview\nProcess predictions for many queries using efficient parallelization\nthrough Spark. Useful for mass auditing of predictions and for\ngenerating predictions to push into other systems.\n\nBatch predict reads and writes multi-object JSON files similar to the\n[batch import](/datacollection/batchimport/) format. JSON objects are separated\nby newlines and cannot themselves contain unencoded newlines.\n\n##Compatibility\n`pio batchpredict` loads the engine and processes queries exactly like\n`pio deploy`. There is only one additional requirement for engines\nto utilize batch predict:\n\nWARNING: All algorithm classes used in the engine must be\n[serializable](https://www.scala-lang.org/api/2.11.8/index.html#scala.Serializable).\n**This is already true for PredictionIO's base algorithm classes**, but may be broken\nby including non-serializable fields in their constructor. 
Using the\n[`@transient` annotation](http://fdahms.com/2015/10/14/scala-and-the-transient-lazy-val-pattern/)\nmay help in these cases.\n\nThis requirement is due to processing the input queries as a\n[Spark RDD](https://spark.apache.org/docs/latest/rdd-programming-guide.html#resilient-distributed-datasets-rdds)\nwhich enables high-performance parallelization, even on a single machine.\n\n##Usage\n\n### `pio batchpredict`\n\nCommand to process bulk predictions. Takes the same options as `pio deploy` plus:\n\n### `--input <value>`\n\nPath to file containing queries; a multi-object JSON file with one\nquery object per line. Accepts any valid Hadoop file URL.\n\nDefault: `batchpredict-input.json`\n\n### `--output <value>`\n\nPath to file to receive results; a multi-object JSON file with one\nobject per line, the prediction + original query. Accepts any\nvalid Hadoop file URL. Actual output will be written as Hadoop\npartition files in a directory with the output name.\n\nDefault: `batchpredict-output.json`\n\n### `--query-partitions <value>`\n\nConfigure the concurrency of predictions by setting the number of partitions\nused internally for the RDD of queries. This will directly affect the\nnumber of resulting `part-*` output files. 
While setting to `1` may seem\nappealing to get a single output file, this will remove parallelization\nfor the batch process, reducing performance and possibly exhausting memory.\n\nDefault: number created by Spark context's `textFile` (probably the number\nof cores available on the local machine)\n\n### `--engine-instance-id <value>`\n\nIdentifier for the trained instance to use for batch predict.\n\nDefault: the latest trained instance.\n\n##Example\n\n###Input\n\nA multi-object JSON file of queries as they would be sent to the engine's\nHTTP Queries API.\n\nNOTE: Read via\n[SparkContext's `textFile`](https://spark.apache.org/docs/latest/rdd-programming-guide.html#external-datasets)\nand so may be a single file or any supported Hadoop format.\n\nFile: `batchpredict-input.json`\n\n```json\n{\"user\":\"1\"}\n{\"user\":\"2\"}\n{\"user\":\"3\"}\n{\"user\":\"4\"}\n{\"user\":\"5\"}\n```\n\n###Execute\n\n```bash\npio batchpredict \\\n  --input batchpredict-input.json \\\n  --output batchpredict-output.json\n```\n\nThis command will run to completion, aborting if any errors are encountered.\n\n###Output\n\nA multi-object JSON file of predictions + original queries. 
The predictions\nare JSON objects as they would be returned from the engine's HTTP Queries API.\n\nNOTE: Results are written via Spark RDD's `saveAsTextFile` so each partition\nwill be written to its own `part-*` file.\nSee [post-processing results](#post-processing-results).\n\nFile 1: `batchpredict-output.json/part-00000`\n\n```json\n{\"query\":{\"user\":\"1\"},\"prediction\":{\"itemScores\":[{\"item\":\"1\",\"score\":33},{\"item\":\"2\",\"score\":32}]}}\n{\"query\":{\"user\":\"3\"},\"prediction\":{\"itemScores\":[{\"item\":\"2\",\"score\":16},{\"item\":\"3\",\"score\":12}]}}\n{\"query\":{\"user\":\"4\"},\"prediction\":{\"itemScores\":[{\"item\":\"3\",\"score\":19},{\"item\":\"1\",\"score\":18}]}}\n```\n\nFile 2: `batchpredict-output.json/part-00001`\n\n```json\n{\"query\":{\"user\":\"2\"},\"prediction\":{\"itemScores\":[{\"item\":\"5\",\"score\":55},{\"item\":\"3\",\"score\":28}]}}\n{\"query\":{\"user\":\"5\"},\"prediction\":{\"itemScores\":[{\"item\":\"1\",\"score\":24},{\"item\":\"4\",\"score\":14}]}}\n```\n\n###Post-processing Results\n\nAfter the process exits successfully, the parts may be concatenated into a\nsingle output file using a command like:\n\n```bash\ncat batchpredict-output.json/part-* > batchpredict-output-all.json\n```\n"
  },
  {
    "path": "docs/manual/source/cli/index.html.md",
    "content": "---\ntitle: Command Line\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n##Overview\n\nInteraction with Apache PredictionIO is done through the command\nline interface. It follows the format of:\n\n```pio <command> [options] <args>...```\n\nYou can run ```pio help``` to see a list of all available commands and ```pio\nhelp <command>``` to see details of the command.\n\nApache PredictionIO commands can be separated into the following\nthree categories.\n\n##General Commands\n```pio help```          Display usage summary. `pio help <command>` to read about a specific subcommand.\n\n```pio version```       Displays the version of the installed PredictionIO.\n\n```pio status```        Displays install path and running status of PredictionIO system and its dependencies.\n\n\n##Event Server Commands\n\n```pio eventserver```   Launch the Event Server.\n\n```pio app```           Manage apps that are used by the Event Server.\n\n```pio app data-delete <name>``` deletes all data associated with the app.\n\n```pio app delete <name>``` deletes the app and its data.\n\n  ```--ip <value>``` IP to bind to. Default to localhost.\n\n  ```--port <value>``` Port to bind to. 
Default to 7070.\n\n\n```pio accesskey```     Manage app access keys.\n\n\n##Engine Commands\nEngine commands need to be run from the directory that contains the engine\nproject. ```--debug``` and ```--verbose``` flags will provide debug and\nthird-party informational messages.\n\n```pio build```         Build the engine at the current directory.\n\n```pio train```         Kick off a training using an engine.\n\n```pio deploy```        Deploy an engine as an engine server.\n\n```pio batchpredict```  Process bulk predictions using an engine.\n\nFor ```deploy``` & ```batchpredict```, if ```--engine-instance-id``` is not\nspecified, it will use the latest trained instance.\n"
  },
  {
    "path": "docs/manual/source/community/contribute-code.html.md",
    "content": "---\ntitle: Contribute Code\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThank you for your interest in contributing to Apache PredictionIO.\nOur mission is to enable developers to build scalable machine learning\napplications easily. Here is how you can help with the project development. If\nyou have any question regarding development at anytime, please free to\n[subscribe](mailto:dev-subscribe@predictionio.apache.org) and post to\nthe [Development Mailing\nList](mailto:dev-subscribe@predictionio.apache.org).\n\n## Areas in Need of Help\n\nWe accept contributions of all kinds at any time. 
We are compiling this list to\nshow features that are highly sought after by the community.\n\n- Tests and CI\n- Engine template, tutorials, and samples\n- Client SDKs\n- Building engines in Java (updating the Java controller API)\n- Code clean up and refactoring\n- Code and data pipeline optimization\n- Developer experience (UX) improvement\n\n## How to Report an Issue\n\nIf you wish to report an issue you found, you can do so on [Apache PredictionIO\nJIRA](https://issues.apache.org/jira/browse/PIO).\n\n## How to Help Resolve Existing Issues\n\nIn general, bug fixes should be done the same way as new features, but critical\nbug fixes will follow a different path.\n\n## How to Add / Propose a New Feature\n\nBefore adding new features into JIRA, please check that the feature does not currently exist in JIRA.\n\n1. To propose a new feature, simply\n   [subscribe](mailto:dev-subscribe@predictionio.apache.org) and post\n   your proposal to [Apache PredictionIO Development Mailing List]\n   (mailto:dev@predictionio.apache.org).\n2. Discuss with the community and the core development team on what needs to be\n   done, and lay down concrete plans on deliverables.\n3. Once solid plans are made, start creating tickets in the [issue tracker]\n   (https://issues.apache.org/jira/browse/PIO).\n4. Work side by side with other developers using Apache PredictionIO\n   Development Mailing List as primary mode of communication. You\n   never know if someone else has a better idea. ;)\n\n### Adding ticket to JIRA\n\n1. Add a descriptive Summary and a detailed description\n2. Set Issue Type to Bug, Improvement, New Feature, Test or Wish\n3. Set Priority to Blocker, Critical, Major, Minor or Trivial\n4. Fill out Affects Version with the version of PredictionIO you are currently using\n5. Fill out Environment if needed for description of your bug / feature\n6. 
Please leave other fields blank\n\n### Triaging JIRA\n\nTickets will be triaged by PredictionIO committers.\n\n- **Target Version**: Either a particular version or `Future` if to be done later\n    + Once a fix has been committed, the Fix Version will be filled in with the appropriate release\n\n- **Component**: Each ticket will be annotated with one or more of the following Components\n    + **Core**: affects the main code branch / will be part of a release\n    + **Documentation**: affects the documents / will be pushed to livedoc branch\n    + **Templates**: affects one of the separate github repositories for a template\n\n## How to Issue a Pull Request\n\nWhen you have finished your code, you can [create a pull\nrequest](https://help.github.com/articles/creating-a-pull-request/) against the\n**develop** branch.\n\n- The title must contain a tag associated with an existing JIRA ticket. You\n  must create a ticket so that the infrastructure can correctly track issues\n  across Apache JIRA and GitHub. If your ticket is `PIO-789`, your title must\n  look something like `[PIO-789] Some short description`.\n- Please also, in your commit message summary, include the JIRA ticket number\n  similar to above.\n- Make sure the title and description are clear and concise. For more details on\n  writing a good commit message, check out [this\n  guide](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).\n- If the change is visual, make sure to include a screenshot or GIF.\n- Make sure it is being opened into the right branch.\n- Make sure it has been rebased on top of that branch.\n\nNOTE: When it is close to a release, and if there is major development ongoing,\na release branch will be forked from the develop branch to stabilize the code\nfor binary release. 
Please refer to the *git flow* methodology page for more\ninformation.\n\n## Getting Started\n\nApache PredictionIO relies heavily on the [git flow methodology](\nhttp://nvie.com/posts/a-successful-git-branching-model/). Please make sure you\nread and understand it before you start your development. By default, cloning\nApache PredictionIO will put you in the *develop* branch, which in\nmost cases is where all the latest development goes.\n\nNOTE: For core development, please follow the [Scala Style Guide](http://docs.scala-lang.org/style/).\n\n### Create a Fork of the Apache PredictionIO Repository\n\n1. Start by creating a GitHub account if you do not already have one.\n2. Go to [Apache PredictionIO’s GitHub\n   mirror](https://github.com/PredictionIO/PredictionIO) and fork it to your own\n   account.\n3. Clone your fork to your local machine.\n\nIf you need additional help, please refer to\nhttps://help.github.com/articles/fork-a-repo/.\n\n### Building Apache PredictionIO from Source\n\nAfter the previous section, you should have a copy of Apache PredictionIO\non your local machine ready to be built.\n\n1. Make sure you are on the *develop* branch. You can double check by `git\n   status` or simply `git checkout develop`.\n2. At the root of the repository, do `./make-distribution.sh` to build\n   PredictionIO.\n\n### Setting Up the Environment\n\nApache PredictionIO relies on 3rd party software to perform its\ntasks. To set them up, simply follow this [documentation](\nhttp://predictionio.apache.org/install/install-sourcecode/#installing-dependencies).\n\n### Start Hacking\n\nYou should have an Apache PredictionIO development environment by\nnow. 
Happy hacking!\n\n## Anatomy of Apache PredictionIO Code Tree\n\nThe following describes each directory’s purpose.\n\n### bin\n\nShell scripts and any relevant components to go into the binary distribution.\nUtility shell scripts can also be included here.\n\n### conf\n\nConfiguration files that are used by both a source tree and binary distribution.\n\n### core\n\nCore Apache PredictionIO code that provides the DASE controller\nAPI, core data structures, and workflow creation and management code.\n\n### data\n\nApache PredictionIO Event Server, and backend-agnostic storage\nlayer for event store and metadata store.\n\n### docs\n\nSource code for http://predictionio.apache.org site, and any other\ndocumentation support files.\n\n### examples\n\nComplete code examples showing Apache PredictionIO's application.\n\n### sbt\n\nEmbedded SBT (Simple Build Tool) launcher.\n\n### storage\n\nStorage implementations.\n\n### tools\n\nTools for running Apache PredictionIO. Contains primarily the CLI\n(command-line interface) and its supporting code, and the experimental\nevaluation dashboard.\n"
  },
  {
    "path": "docs/manual/source/community/contribute-documentation.html.md",
    "content": "---\ntitle: Contribute Documentation\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## How to Write Documentation\n\nYou can help improve the Apache PredictionIO documentation by\nsubmitting tutorials, writing how-tos, fixing errors, and adding missing\ninformation. You can edit any page live on\n[GitHub](https://github.com/apache/predictionio) by clicking the\npencil icon on any page or open a [Pull\nRequest](https://help.github.com/articles/creating-a-pull-request/).\n\n## Branching\n\nUse the `livedoc` branch if you want to update the current documentation.\n\nUse the `develop` branch if you want to write documentation for the next\nrelease.\n\n## Installing Locally\n\nApache PredictionIO documentation uses\n[Middleman](http://middlemanapp.com/) and is hosted on Apache.\n\n[Gems](http://rubygems.org/) are managed with [Bundler](http://bundler.io/).\nFront end code with [Bower](http://bower.io/).\n\nRequires [Ruby](https://www.ruby-lang.org/en/) 2.1 or greater. 
We recommend\n[RVM](http://rvm.io/) or [rbenv](https://github.com/sstephenson/rbenv).\n\nWARNING: **OS X** users will need to install [Xcode Command Line\nTools](https://developer.apple.com/xcode/downloads/) with: `$ xcode-select\n--install` first.\n\nYou can install everything with the following commands:\n\n```bash\n$ cd docs/manual\n$ gem install bundler\n$ bundle install\n$ npm install -g bower\n$ bower install\n```\n\n\n## Starting the Server\n\nStart the server with:\n\n```\n$ bundle exec middleman server\n```\n\nThis will start the local web server at [localhost:4567](http://localhost:4567/).\n\n## Building the Site\n\nBuild the site with:\n\n```\n$ bundle exec middleman build\n```\n\n## Styleguide\n\nPlease follow this styleguide for any documentation contributions.\n\n### Text\n\nView our [Sample Typography](/samples/) page for all possible styles.\n\n### Headings\n\nThe main heading `h1` is derived from the title data attribute:\n\n```\n---\ntitle: Page Title\n---\n```\n\nStart other headings with `h2`. Prefer the `## Heading` format in Markdown.\n\n### Links\n\nInternal links:\n\n* Should start with / (relative to root).\n* Should end with / (S3 requirement).\n* Should **not** end with .html.\n\nFollowing these rules helps keep everything consistent and allows our version\nparser to correctly version links. Middleman is configured for directory\nindexes. Linking to a file in `source/samples/index.html` should be done with\n`[Title](/samples/)`.\n\n```md\n[Good](/path/to/page/)\n\n[Bad](../page) Not relative to root!\n[Bad](page.html) Do not use the .html extension!\n[Bad](/path/to/page) Does not end with a /.\n\n```\n\n### Images\n\nImages should be exactly 900px wide. 
[Chrome Window Resizer](https://chrome.google.com/webstore/detail/window-resizer/kkelicaakdanhinjdeammmilcgefonfh)\nis an excellent extension for browser resizing.\n\nWARNING: **OS X** users please [Disable Shadows](http://www.idownloadblog.com/2014/08/03/how-to-remove-the-shadow-window-screenshots-on-mac-os-x/)\nbefore taking a screenshot.\n\nImages should only show the relevant tab/terminal. Hide any additional toolbars.\n\nImages will **automatically scale** by default. If you want an image to remain a set size you can use a raw HTML tag like this:\n\n```\n<img src=\"/images/path/to/image.png\" alt=\"Image\" class=\"static\" />\n```\n\n### Code Blocks\n\nFenced code blocks are created using the <code>&#96;&#96;&#96;language</code> format.\n\nAn example of each language is available on our [Language Samples](/samples/languages/) page.\n\n### Code Tabs\n\nCode tabs use the following HTML format:\n\n```html\n<div class=\"tabs\">\n  <div data-tab=\"Tab Title\" data-lang=\"language\">\n    Markdown, code blocks, or HTML is OK inside a tab.\n  </div>\n  <div data-tab=\"Second Tab\" data-lang=\"optional\">\n    ...\n  </div>\n</div>\n```\n\nYou can see an example of this on our [Tab Samples](/samples/tabs/) page.\n\n### SEO\n\nYou can hide a page from the `sitemap.xml` file by setting the page's\n[Frontmatter](http://middlemanapp.com/basics/frontmatter/) like this:\n\n```md\n---\ntitle: Secret Page\nhidden: true\n---\n```\n\n## Important Files\n\n| Description   | File          |\n| ------------- | ------------- |\n| Left side navigation. | `data/nav/main.yml` |\n| Main site layout. | `source/layouts/layout.html.slim` |\n| Custom Markdown renderer based on [Redcarpet](https://github.com/vmg/redcarpet). | `lib/custom_renderer.rb` |\n| Custom TOC helper. 
| `helpers/table_of_contents_helpers.rb` |\n\n### Versions\n\nVarious site wide versions are defined in `data/versions.yml` and embedded with ERB like `<%= data.versions.pio %>`.\n\nNOTE: Files must end with a `.erb` extension to be processed as ERB.\n\n## Going Live\n\nFor Apache project committers, pushing to the `livedoc` branch of PredictionIO ASF git will update\nhttp://predictionio.apache.org in about 10 minutes.\n\nMake sure the **apache.org** remote is attached to your `predictionio` repo, and if not, add it:\n\n```\n$ git remote -v\n$ git remote add apache https://gitbox.apache.org/repos/asf/predictionio.git\n```\n\nThen, push the `livedoc` branch. (It will be published and synced with the public GitHub mirror):\n\n```\n$ git push apache livedoc\n```\n\nYou can check the progress of each build on [Apache's\nJenkins](https://builds.apache.org/):\n\n* [build-site](https://builds.apache.org/job/PredictionIO-build-site/)\n* [publish-site](https://builds.apache.org/job/PredictionIO-publish-site/)\n\n## Checking the Site\n\nWARNING: The check rake task is still in **beta** however it is extremely useful for catching accidental errors.\n\n```bash\n$ bundle exec middleman build\n$ bundle exec rake check\n```\n\nThe `rake check` task parses each HTML page in the `build` folder and checks it for common errors including 404s.\n\n## License\n\nDocumentation is under a [Apache License Version\n2.0](https://www.apache.org/licenses/LICENSE-2.0).\n"
  },
  {
    "path": "docs/manual/source/community/contribute-sdk.html.md",
    "content": "---\ntitle:  Contribute a SDK\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nA SDK should provide convenient methods for client applications to easily record\nusers' behaviors in Apache PredictionIO's Event Server and also query\nrecommendations from machine learning Engines. Therefore, a SDK typically has 2\ncorresponding clients: `Event Client` and `Engine Client`.\n\nThe following guideline bases on the REST API provided by Apache PredictionIO's\nEvent Client which details can be found\n[here](http://predictionio.apache.org/datacollection/eventapi/).\n\n## Event Client\nBecause the Event Server has only 1 connection point, the `Event Client`\nneeds to implement this core request first. The core request has the\nfollowing rules.\n\n- **URL**: `<base URL>/events.json?accessKey=<your access key>`\n(e.g. http://localhost:7070/events.json?accessKey=1234567890)\n\n- **Request**: `POST` + JSON data. 
Please refer to the [Event Creation API]\n(http://predictionio.apache.org/datacollection/eventapi/) for the details\non the fields of the JSON data object.\n\n- **Response**:\n    + **Success**: status code `201` with a JSON result containing\n    the `eventId`.\n    + **Failure**: a JSON result containing a `message` field describing\n    the error.\n        * Status code `401`: invalid access key.\n        * Status code `400`: fail to parse the JSON request e.g. missing\n        required fields like `event`, or invalid `eventTime` format.\n\nOther convenient methods are just shortcut. They could simply build\nthe event's parameters and call the core request. `Event Client` should\nsupport the following 7 shorthand operations:\n\n- **User entities**\n    + **Sets properties of a user**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$set\",\n            \"entityType\": \"user\",\n            \"entityId\": <user_ID>,\n            \"properties\": <properties>\n        }\n        ```\n\n    + **Unsets some properties of a user**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$unset\",\n            \"entityType\": \"user\",\n            \"entityId\": <user_ID>,\n            \"properties\": <properties>\n        }\n        ```\n\n    + **Delete a user**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$delete\",\n            \"entityType\": \"user\",\n            \"entityId\": <user_ID>\n        }\n        ```\n\n- **Item entities**\n    + **Sets properties of an item**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$set\",\n            \"entityType\": \"item\",\n            \"entityId\": <item_ID>,\n            \"properties\": <properties>\n        }\n        ```\n\n    + **Unsets some properties of an item**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$unset\",\n            \"entityType\": \"item\",\n            
\"entityId\": <item_ID>,\n            \"properties\": <properties>\n        }\n        ```\n\n    + **Delete an item**: with the JSON object\n\n        ```json\n        {\n            \"event\": \"$delete\",\n            \"entityType\": \"item\",\n            \"entityId\": <item_ID>\n        }\n        ```\n\n- **Others**\n    + **Record a user's action on some item**: with the JSON object\n\n        ```json\n        {\n            \"event\": <event_name>,\n            \"entityType\": \"user\",\n            \"entityId\": <user_ID>,\n            \"targetEntityType\": \"item\",\n            \"targetEntityId\": <item_ID>,\n            \"properties\": <properties>\n        }\n        ```\n\nAgain, please refer to the [API documentation]\n(http://predictionio.apache.org/datacollection/eventapi/) for explanations\non the reversed events like `$set`, `$unset` or `$delete`.\n\nINFO: The `eventTime` is optional but it is recommended that the client\napplication should include time in the request. Therefore, it is best\nthat the `Event Client` includes the time field if missing, before\nsending the event to the server.\n\n\n## Engine Client\n`Engine Client`'s main job is to retrieve recommendation or prediction  results\nfrom Apache PredictionIO's Engines. It has only a few rules on the\nrequest and response type.\n\n- **URL**: `<base URL>/queries.json` (e.g. http://localhost:8000/queries.json)\n\n- **Request**: `POST` + JSON data. For example,\n\n    ```json\n    {\n        \"user\": 1,\n        \"num\": 4\n    }\n    ```\n\n- **Response**:\n    + **Success**: status code `200` with a JSON result object. 
For example,\n\n        ```json\n        {\n            \"itemScores\": [\n                {\n                    \"item\": 39,\n                    \"score\": \"6.177719297832409\"\n                },\n                {\n                    \"item\": 79,\n                    \"score\": \"5.931687319083594\"\n                },\n                ...\n            ]\n        }\n        ```\n    + **Failure**: status code `400` e.g. fail to parse the query.\n\nThe formats of JSON objects in both the request and response must be defined by\nthe Apache PredictionIO's Engine and are different across\napplications. The above examples are taken from the Recommendation Engine\ntemplate in which the query and prediction results are defined as follows.\n\n```scala\ncase class Query(\n  user: String,\n  num: Int\n) extends Serializable\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n) extends Serializable\n```\n\n## Testing Your SDK\n\nYou can set up a local host Apache PredictionIO environment to test\nyour SDK. However, it is hard to set it up online to test your SDK automatically\nusing services like Travis CI. In that case, you should consider using these\nlightweight [mock servers]\n(https://github.com/minhtule/PredictionIO-Mock-Server). Please see the\ninstructions in the repo on how to use it. It takes less than 5 minutes!\n\nThat's it! We are looking forward to seeing your SDK!\n"
  },
  {
    "path": "docs/manual/source/community/contribute-webhook.html.md",
    "content": "---\ntitle:  Contribute a Webhooks Connector\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nNOTE: Please check out the [latest develop\nbranch](https://github.com/apache/predictionio).\n\nEvent server can collect data from other third-party sites or software through their webhooks services (for example, SegmentIO, MailChimp). To support that, a *Webhooks Connector* for the third-party data needs to be integrated into Event Server. The job of the *Webhooks Connector* is as simple as converting the third-party data into Event JSON. 
You can find an example below.\n\nCurrently we support two types of connectors: `JsonConnector` and `FormConnector`, which are responsible for accepting *JSON* data and *Form-submission* data, respectively.\n\n**JsonConnector**:\n\n```scala\npackage org.apache.predictionio.data.webhooks\n\n/** Connector for Webhooks connection */\nprivate[predictionio] trait JsonConnector {\n\n  /** Convert from original JObject to Event JObject\n    * @param data original JObject received through webhooks\n    * @return Event JObject\n   */\n  def toEventJson(data: JObject): JObject\n\n}\n\n```\n\nThe EventServer URL path to collect webhooks JSON data:\n\n```\nhttp://<EVENT SERVER URL>/webhooks/<CONNECTOR_NAME>.json?accessKey=<YOUR_ACCESS_KEY>&channel=<CHANNEL_NAME>\n```\n\nNote that you may collect Webhooks data into default channel (without the `channel` parameter in the URL) but it's highly recommended to create dedicated [Channel](/datacollection/channel/) to collect specific Webhooks data (e.g. create one channel \"segmentio\" for SegmentIO and another channel \"mailchimp\" for Mailchimp data) because it allows you to manage and query data more easily, and the Webhooks data won't be mixed with your other normal app data.\n\n\n**FormConnector**:\n\n```scala\npackage org.apache.predictionio.data.webhooks\n\n/** Connector for Webhooks connection with Form submission data format\n  */\nprivate[predictionio] trait FormConnector {\n\n  /** Convert from original Form submission data to Event JObject\n    * @param data Map of key-value pairs in String type received through webhooks\n    * @return Event JObject\n   */\n  def toEventJson(data: Map[String, String]): JObject\n\n}\n\n```\n\nThe EventServer URL path to collect webhooks form-submission data (no .json):\n\n```\nhttp://<EVENT SERVER URL>/webhooks/<CONNECTOR_NAME>?accessKey=<YOUR_ACCESS_KEY>&channel=<CHANNEL_NAME>\n```\n\nNote that you may collect Webhooks data into default channel (without the `channel` parameter in the URL) 
but it's highly recommended to create dedicated [Channel](/datacollection/channel/) to collect specific Webhooks data (e.g. create one channel \"segmentio\" for SegmentIO and another channel \"mailchimp\" for Mailchimp data) because it allows you to manage and query data more easily, and the Webhooks data won't be mixed with your other normal app data.\n\n\n# Example\n\nFor example, let's say there is a third-party website (say, it is named \"ExampleJson\") which can send the following JSON data through its webhooks service and we would like to collect it into Event Store.\n\n**UserActionItem**:\n\n```json\n{\n  \"type\": \"userActionItem\",\n  \"userId\": \"as34smg4\",\n  \"event\": \"do_something_on\",\n  \"itemId\": \"kfjd312bc\",\n  \"context\": {\n    \"ip\": \"1.23.4.56\",\n    \"prop1\": 2.345,\n    \"prop2\": \"value1\"\n  },\n  \"anotherPropertyA\": 4.567,\n  \"anotherPropertyB\": false,\n  \"timestamp\": \"2015-01-15T04:20:23.567Z\"\n}\n```\n\n\n## 1. Implement Webhooks Connector\n\nBecause the data sent by this third-party \"ExampleJson\" site is in JSON format, we implement an object `ExampleJsonConnector` which extends `JsonConnector`:\n\n\n```scala\nprivate[predictionio] object ExampleJsonConnector extends JsonConnector {\n\n  implicit val json4sFormats: Formats = DefaultFormats\n\n  override def toEventJson(data: JObject): JObject = {\n    val common = try {\n      data.extract[Common]\n    } catch {\n      case e: Exception => throw new ConnectorException(\n        s\"Cannot extract Common field from ${data}. 
${e.getMessage()}\", e)\n    }\n\n    val json = try {\n      common.`type` match {\n        case \"userActionItem\" =>\n          toEventJson(common = common, userActionItem = data.extract[UserActionItem])\n        case x: String =>\n          throw new ConnectorException(\n            s\"Cannot convert unknown type '${x}' to Event JSON.\")\n      }\n    } catch {\n      case e: ConnectorException => throw e\n      case e: Exception => throw new ConnectorException(\n        s\"Cannot convert ${data} to eventJson. ${e.getMessage()}\", e)\n    }\n\n    json\n  }\n\n  // Convert the UserActionItem JSON to Event JSON\n  def toEventJson(common: Common, userActionItem: UserActionItem): JObject = {\n    import org.json4s.JsonDSL._\n\n    // map to EventAPI JSON\n    val json =\n      (\"event\" -> userActionItem.event) ~\n      (\"entityType\" -> \"user\") ~\n      (\"entityId\" -> userActionItem.userId) ~\n      (\"targetEntityType\" -> \"item\") ~\n      (\"targetEntityId\" -> userActionItem.itemId) ~\n      (\"eventTime\" -> userActionItem.timestamp) ~\n      (\"properties\" -> (\n        (\"context\" -> userActionItem.context) ~\n        (\"anotherPropertyA\" -> userActionItem.anotherPropertyA) ~\n        (\"anotherPropertyB\" -> userActionItem.anotherPropertyB)\n      ))\n    json\n  }\n\n  // Common required fields\n  case class Common(\n    `type`: String\n  )\n\n  // UserActionItem fields\n  case class UserActionItem (\n    userId: String,\n    event: String,\n    itemId: String,\n    context: JObject,\n    anotherPropertyA: Option[Double],\n    anotherPropertyB: Option[Boolean],\n    timestamp: String\n  )\n\n}\n```\n\nYou can find the complete example in [the GitHub\nrepo](https://github.com/apache/predictionio/blob/develop/data/src/main/scala/org/apache/predictionio/data/webhooks/examplejson/ExampleJsonConnector.scala)\nand how to write [tests for 
the\nconnector](https://github.com/apache/predictionio/blob/develop/data/src/test/scala/org/apache/predictionio/data/webhooks/examplejson/ExampleJsonConnectorSpec.scala).\n\n\nPlease put the connector code in a separate directory for each site. For example, code for the segmentio connector should be in\n\n```\ndata/src/main/scala/org/apache/predictionio/data/webhooks/segmentio/\n```\n\nand tests should be in\n\n```\ndata/src/test/scala/org/apache/predictionio/data/webhooks/segmentio/\n```\n\n**For form-submission data**, you can find the complete example in [the GitHub\nrepo](https://github.com/apache/predictionio/blob/develop/data/src/main/scala/org/apache/predictionio/data/webhooks/exampleform/ExampleFormConnector.scala)\nand how to write [tests for the\nconnector](https://github.com/apache/predictionio/blob/develop/data/src/test/scala/org/apache/predictionio/data/webhooks/exampleform/ExampleFormConnectorSpec.scala).\n\n\n## 2. Integrate the Connector into Event Server\n\nOnce we have the connector implemented, we can add this to the EventServer so we can collect real-time data.\n\nAdd the connector to [`WebhooksConnectors` object](\nhttps://github.com/apache/predictionio/blob/develop/data/src/main/scala/org/apache/predictionio/data/api/WebhooksConnectors.scala):\n\n```scala\n\nimport org.apache.predictionio.data.webhooks.examplejson.ExampleJsonConnector // ADDED\n\nprivate[predictionio] object WebhooksConnectors {\n\n  // Map of Connector Name to Connector\n  val json: Map[String, JsonConnector] = Map(\n    \"segmentio\" -> SegmentIOConnector,\n    \"examplejson\" -> ExampleJsonConnector // ADDED\n  )\n\n  // Map of Connector Name to Connector\n  val form: Map[String, FormConnector] = Map(\n    \"mailchimp\" -> MailChimpConnector\n  )\n\n}\n```\n\nNote that the name of the connectors (e.g. \"examplejson\", \"segmentio\") will be used in the webhooks URL. 
In this example, the event server URL to collect data from \"ExampleJson\" would be:\n\n```\nhttp://<EVENT SERVER URL>/webhooks/examplejson.json?accessKey=<YOUR_ACCESS_KEY>&channel=<CHANNEL_NAME>\n```\n\nFor `FormConnector`, the URL doesn't have `.json`. For example,\n\n```\nhttp://<EVENT SERVER URL>/webhooks/mailchimp?accessKey=<YOUR_ACCESS_KEY>&channel=<CHANNEL_NAME>\n```\n\nThat's it. Once you re-compile Apache PredictionIO, you can send\nthe ExampleJson data to the ExampleJson webhooks URL above and the data will be stored in the App\nof the corresponding Access Key.\n"
  },
  {
    "path": "docs/manual/source/community/index.html.md",
    "content": "---\ntitle: Community Page\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## User Mailing List\n\nThis list is for users of Apache PredictionIO to ask questions,\nshare knowledge, and discuss issues. Do send mail to this list with usage and\nconfiguration questions and problems. Also, please send questions to this list\nto verify your problem before filing issues in JIRA.\n\n[Subscribe](mailto:user-subscribe@predictionio.apache.org) to our User Mailing List.\n[Unsubscribe](mailto:user-unsubscribe@predictionio.apache.org) from our User Mailing List.\n\n## Twitter\n\nFollow us on Twitter [@predictionio](https://twitter.com/PredictionIO).\n\n## Facebook Page\n\nLike us on Facebook at https://www.facebook.com/predictionio.\n\n## GitHub\n\nView our code on GitHub at https://github.com/apache/predictionio.\n\n<iframe src=\"/github/?user=apache&repo=predictionio&type=fork&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n<iframe src=\"/github/?user=apache&repo=predictionio&type=watch&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n"
  },
  {
    "path": "docs/manual/source/community/projects.html.md",
    "content": "---\ntitle: Community Powered Projects\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHere you will find great projects contributed by the Apache PredictionIO\ncommunity.\n\nINFO: If you have built an Apache PredictionIO-related project, we\nwould love to showcase it to the community! 
Simply edit [this\npage](https://github.com/apache/predictionio/blob/livedoc/docs/manual/source/community/projects.html.md)\nand submit a pull request.\n\n## SDKs\n\n### Swift SDK\n\n - Minh-Tu Le: https://github.com/minhtule/PredictionIO-Swift-SDK\n\n\n## DEMOs\n\n### Tapster iOS Demo\n\n - Minh-Tu Le: https://github.com/minhtule/Tapster-iOS-Demo\n\n\n## Universal Recommender\n\n - ActionML: https://github.com/actionml/universal-recommender\n\n\n## Docker Images\n\n - Ming Fang: https://github.com/mingfang/docker-predictionio\n\n - Steven Yan: https://github.com/steveny2k/docker-predictionio\n\n - Japan PredictionIO User Group: https://github.com/jpioug/predictionio-docker\n\n - Inspectorio Inc: https://github.com/inspectorioinc/docker-prediction-io\n\n\n## Archived Projects\n\nSome community projects have not got any update for quite some time.\n\nThese projects are listed in the [archived list](/archived/community/).\n\nIf an archived project is updated, please edit [this\npage](https://github.com/apache/predictionio/blob/livedoc/docs/manual/source/community/projects.html.md)\nand submit a pull request to put your project back to this active projects list.\n"
  },
  {
    "path": "docs/manual/source/community/submit-template.html.md",
    "content": "---\ntitle: Submitting a Template to Template Gallery\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Template Guidelines\n\n- Please give your template and GitHub repo a meaningful name (for example, My-MLlibKMeansClustering-Template).\n\n- Please tag your repo for each released version. This is required by Template Gallery.\n\n    For example, tag the release with v0.1.0:\n\n    ```\n    $ git tag -a v0.1.0 -m 'version 0.1.0'\n    ```\n\n- For clarity, the engine template directory structure should be:\n\n    ```\n    data/  # contains sample data or related files\n    project/  # contains the necessary sbt files for build (e.g assembly.sbt)\n    src/  # template source code\n    .gitignore\n    README.md\n    build.sbt\n    engine.json # one or more engine.json\n    template.json\n    ```\n\n- Try to keep the root directory clean. If you have additional script files or other files, please create new folders for them and provide description.\n\n- Include a QuickStart of how to use the engine, including:\n  1. Overview description of the template\n  2. Events and Data required by the template\n  3. Description of Query and PredictedResult\n  4. Steps to import sample data\n  5. Description of the sample data\n  6. 
Steps to build, train and deploy the engine\n  7. Steps to send sample query and expected output\n\n\n- If you have additional sample data, please also provide a description and how to import it in the README\n\n- If you have multiple engine.json files, please provide a description of them in the README\n\n- It's recommended to follow the [Scala Style Guide](http://docs.scala-lang.org/style/)\n\n## How to submit\n\n- Fork the repository\n- Modify *docs/manual/source/gallery/templates.yaml* introducing a new template. The schema of the engine description is as follows:\n\n```yml\n- template:\n    name: (Name of your template)\n    repo: (Link to your repository)\n    description: |-\n      (Brief description of your template written in markdown syntax)\n    tags: [ (One of [classification, regression, unsupervised, recommender, nlp, other]) ]\n    type: (Parallel or Local)\n    language: (Language)\n    license: (License)\n    status: (e.g. alpha, stable or requested (under development))\n    pio_min_version: (Minimum version of PredictionIO to run your template)\n```\n- Submit your changes via pull-request\n"
  },
  {
    "path": "docs/manual/source/customize/dase.html.md.erb",
    "content": "---\ntitle: Implementing DASE\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis section gives you an overview of DASE components and how to implement them. You will find links to some engine templates for more concrete examples.\n\n# DataSource\n\nDataSource reads and selects useful data from the Event Store (data store of the Event Server) and returns TrainingData.\n\n## readTraining()\n\nYou need to implement readTraining() of [PDataSource](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.PDataSource), where you can use the [PEventStore Engine API](https://predictionio.apache.org/api/current/#org.apache.predictionio.data.store.PEventStore$) to read the events and create the TrainingData based on the events.\n\nThe following code example reads user \"view\" and \"buy\" item events, filters specific type of events for future processing and returns TrainingData accordingly.\n\n```scala\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val eventsRDD: RDD[Event] = 
PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\", \"buy\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .cache()\n\n    val viewEventsRDD: RDD[ViewEvent] = eventsRDD\n      .filter { event => event.event == \"view\" }\n      .map { ... }\n\n    ...\n\n    new TrainingData(...)\n  }\n\n}\n```\n\n\n## Using PEventStore Engine API\n\nPlease see [Event Server Overview](https://predictionio.apache.org/datacollection/) to understand [EventAPI](https://predictionio.apache.org/datacollection/eventapi/) and [event modeling](https://predictionio.apache.org/datacollection/eventmodel/).\n\nWith [PEventStore Engine API](https://predictionio.apache.org/api/current/#org.apache.predictionio.data.store.PEventStore$), you can easily read different events in DataSource and get the information you need.\n\nFor example, let's say you have events like the following:\n\n```json\n{\n  \"event\": \"myEvent\",\n  \"entityType\": \"user\",\n  \"entityId\": \"u0\",\n  \"targetEntityType\": \"item\",\n  \"targetEntityId\": \"i0\",\n  \"properties\" : {\n    \"a\" : 3,\n    \"b\" : \"some_string\",\n    \"c\" : [\"a\", \"b\", \"c\"],\n    \"d\" : [1.2, 3.4, 5.6],\n    \"e\" : 6\n  }\n}\n```\n\nThen following code could read these events and extract the properties field of the event and convert it to a `MyEvent` object.\n\n```scala\n  val myEvents: RDD[MyEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"myEvent\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n    .map { event =>\n      try {\n        MyEvent(\n          entityId = event.entityId,\n          targetEntityId = event.targetEntityId.get,\n          a = event.properties.get[Int](\"a\"),\n          b = event.properties.get[String](\"b\"),\n          c 
= event.properties.get[List[String]](\"c\"),\n          d = event.properties.get[List[Double]](\"d\"),\n          e = event.properties.getOpt[Int](\"e\") // use getOpt for optional data\n        )\n      } catch {\n        case e: Exception =>\n          logger.error(s\"Cannot convert ${event}. Exception: ${e}.\")\n          throw e\n      }\n    }\n```\n\nIf you have used the special events `$set/$unset/$delete` to set an entity's properties, you can retrieve them with `PEventStore.aggregateProperties()`.\n\nPlease see [event modeling](https://predictionio.apache.org/datacollection/eventmodel/) to understand usage of special `$set/$unset/$delete` events.\n\nFor example, the following code shows how you could retrieve properties of the \"item\" entities:\n\n```scala\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n\n      try {\n        val item = Item(\n          a = properties.get[Int](\"a\"),\n          b = properties.get[String](\"b\"),\n          c = properties.get[List[String]](\"c\"),\n          d = properties.get[List[Double]](\"d\"),\n          e = properties.getOpt[Int](\"e\") // use getOpt for optional data\n        )\n\n        (entityId, item)\n      } catch {\n        case e: Exception =>\n          logger.error(s\"Failed to get properties ${properties} of ${entityId}. 
Exception: ${e}.\")\n          throw e\n      }\n\n    }\n```\n\nExample:\n\n- [DataSource of Similar Product Template](/templates/similarproduct/dase/#data)\n\n# Preparator\n\nPreparator is responsible for pre-processing `TrainingData` for any necessary feature selection and data processing tasks and generating `PreparedData` which contains the data the Algorithm needs.\n\nA few example usages of Preparator:\n\n- Feature extraction\n- Common pre-processing logic if you have multiple algorithms\n- For simple cases, the Preparator may simply pass the same `TrainingData` as `PreparedData` for Algorithm.\n\n## prepare()\n\nYou need to implement the `prepare()` method of [PPreparator](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.PPreparator) to perform such tasks.\n\nExample:\n\n- [Preparator of Lead Scoring Template](/templates/leadscoring/dase/#data): it pre-processes the TrainingData and generates the feature vectors needed for the algorithm.\n- [Preparator of Similar Product Template](/templates/similarproduct/dase/#data): it simply passes the TrainingData as PreparedData for the algorithm.\n\n# Algorithm\n\nThe two methods of the Algorithm class are train() and predict():\n\n## train()\n\ntrain() is responsible for training a predictive model. It is called when you\nrun `pio train`. Apache PredictionIO will store this model.\n\n## predict()\n\npredict() is responsible for using this model to make predictions. It is called when you send a JSON query to the engine. 
Note that predict() is called in real time.\n\nApache PredictionIO supports two types of algorithms:\n\n- **[P2LAlgorithm](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.P2LAlgorithm)**: trains a Model which does not contain RDD\n- **[PAlgorithm](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.PAlgorithm)**: trains a Model which contains RDD\n\n## P2LAlgorithm\n\nFor `P2LAlgorithm`, the Model is automatically serialized and persisted by\nApache PredictionIO after training.\n\nImplementing `IPersistentModel` and `IPersistentModelLoader` is optional for P2LAlgorithm.\n\nExample:\n\n- [Algorithm of Similar Product Template](/templates/similarproduct/dase/#algorithm)\n\n## PAlgorithm\n\n`PAlgorithm` should be used when your Model contains RDD. The model produced by `PAlgorithm` is not persisted by default. To persist the model, you need to do the following:\n\n- The Model class should extend the `IPersistentModel` trait and implement the `save()` method for saving the model. The trait `IPersistentModel` requires a type parameter which is the class type of algorithm parameter.\n- Implement a Model factory object which extends the `IPersistentModelLoader` trait and implement the `apply()` for loading the model. The trait `IPersistentModelLoader` requires two type parameters which are the types of algorithm parameter and the model produced by the algorithm.\n\nExample:\n\n- [Algorithm of Recommendation Template](/templates/recommendation/dase/#algorithm): it implements PAlgorithm and the IPersistentModel and IPersistentModelLoader.\n- [Algorithm of Vanilla Template](/templates/vanilla/dase): it walks through example of P2LAlgorithm and PAlgorithm.\n\n## using LEventStore Engine API in predict()\n\nYou may use [LEventStore.findByEntity()](https://predictionio.apache.org/api/current/#org.apache.predictionio.data.store.LEventStore$) to retrieve events of a specific entity. 
For example, you can retrieve recent events of the user specified in the query and use these recent events to make predictions in real time.\n\n\nFor example, the following code reads the recent 10 view events of `query.user`:\n\n```scala\n    val recentEvents = try {\n      LEventStore.findByEntity(\n        appName = ap.appName,\n        // entityType and entityId are specified for fast lookup\n        entityType = \"user\",\n        entityId = query.user,\n        eventNames = Some(List(\"view\")),\n        targetEntityType = Some(Some(\"item\")),\n        limit = Some(10),\n        latest = true,\n        // set time limit to avoid super long DB access\n        timeout = Duration(200, \"millis\")\n      )\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read recent events.\" +\n          s\" Empty list is used. ${e}\")\n        Iterator[Event]()\n      case e: Exception =>\n        logger.error(s\"Error when read recent events: ${e}\")\n        throw e\n    }\n```\n\n\nExample:\n\n- [Algorithm of E-Commerce Recommendation template](/templates/ecommercerecommendation/dase#algorithm): LEventStore.findByEntity() is used to retrieve all items seen by the user and filter them from recommendation in predict().\n\n\n# Serving\n\n## serve()\n\nYou need to implement the serve() method of the class [LServing](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.LServing). The serve() method processes the predicted result. It is also responsible for combining multiple predicted results into one if you have more than one predictive model.\n\nExample:\n\n- [Serving of Similar Product Template](/templates/similarproduct/dase/#serving): It simply returns the predicted result\n- [Serving of multi-algorithm examples of Similar Product Template](/templates/similarproduct/multi-events-multi-algos/): It combines the results of multiple algorithms and returns them\n"
  },
  {
    "path": "docs/manual/source/customize/index.html.md",
    "content": "---\ntitle: Learning DASE\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThe code of an engine consists of D-A-S-E components:\n\n### [D] Data Source and Data Preparator\n\nData Source reads data from an input source and transforms it into a desired\nformat. Data Preparator preprocesses the data and forwards it to the algorithm\nfor model training.\n\n### [A] Algorithm\n\nThe Algorithm component includes the Machine Learning algorithm, and the\nsettings of its parameters, determines how a predictive model is constructed.\n\n### [S] Serving\n\nThe Serving component takes prediction *queries* and returns prediction results.\nIf the engine has multiple algorithms, Serving will combine the results into\none. Additionally, business-specific logic can be added in Serving to further\ncustomize the final returned results.\n\n### [E] Evaluation Metrics\n\nAn Evaluation Metric quantifies prediction accuracy with a numerical score. It\ncan be used for comparing algorithms or algorithm parameter settings.\n\n> Apache PredictionIO helps you modularize these components so you\ncan build, for example, several Serving components for an Engine. 
You will be\nable to choose which one to deploy when you create an Engine.\n\n\n![Engine Overview](/images/engineinstance-overview.png)\n\n## The Roles of an Engine\n\nThe main functions of an engine are:\n\n* Train a model using the training data and be deployed as a web service\n* Respond to prediction queries in real time\n\nAn engine puts all DASE components into a deployable state by specifying:\n\n* One Data Source\n\n* One Data Preparator\n\n* One or more Algorithm(s)\n\n* One Serving\n\nINFO: If more than one algorithm is specified, each of their model prediction\nresults will be passed to Serving for ensembling.\n\nEach Engine processes data and constructs predictive models independently.\nTherefore, every engine serves its own set of prediction results. For example,\nyou may deploy two engines for your mobile application: one for recommending\nnews to users and another one for suggesting new friends to users.\n\n### Training a Model - The DASE View\n\nThe following graph shows the workflow of DASE components when `pio train` is run.\n\n![Engine Overview](/images/engine-training.png)\n\n\n### Respond to Prediction Query - The DASE View\n\nThe following graph shows the workflow of DASE components when a REST query is received by a deployed engine.\n\n![Engine Overview](/images/engine-query.png)\n\nPlease see [Implement DASE](/customize/dase) for DASE implementation details.\n\nPlease refer to the following templates and their how-to guides for concrete examples.\n\n## Examples of DASE\n\n- [DASE of Recommendation Template](/templates/recommendation/dase/)\n- [DASE of Similar Product Template](/templates/similarproduct/dase/)\n- [DASE of Classification Template](/templates/classification/dase/)\n- [DASE of Lead Scoring Template](/templates/leadscoring/dase/)\n"
  },
  {
    "path": "docs/manual/source/customize/troubleshooting.html.md",
    "content": "---\ntitle: Engine Development - Troubleshoot\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nApache PredictionIO provides the following features to help you\ndebug engines during development cycle.\n\n## Stop Training between Stages\n\nBy default `pio train` runs through the whole training process including\n[DataSource, Preparator and Algorithm](/templates/recommendation/dase/). 
To\nspeed up the development and debug cycle, you can stop the process after each\nstage to verify it has completed correctly.\n\nIf you have modified DataSource and want to confirm the TrainingData is\ngenerated as expected, you can run `pio train` with the `--stop-after-read` option:\n\n```\npio train --stop-after-read\n```\n\nThis would stop the training process after the TrainingData is generated.\n\nFor example, if you are running [Recommendation\nTemplate](/templates/recommendation/quickstart/), you should see that the\ntraining process stops after the TrainingData is printed.\n\n```\n[INFO] [CoreWorkflow$] TrainingData:\n[INFO] [CoreWorkflow$] ratings: [1501] (List(Rating(3,0,4.0), Rating(3,1,4.0))...)\n...\n[INFO] [CoreWorkflow$] Training interrupted by org.apache.predictionio.workflow.StopAfterReadInterruption.\n```\n\nSimilarly, you can stop the training after the Preparator phase by using the\n`--stop-after-prepare` option, and it will stop after PreparedData is generated:\n\n```\npio train --stop-after-prepare\n```\n\n##  Sanity Check\n\nYou can extend a trait `SanityCheck` and implement the method\n`sanityCheck()` with your error checking code. The `sanityCheck()` is called\nwhen the data is generated. This can be applied to `TrainingData`, `PreparedData` and the `Model` classes, which are outputs of DataSource's `readTraining()`, Preparator's `prepare()` and Algorithm's `train()` methods, respectively.\n\nFor example, one frequent error with the Recommendation Template is that the\nTrainingData is empty because the DataSource is not reading data correctly. You\ncan add the check of empty data inside the `sanityCheck()` function. You can\neasily add other checking logic into the `sanityCheck()` function based on your\nown needs. Also, if you implement the `toString()` method in your TrainingData, 
you can call\n`toString()` inside `sanityCheck()` to print out some data for visual checking.\n\nFor example, to print TrainingData to the console and check whether `ratings` is empty, you can\ndo the following:\n\n```scala\nimport org.apache.predictionio.controller.SanityCheck // ADDED\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable with SanityCheck { // EXTEND SanityCheck\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n\n  // IMPLEMENT sanityCheck()\n  override def sanityCheck(): Unit = {\n    println(toString())\n    // add your other checking here\n    require(!ratings.take(1).isEmpty, s\"ratings cannot be empty!\")\n  }\n}\n```\n\nYou may also use it together with the `--stop-after-read` flag to debug the DataSource:\n\n```\npio build\npio train --stop-after-read\n```\n\nIf your data is empty, you should see the following error thrown by the\n`sanityCheck()` function:\n\n```\n[INFO] [CoreWorkflow$] Performing data sanity check on training data.\n[INFO] [CoreWorkflow$] org.template.recommendation.TrainingData supports data sanity check. 
Performing check.\nException in thread \"main\" java.lang.IllegalArgumentException: requirement failed: ratings cannot be empty!\n\tat scala.Predef$.require(Predef.scala:233)\n\tat org.template.recommendation.TrainingData.sanityCheck(DataSource.scala:73)\n\tat org.apache.predictionio.workflow.CoreWorkflow$$anonfun$runTypelessContext$7.apply(Workflow.scala:474)\n\tat org.apache.predictionio.workflow.CoreWorkflow$$anonfun$runTypelessContext$7.apply(Workflow.scala:465)\n\tat scala.collection.immutable.Map$Map1.foreach(Map.scala:109)\n  ...\n```\n\nYou can specify the `--skip-sanity-check` option to turn off sanityCheck:\n\n```\npio train --stop-after-read --skip-sanity-check\n```\n\nYou should see that the checking is skipped, as in the following output:\n\n```\n[INFO] [CoreWorkflow$] Data sanity checking is off.\n[INFO] [CoreWorkflow$] Data Source\n...\n[INFO] [CoreWorkflow$] Training interrupted by org.apache.predictionio.workflow.StopAfterReadInterruption.\n```\n\n## Engine Status Page\n\nAfter running `pio deploy`, you can access the engine status page by going to the same URL and port of the deployed engine with your browser, which is \"http://localhost:8000\" by default. In the engine status page, you can find the Engine information and the parameters of each DASE component. 
In particular, you can also see the \"Model\" trained by the algorithm based on how the `toString()` method is implemented in the Algorithm's Model class.\n\n## pio-shell\n\nApache PredictionIO also provides `pio-shell` in which you can\neasily access Apache PredictionIO API, Spark context and Spark API\nfor quickly testing code or debugging purposes.\n\nTo bring up the shell, simply run:\n\n```\n$ pio-shell --with-spark\n```\n\n(`pio-shell` is available inside the `bin/` directory of the installed Apache\nPredictionIO directory; you should be able to access it if you have\nadded PredictionIO/bin into your environment variable `PATH`)\n\nNote that the Spark context is available as variable `sc` inside the shell.\n\nFor example, to get the events of `MyApp1` using the PEventStore API inside the pio-shell and collect them into an array `c`, run the following in the shell:\n\n```\n> import org.apache.predictionio.data.store.PEventStore\n> val eventsRDD = PEventStore.find(appName=\"MyApp1\")(sc)\n> val c = eventsRDD.collect()\n```\n\nThen you should see the following returned in the shell:\n\n```\n...\n15/05/18 14:24:42 INFO DAGScheduler: Job 0 finished: collect at <console>:24, took 1.850779 s\nc: Array[org.apache.predictionio.data.storage.Event] = Array(Event(id=Some(AaQUUBsFZxteRpDV_7fDGQAAAU1ZfRW1tX9LSWdZSb0),event=$set,eType=item,eId=i42,tType=None,tId=None,p=DataMap(Map(categories -> JArray(List(JString(c2), JString(c1), JString(c6), JString(c3))))),t=2015-05-15T21:31:19.349Z,tags=List(),pKey=None,ct=2015-05-15T21:31:19.354Z), Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfROaqdRYO-pZ_no),event=$set,eType=user,eId=u9,tType=None,tId=None,p=DataMap(Map()),t=2015-05-15T21:31:18.810Z,tags=List(),pKey=None,ct=2015-05-15T21:31:18.817Z), Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfRq7tsanlemwmZQ),event=view,eType=user,eId=u9,tType=Some(item),tId=Some(i25),p=DataMap(Map()),t=2015-05-15T21:31:20.635Z,tags=List(),pKey=None,ct=2015-05-15T21:31:20.639Z), 
Event(id=Some(DjvP3Dnci9F4CWmiqoLabQAAAU1ZfR...\n```\n"
  },
  {
    "path": "docs/manual/source/datacollection/analytics-ipynb.html.md.erb",
    "content": "---\ntitle: Machine Learning Analytics with IPython Notebook\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[IPython Notebook](http://ipython.org/notebook.html) is a very powerful\ninteractive computational environment, and with\n[Apache PredictionIO](http://predictionio.apache.org),\n[PySpark](http://spark.apache.org/docs/latest/api/python/) and [Spark\nSQL](https://spark.apache.org/sql/), you can easily analyze your collected\nevents when you are developing or tuning your engine.\n\n## Prerequisites\n\nBefore you begin, please make sure you have the latest stable IPython installed,\nand that the command `ipython` can be accessed from your shell's search path.\n\n<%= partial 'shared/datacollection/parquet' %>\n\n## Preparing IPython Notebook\n\nLaunch IPython Notebook with PySpark using the following command, with\n`$SPARK_HOME` replaced by the location of Apache Spark.\n\n```\n$ PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS=\"notebook --pylab inline\" $SPARK_HOME/bin/pyspark\n```\nIf you see a error appearing in the console like this:\n\n```\n[E 10:07:53.900 NotebookApp] Support for specifying --pylab on the command line has been removed.\n[E 10:07:53.901 NotebookApp] Please use `%pylab inline` or 
`%matplotlib inline` in the notebook itself.\n```\n\nThen you can use the following command.\n\n```\nPYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS=\"notebook --`%pylab inline`\" $SPARK_HOME/bin/pyspark\n```\nBy default, you should be able to access your IPython Notebook via web browser\nat http://localhost:8888.\n\nLet's initialize our notebook for the following code in the first cell.\n\n```python\nimport pandas as pd\ndef rows_to_df(rows):\n    return pd.DataFrame(map(lambda e: e.asDict(), rows))\nfrom pyspark.sql import SQLContext\nsqlc = SQLContext(sc)\nrdd = sqlc.parquetFile(\"/tmp/movies\")\nrdd.registerTempTable(\"events\")\n```\n\n![Initialization for IPython Notebook](/images/datacollection/ipynb-01.png)\n\n`rows_to_df(rows)` will come in handy when we want to dump the results from\nSpark SQL using IPython Notebook's native table rendering.\n\n## Performing Analysis with Spark SQL\n\nIf all steps above ran successfully, you should have a ready-to-use analytics\nenvironment by now. 
Let's try a few examples to see if everything is functional.\n\nIn the second cell, put in this piece of code and run it.\n\n```python\nsummary = sqlc.sql(\"SELECT \"\n                   \"entityType, event, targetEntityType, COUNT(*) AS c \"\n                   \"FROM events \"\n                   \"GROUP BY entityType, event, targetEntityType\").collect()\nrows_to_df(summary)\n```\n\nYou should see the following screen.\n\n![Summary of Events](/images/datacollection/ipynb-02.png)\n\nWe can also plot our data, in the next two cells.\n\n```python\nimport matplotlib.pyplot as plt\ncount = map(lambda e: e.c, summary)\nevent = map(lambda e: \"%s (%d)\" % (e.event, e.c), summary)\ncolors = ['gold', 'lightskyblue']\nplt.pie(count, labels=event, colors=colors, startangle=90, autopct=\"%1.1f%%\")\nplt.axis('equal')\nplt.show()\n```\n\n![Summary in Pie Chart](/images/datacollection/ipynb-03.png)\n\n```python\nratings = sqlc.sql(\"SELECT properties.rating AS r, COUNT(*) AS c \"\n                   \"FROM events \"\n                   \"WHERE properties.rating IS NOT NULL \"\n                   \"GROUP BY properties.rating \"\n                   \"ORDER BY r\").collect()\ncount = map(lambda e: e.c, ratings)\nrating = map(lambda e: \"%s (%d)\" % (e.r, e.c), ratings)\ncolors = ['yellowgreen', 'plum', 'gold', 'lightskyblue', 'lightcoral']\nplt.pie(count, labels=rating, colors=colors, startangle=90,\n        autopct=\"%1.1f%%\")\nplt.axis('equal')\nplt.show()\n```\n\n![Breakdown of Ratings](/images/datacollection/ipynb-04.png)\n\nHappy analyzing!\n"
  },
  {
    "path": "docs/manual/source/datacollection/analytics-tableau.html.md.erb",
    "content": "---\ntitle: Machine Learning Analytics with Tableau\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nWith Spark SQL, it is possible to connect Tableau to Apache PredictionIO\nEvent Server for interactive analysis of event data.\n\n## Prerequisites\n\n- Tableau Desktop 8.3+ with a proper license key that supports Spark SQL;\n- Spark ODBC Driver from Databricks\n  (https://databricks.com/spark/odbc-driver-download);\n- Apache Hadoop 2.4+\n- Apache Hive 0.3.1+\n\nINFO: In this article, we will assume that you have a working HDFS, and that\nyour environmental variable `HADOOP_HOME` has been properly set. This is\nessential for Apache Hive to function properly. In addition, `HADOOP_CONF_DIR`\nin `$PIO_HOME/conf/pio-env.sh` must also be properly set for the `pio export`\ncommand to write to HDFS instead of the local filesystem.\n\n<%= partial 'shared/datacollection/parquet' %>\n\n## Creating Hive Tables\n\nBefore you can use Spark SQL's Thrift JDBC/ODBC Server, you will need to create\nthe table schema in Hive first. 
Please make sure to replace `path_of_hive` with\nthe real path.\n\n```\n$ cd path_of_hive\n$ bin/hive\nhive> CREATE EXTERNAL TABLE events (event STRING, entityType STRING, entityId STRING, targetEntityType STRING, targetEntityId STRING, properties STRUCT<rating:DOUBLE>) STORED AS parquet LOCATION '/tmp/movies';\nhive> exit;\n```\n\n## Launch Spark SQL's Thrift JDBC/ODBC Server\n\nOnce you have created your Hive tables, create a Hive configuration in your Spark\ninstallation. If you have a custom `hive-site.xml`, simply copy or link it to\n`$SPARK_HOME/conf`. Otherwise, Hive would have created a local Derby database,\nand you will need to let Spark know about it. Create\n`$SPARK_HOME/conf/hive-site.xml` from scratch with the following template.\n\nWARNING: You must change `/opt/apache-hive-0.13.1-bin` below to a real Hive\npath.\n\n```xml\n<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n<configuration>\n  <property>\n    <name>javax.jdo.option.ConnectionURL</name>\n    <value>jdbc:derby:;databaseName=/opt/apache-hive-0.13.1-bin/metastore_db;create=true</value>\n  </property>\n</configuration>\n```\n\nLaunch Spark SQL's Thrift JDBC/ODBC Server by running:\n\n```\n$ $SPARK_HOME/sbin/start-thriftserver.sh\n```\n\nYou can test the server using the included Beeline client.\n\n```\n$ $SPARK_HOME/bin/beeline\nbeeline> !connect jdbc:hive2://localhost:10000\n(Use empty username and password when prompted)\n0: jdbc:hive2://localhost:10000> select * from events limit 10;\n+--------+-------------+-----------+-------------------+-----------------+------------------+\n| event  | entitytype  | entityid  | targetentitytype  | targetentityid  |    properties    |\n+--------+-------------+-----------+-------------------+-----------------+------------------+\n| buy    | user        | 3         | item              | 0               | {\"rating\":null}  |\n| buy    | user        | 3         | item              | 1     
          | {\"rating\":null}  |\n| rate   | user        | 3         | item              | 2               | {\"rating\":1.0}   |\n| buy    | user        | 3         | item              | 7               | {\"rating\":null}  |\n| buy    | user        | 3         | item              | 8               | {\"rating\":null}  |\n| buy    | user        | 3         | item              | 9               | {\"rating\":null}  |\n| rate   | user        | 3         | item              | 14              | {\"rating\":1.0}   |\n| buy    | user        | 3         | item              | 15              | {\"rating\":null}  |\n| buy    | user        | 3         | item              | 16              | {\"rating\":null}  |\n| buy    | user        | 3         | item              | 18              | {\"rating\":null}  |\n+--------+-------------+-----------+-------------------+-----------------+------------------+\n10 rows selected (0.515 seconds)\n0: jdbc:hive2://localhost:10000>\n```\n\nNow you are ready to use Tableau!\n\n## Performing Analysis with Tableau\n\nLaunch Tableau and Connect to Data. Click on **Spark SQL (Beta)** and enter\nSpark SQL's Thrift JDBC/ODBC Server information. Make sure to pick **User Name**\nas **Authentication**. Click **Connect**.\n\n![Tableau and Spark SQL](/images/datacollection/tableau-01.png)\n\nOn the next page, pick **default** under **Schema**.\n\nINFO: You may not see any choices when you click on Schema. Simply press Enter\nand Tableau will try to list all schemas.\n\nOnce you see a list of tables that includes **events**, click **New Custom\nSQL**, then enter the following.\n\n```sql\nSELECT event, entityType, entityId, targetEntityType, targetEntityId, properties.rating FROM events\n```\n\nClick **Update Now**. You should see the following screen by now, indicating\nsuccess in loading data. 
Using a custom SQL allows you to extract arbitrary\nfields from within properties.\n\n![Setting up Tableau](/images/datacollection/tableau-02.png)\n\nClick **Go to Worksheet** and start analyzing. The following shows an example of\nbreaking down different rating values.\n\n![Rating Values Breakdown](/images/datacollection/tableau-03.png)\n\nThe following shows a summary of interactions.\n\n![Interactions](/images/datacollection/tableau-04.png)\n\nHappy analyzing!\n"
  },
  {
    "path": "docs/manual/source/datacollection/analytics-zeppelin.html.md.erb",
    "content": "---\ntitle: Machine Learning Analytics with Zeppelin\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[Apache Zeppelin](http://zeppelin-project.org/) is an interactive computational\nenvironment built on Apache Spark like the IPython Notebook. With [Apache\nPredictionIO](http://predictionio.apache.org) and [Spark\nSQL](https://spark.apache.org/sql/), you can easily analyze your collected\nevents when you are developing or tuning your engine.\n\n## Prerequisites\n\nThe following instructions assume that you have the command `sbt` accessible in\nyour shell's search path. 
Alternatively, you can use the `sbt` command that\ncomes with Apache PredictionIO at `$PIO_HOME/sbt/sbt`.\n\n<%= partial 'shared/datacollection/parquet' %>\n\n## Building Zeppelin for Apache Spark 1.2+\n\nStart by cloning Zeppelin.\n\n```\n$ git clone https://github.com/apache/zeppelin.git\n```\n\nBuild Zeppelin with Hadoop 2.4 and Spark 1.2 profiles.\n\n```\n$ cd zeppelin\n$ mvn clean package -Pspark-1.2 -Dhadoop.version=2.4.0 -Phadoop-2.4 -DskipTests\n```\n\nNow you should have working Zeppelin binaries.\n\n## Preparing Zeppelin\n\nFirst, start Zeppelin.\n\n```\n$ bin/zeppelin-daemon.sh start\n```\n\nBy default, you should be able to access Zeppelin via web browser at\nhttp://localhost:8080. Create a new notebook and put the following in the first\ncell.\n\n```scala\nsqlc.parquetFile(\"/tmp/movies\").registerTempTable(\"events\")\n```\n\n![Preparing Zeppelin](/images/datacollection/zeppelin-01.png)\n\n## Performing Analysis with Zeppelin\n\nIf all steps above ran successfully, you should have a ready-to-use analytics\nenvironment by now. Let's try a few examples to see if everything is functional.\n\nIn the second cell, put in this piece of code and run it.\n\n```\n%sql\nSELECT entityType, event, targetEntityType, COUNT(*) AS c FROM events\nGROUP BY entityType, event, targetEntityType\n```\n\n![Summary of Events](/images/datacollection/zeppelin-02.png)\n\nWe can also easily plot a pie chart.\n\n```\n%sql\nSELECT event, COUNT(*) AS c FROM events GROUP BY event\n```\n\n![Summary of Event in Pie Chart](/images/datacollection/zeppelin-03.png)\n\nAnd see a breakdown of rating values.\n\n```\n%sql\nSELECT properties.rating AS r, COUNT(*) AS c FROM events\nWHERE properties.rating IS NOT NULL GROUP BY properties.rating ORDER BY r\n```\n\n![Breakdown of Rating Values](/images/datacollection/zeppelin-04.png)\n\nHappy analyzing!\n"
  },
  {
    "path": "docs/manual/source/datacollection/analytics.html.md",
    "content": "---\ntitle: Using Analytics Tools\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nEvent Server collects and unifies data for your application from multiple channels.\n\nData can be exported to Apache parquet format with `pio export` for fast analysis. The following analytics tools are currently supported:\n\n1. [IPython Notebook](/datacollection/analytics-ipynb/)\n\n2. [Tableau](/datacollection/analytics-tableau/)\n\n3. [Zeppelin](/datacollection/analytics-zeppelin/)\n"
  },
  {
    "path": "docs/manual/source/datacollection/batchimport.html.md",
    "content": "---\ntitle: Importing Data in Batch\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nIf you have a large amount of data to start with, performing batch import will\nbe much faster than sending every event over an HTTP connection.\n\n## Preparing Input File\n\nThe import tool expects its input to be a file stored either in the local\nfilesystem or on HDFS. Each line of the file should be a JSON object string\nrepresenting an event. 
For more information about the format of the event JSON\nobject, please refer to [this page](/datacollection/eventapi/#using-event-api).\n\nShown below is an example that contains 5 events ready to be imported to the\nEvent Server.\n\n```json\n{\"event\":\"buy\",\"entityType\":\"user\",\"entityId\":\"3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"0\",\"eventTime\":\"2014-11-21T01:04:14.716Z\"}\n{\"event\":\"buy\",\"entityType\":\"user\",\"entityId\":\"3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"1\",\"eventTime\":\"2014-11-21T01:04:14.722Z\"}\n{\"event\":\"rate\",\"entityType\":\"user\",\"entityId\":\"3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"2\",\"properties\":{\"rating\":1.0},\"eventTime\":\"2014-11-21T01:04:14.729Z\"}\n{\"event\":\"buy\",\"entityType\":\"user\",\"entityId\":\"3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"7\",\"eventTime\":\"2014-11-21T01:04:14.735Z\"}\n{\"event\":\"buy\",\"entityType\":\"user\",\"entityId\":\"3\",\"targetEntityType\":\"item\",\"targetEntityId\":\"8\",\"eventTime\":\"2014-11-21T01:04:14.741Z\"}\n```\n\nWARNING: Please make sure your import file does not contain any empty lines.\nEmpty lines will be treated as a null object and will return an error during\nimport.\n\n## Use SDK to Prepare Batch Input File\n\nSome of the Apache PredictionIO SDKs also provide a FileExporter\nclient. You may use them to prepare the JSON file as described above. The\nFileExporter creates events in the same way as EventClient except that the events\nare written to a JSON file instead of being sent to the Event Server. 
The written JSON\nfile can then be used by batch import.\n\n<div class=\"tabs\">\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n(coming soon)\n<!--\n```php\n<?php\n  require_once(\"vendor/autoload.php\");\n\n  use predictionio\\EventClient;\n\n  $accessKey = 'YOUR_ACCESS_KEY';\n  $client = new EventClient($accessKey);\n  $response = $client->createEvent(array(\n                        'event' => 'my_event',\n                        'entityType' => 'user',\n                        'entityId' => 'uid',\n                        'targetEntityType' => 'item',\n                        'targetEntityId' => 'iid',\n                        'properties' => array('someProperty'=>'value1',\n                                              'anotherProperty'=>'value2'),\n                        'eventTime' => '2004-12-13T21:39:45.618Z'\n                       ));\n?>\n```\n-->\n\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n\n```python\nimport predictionio\nfrom datetime import datetime\nimport pytz\n\n# Create a FileExporter and specify \"my_events.json\" as destination file\nexporter = predictionio.FileExporter(file_name=\"my_events.json\")\n\nevent_properties = {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\",\n    }\n# write the events to a file\nevent_response = exporter.create_event(\n    event=\"my_event\",\n    entity_type=\"user\",\n    entity_id=\"uid\",\n    target_entity_type=\"item\",\n    target_entity_id=\"iid\",\n    properties=event_properties,\n    event_time=datetime(2014, 12, 13, 21, 38, 45, 618000, pytz.utc))\n\n# ...\n\n# close the FileExporter when finish writing all events\nexporter.close()\n\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n(coming soon)\n<!--\n```ruby\nrequire 'predictionio'\n\nevent_client = PredictionIO::EventClient.new('YOUR_ACCESS_KEY')\nevent_client.create_event('my_event', 'user', 'uid',\n                          'targetEntityType' => 'item',\n                          
'targetEntityId' => 'iid',\n                          'eventTime' => '2004-12-13T21:39:45.618Z',\n                          'properties' => { 'someProperty' => 'value1',\n                                            'anotherProperty' => 'value2' })\n```\n-->\n\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n(coming soon)\n```\n  </div>\n</div>\n\n\n\n## Import Events from Input File\n\nImporting events from a file can be done easily using the command line\ninterface. Assuming that `pio` be in your search path, your App ID be `123`, and\nthe input file `my_events.json` be in your current working directory:\n\n```bash\n$ pio import --appid 123 --input my_events.json\n```\n\nAfter a brief while, the tool should return to the console without any error.\nCongratulations! You have successfully imported your events.\n"
  },
  {
    "path": "docs/manual/source/datacollection/channel.html.md.erb",
    "content": "---\ntitle: Channel\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nEach App has a default channel (without name) which stores all incoming events.\nThis \"default\" one is used when channel is not specified.\n\nYou may create additional Channels for the App. Creating multiple Channels is\nadvanced usage. You don't need to create any in order to use Apache PredictionIO.\nThe Channel is associated with one App only and must have unique\nname within the same App.\n\nCreating multiple Channels allows you more easily to identify, manage and use\nspecific event data if you may collect events from different multiple sources\n(eg. mobile, website, or third-party webhooks service) for the your application.\n\n(More usage details coming soon...)\n\n## Create a new Channel\n\nFor example, to create a new channel \"myChannel\" for app \"myApp\", run following\n`pio` command:\n\n```\npio app channel-new myApp myChannel\n```\n\nyou should see something like the following outputs:\n\n```\n[INFO] [App$] Updated Channel meta-data.\n[INFO] [HBLEvents] The table predictionio_eventdata:events_5_2 doesn't exist yet. 
Creating now...\n[INFO] [App$] Initialized Event Store for the channel: myChannel.\n[INFO] [App$] Created new channel:\n[INFO] [App$]     Channel Name: myChannel\n[INFO] [App$]       Channel ID: 2\n[INFO] [App$]           App ID: 5\n```\n\nNow \"myChannel\" is created and ready for collecting data.\n\n## Collect data through Channel\n\nThe Event API supports an optional `channel` query parameter. This allows you to import and query events of the specified channel. When the `channel` parameter is not specified, the data is collected through the default channel.\n\nURL: `http://localhost:7070/events.json?accessKey=yourAccessKeyString&channel=yourChannelName`\n\nQuery parameters:\n\nField | Type | Description\n:---- | :----| :-----\n`accessKey` | String | The Access Key for your App\n`channel` | String | The channel name (optional). Specify this to import data to this channel. **NOTE: supported only in PIO version >= 0.9.2.** The channel must be created first.\n\n\nFor SDK usage, one EventClient should be responsible for collecting data of one specific channel. 
The channel name is specified when the EventClient object is instantiated.\n\nFor example, the following code imports an event to \"YOUR_CHANNEL\" of the corresponding App.\n\n<div class=\"tabs\">\n  <div data-tab=\"Raw HTTP\" data-lang=\"bash\">\n```bash\n$ curl -i -X POST 'http://localhost:7070/events.json?accessKey=YOUR_ACCESS_KEY&channel=YOUR_CHANNEL' \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"iid\",\n  \"properties\" : {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\"\n  },\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n(TODO: update me)\n<!--\n```php\n<?php\n  require_once(\"vendor/autoload.php\");\n\n  use predictionio\\EventClient;\n\n  $accessKey = 'YOUR_ACCESS_KEY';\n  $client = new EventClient($accessKey);\n  $response = $client->createEvent(array(\n                        'event' => 'my_event',\n                        'entityType' => 'user',\n                        'entityId' => 'uid',\n                        'targetEntityType' => 'item',\n                        'targetEntityId' => 'iid',\n                        'properties' => array('someProperty'=>'value1',\n                                              'anotherProperty'=>'value2'),\n                        'eventTime' => '2004-12-13T21:39:45.618Z'\n                       ));\n?>\n```\n-->\n\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n\n```python\nfrom predictionio import EventClient\nfrom datetime import datetime\nimport pytz\n\n# Create a EventClient for \"YOUR_CHANNEL\"\nclient = EventClient('YOUR_ACCESS_KEY', \"http://localhost:7070\",\n  channel='YOUR_CHANNEL') # default channel if not specified\n\nevent_properties = {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\",\n    }\nevent_response = 
client.create_event(\n    event=\"my_event\",\n    entity_type=\"user\",\n    entity_id=\"uid\",\n    target_entity_type=\"item\",\n    target_entity_id=\"iid\",\n    properties=event_properties,\n    event_time=datetime(2014, 12, 13, 21, 38, 45, 618000, pytz.utc))\n\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n(TODO: update me)\n<!--\n```ruby\nrequire 'predictionio'\n\nevent_client = PredictionIO::EventClient.new('YOUR_ACCESS_KEY')\nevent_client.create_event('my_event', 'user', 'uid',\n                          'targetEntityType' => 'item',\n                          'targetEntityId' => 'iid',\n                          'eventTime' => '2004-12-13T21:39:45.618Z',\n                          'properties' => { 'someProperty' => 'value1',\n                                            'anotherProperty' => 'value2' })\n```\n-->\n\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n(coming soon)\n```\n  </div>\n</div>\n\nYou can also follow the EventAPI [debugging recipes](/datacollection/eventapi/#debugging-recipes) to query the events of a specific channel by adding the `channel` query parameter in the URL.\n\n\n## Delete a Channel (including all imported data)\n\n```\npio app channel-delete <app name> <channel name>\n```\n\n## Delete the data-only of a Channel\n\n```\npio app data-delete <app name> --channel <channel name>\n```\n\n## Accessing Channel Data in Engine\n\nTo access channel data, simply specify the channel name when using the PEventStore or LEventStore API. 
Data is read from the default channel if channelName is not specified.\n\nFor example, read data from the default channel:\n\n```scala\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n```\n\nFor example, read data from the channel \"CHANNEL_NAME\":\n\n```scala\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      channelName = Some(\"CHANNEL_NAME\"), // ADDED\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n```\n"
  },
  {
    "path": "docs/manual/source/datacollection/eventapi.html.md",
    "content": "---\ntitle: Collecting Data through REST/SDKs\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n**Event Server** is designed to collect data into Apache PredictionIO\nin an event-based style. Once the Event Server is launched, your\napplication can send data to it through its **Event API** with HTTP requests or\nwith `EventClient`s of PredictionIO's SDKs.\n\nINFO: All Apache PredictionIO-compliant engines support accessing\nthe Event Store (i.e. the data store of Event Server) through [Apache\nPredictionIO's Storage\nAPI](http://predictionio.apache.org/api/current/index.html#org.apache.predictionio.data.storage.package).\n\n## Launching the Event Server\n\nINFO: Before launching the Event Server, make sure that your event data store\nbackend is properly configured and is running. By default, Apache PredictionIO\nuses Apache HBase, and a quick configuration can be found\n[here](/install/install-sourcecode/#hbase). Please allow a minute (usually less\nthan 30 seconds) after HBase is started for its initialization to complete\nbefore starting the Event Server.\n\n\nEverything about Apache PredictionIO can be done through the `pio`\ncommand. Please add PIO binary command path to to your `PATH` first. 
Assuming\nPredictionIO is installed at `/home/yourname/PredictionIO/`, you can run\n\n```\n$ PATH=$PATH:/home/yourname/PredictionIO/bin; export PATH\n```\n\nTo start the event server, run\n\n```\n$ pio eventserver\n```\n\nINFO: By default, the Event Server is bound to 0.0.0.0, which serves global\ntraffic. To tighten security, you may use `pio eventserver --ip 127.0.0.1` to\nserve only local traffic.\n\n### Check Server Status\n\n```\n$ curl -i -X GET http://localhost:7070\n```\n\nSample response:\n\n```\nHTTP/1.1 200 OK\nServer: akka-http/10.1.5\nDate: Wed, 10 Sep 2014 22:37:30 GMT\nContent-Type: application/json; charset=UTF-8\nContent-Length: 18\n\n{\"status\":\"alive\"}\n```\n\n\n### Generating App ID and Access Key\n\nFirst, you need to create a new app in the Event Server. You will later send data into it.\n\n```\n$ pio app new MyTestApp\n```\n\n> You can replace `MyTestApp` with name of your App.\n\nTake note of the *Access Key* and *App ID* generated. You need the *Access Key*\nto use the Event API. 
You should see something like the following output:\n\n```\n[INFO] [App$] Created new app:\n[INFO] [App$]         Name: MyTestApp\n[INFO] [App$]           ID: 6\n[INFO] [App$]   Access Key: WPgcXKd42FPQpZHVbVeMyqF4CQJUnXQmIMTHhX3ZUrSzvy1KXJjdFUrslifa9rnB\n```\n\n### Creating Your First Event\n\nYou may connect to the Event Server with HTTP request or by using one of many\n**Apache PredictionIO SDKs**.\n\nFor example, the following shows how one can create an event involving a single entity.\nReplace the value of `accessKey` by the *Access Key* generated for your App.\n\n<div class=\"tabs\">\n  <div data-tab=\"Raw HTTP\" data-lang=\"bash\">\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=WPgcXKd42FPQpZHVbVeMyqF4CQJUnXQmIMTHhX3ZUrSzvy1KXJjdFUrslifa9rnB \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"properties\" : {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : true,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56\n  }\n  \"eventTime\" : \"2004-12-13T21:39:45.618-07:00\"\n}'\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n  require_once(\"vendor/autoload.php\");\n\n  use predictionio\\EventClient;\n\n  $accessKey = 'YOUR_ACCESS_KEY';\n  $client = new EventClient($accessKey);\n  $response = $client->createEvent(array(\n                        'event' => 'my_event',\n                        'entityType' => 'user',\n                        'entityId' => 'uid',\n                        'properties' => array('prop1' => 1,\n                                              'prop2' => 'value2',\n                                              'prop3' => array(1,2,3),\n                                              'prop4' => true,\n                                              'prop5' => array('a','b','c'),\n                                              'prop6' => 4.56\n   
                                     ),\n                        'eventTime' => '2004-12-13T21:39:45.618-07:00'\n                       ));\n?>\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nfrom predictionio import EventClient\nfrom datetime import datetime\nimport pytz\nclient = EventClient('YOUR_ACCESS_KEY', \"http://localhost:7070\")\n\nfirst_event_properties = {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : True,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56 ,\n    }\nfirst_event_time = datetime(\n  2004, 12, 13, 21, 39, 45, 618000, pytz.timezone('US/Mountain'))\nfirst_event_response = client.create_event(\n    event=\"my_event\",\n    entity_type=\"user\",\n    entity_id=\"uid\",\n    properties=first_event_properties,\n    event_time=first_event_time,\n)\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\nrequire 'predictionio'\n\nevent_client = PredictionIO::EventClient.new('YOUR_ACCESS_KEY')\nevent_client.create_event('my_event', 'user', 'uid',\n                          'eventTime' => '2004-12-13T21:39:45.618-07:00',\n                          'properties' => { 'prop1' => 1,\n                                            'prop2' => 'value2',\n                                            'prop3' => [1, 2, 3],\n                                            'prop4' => true,\n                                            'prop5' => %w(a b c),\n                                            'prop6' => 4.56 })\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n(coming soon)\n```\n  </div>\n</div>\n\nFor example, the following shows how one can create an event involving two entities (with\n`targetEntity`).\n\n<div class=\"tabs\">\n  <div data-tab=\"Raw HTTP\" data-lang=\"bash\">\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=WPgcXKd42FPQpZHVbVeMyqF4CQJUnXQmIMTHhX3ZUrSzvy1KXJjdFUrslifa9rnB \\\n-H 
\"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"my_event\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"uid\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"iid\",\n  \"properties\" : {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\"\n  },\n  \"eventTime\" : \"2004-12-13T21:39:45.618Z\"\n}'\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n  require_once(\"vendor/autoload.php\");\n\n  use predictionio\\EventClient;\n\n  $accessKey = 'YOUR_ACCESS_KEY';\n  $client = new EventClient($accessKey);\n  $response = $client->createEvent(array(\n                        'event' => 'my_event',\n                        'entityType' => 'user',\n                        'entityId' => 'uid',\n                        'targetEntityType' => 'item',\n                        'targetEntityId' => 'iid',\n                        'properties' => array('someProperty'=>'value1',\n                                              'anotherProperty'=>'value2'),\n                        'eventTime' => '2004-12-13T21:39:45.618Z'\n                       ));\n?>\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Second Event\nsecond_event_properties = {\n    \"someProperty\" : \"value1\",\n    \"anotherProperty\" : \"value2\",\n    }\nsecond_event_response = client.create_event(\n    event=\"my_event\",\n    entity_type=\"user\",\n    entity_id=\"uid\",\n    target_entity_type=\"item\",\n    target_entity_id=\"iid\",\n    properties=second_event_properties,\n    event_time=datetime(2014, 12, 13, 21, 38, 45, 618000, pytz.utc))\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\nrequire 'predictionio'\n\nevent_client = PredictionIO::EventClient.new('YOUR_ACCESS_KEY')\nevent_client.create_event('my_event', 'user', 'uid',\n                          'targetEntityType' => 'item',\n                          'targetEntityId' => 'iid',\n                          
'eventTime' => '2004-12-13T21:39:45.618Z',\n                          'properties' => { 'someProperty' => 'value1',\n                                            'anotherProperty' => 'value2' })\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n(coming soon)\n```\n  </div>\n</div>\n\n\nSample response:\n\n```\nHTTP/1.1 201 Created\nServer: akka-http/10.1.5\nDate: Wed, 10 Sep 2014 22:51:33 GMT\nContent-Type: application/json; charset=UTF-8\nContent-Length: 41\n\n{\"eventId\":\"AAAABAAAAQDP3-jSlTMGVu0waj8\"}\n```\n\n\n## Using Event API\n\n### Event Creation API\n\nURL: `http://localhost:7070/events.json?accessKey=yourAccessKeyString`\n\nQuery parameters:\n\nField | Type | Description\n:---- | :----| :-----\n`accessKey` | String | The Access Key for your App\n\nThe event creation support many commonly used data. POST request body:\n\nField | Type | Description\n:---- | :----| :-----\n`event` | String | Name of the event.\n        | | (Examples: \"sign-up\", \"rate\", \"view\", \"buy\").\n        | | **Note**: All event names start with \"$\" and \"pio_\" are reserved\n        | | and shouldn't be used as your custom event name (eg. \"$set\").\n`entityType` | String | The entity type. It is the namespace of the entityId and\n             | | analogous to the table name of a relational database. The\n             | | entityId must be unique within same entityType.\n             | | **Note**: All entityType names start with \"$\" and \"pio_\" are\n             | | reserved and shouldn't be used.\n`entityId` | String | The entity ID. `entityType-entityId` becomes the unique\n           | | identifier of the entity. For example, you may have entityType\n           | | named `user`, and different entity IDs, say `1` and `2`. 
In this\n           | | case, `user-1` and `user-2` uniquely identify these two\n           | | entities.\n`targetEntityType` | String | (Optional) The target entity type.\n                   | | **Note**: All entityType names start with \"$\" and \"pio_\"\n                   | | are reserved and shouldn't be used.\n`targetEntityId` | String | (Optional) The target entity ID.\n`properties` | JSON | (Optional) See **Note About Properties** below\n             | | **Note**: All property names start with \"$\" and \"pio_\"\n             | | are reserved and shouldn't be used as keys inside `properties`.\n`eventTime` | String | (Optional) The time of the event. Although Event Server's\n            | | current system time and UTC timezone will be used if this is\n            | | unspecified, it is highly recommended that this time should be\n            | | generated by the client application in order to accurately\n            | | record the time of the event.\n            | |  Must be in ISO 8601 format (e.g.\n            | | `2004-12-13T21:39:45.618Z`, or `2014-09-09T16:17:42.937-08:00`).\n\n## Note About Properties\n\nNote that `properties` can be:\n\n1. Associated with a *generic event*: The `properties` field provides additional information about this event.\n2. Associated with an *entity*: The `properties` field is used to record the changes of an entity's properties with special events `$set`, `$unset` and `$delete`.\n\nPlease see the [Events Modeling](/datacollection/eventmodel/) for detailed explanation.\n\n\n## Debugging Recipes\n\nWARNING: The following APIs are mainly for development or debugging purposes\nonly. They should not be supported by SDK nor used by real application under\nnormal circumstances and they are subject to changes.\n\nINFO: Instead of using `curl`, you can also install JSON browser plugins such as **JSONView** to pretty-print the JSON on your browser. With the browser plugin you can make the `GET` queries below by passing in the URL. 
Plugins like **Postman - REST Client** provide a more advanced interface for making queries.\n\nThe `accessKey` query parameter is mandatory.\n\nReplace `<your_accessKey>` and `<your_eventId>` by a real one in the following:\n\n### Get an Event\n\n```\n$ curl -i -X GET http://localhost:7070/events/<your_eventId>.json?accessKey=<your_accessKey>\n```\n\n### Delete an Event\n\n```\n$ curl -i -X DELETE http://localhost:7070/events/<your_eventId>.json?accessKey=<your_accessKey>\n```\n\n### Get Events of an App\n\n```\n$ curl -i -X GET http://localhost:7070/events.json?accessKey=<your_accessKey>\n```\n\nINFO: By default, it returns at most 20 events. Use the `limit` parameter to specify how many events returned (see below). Use cautiously!\n\nIn addition, the following *optional* parameters are supported:\n\n- `startTime`: time in ISO8601 format. Return events with `eventTime >= startTime`.\n- `untilTime`: time in ISO8601 format. Return events with `eventTime < untilTime`.\n- `entityType`: String. The entityType. Return events for this `entityType` only.\n- `entityId`: String. The entityId. Return events for this `entityId` only.\n- `event`: String. The event name. Return events with this name only.\n- `targetEntityType`: String. The targetEntityType. Return events for this `targetEntityType` only.\n- `targetEntityId`: String. The targetEntityId. Return events for this `targetEntityId` only.\n- `limit`: Integer. The number of record events returned. Default is 20. -1 to\n  get all.\n- `reversed`: Boolean. **Must be used with both `entityType` and `entityId` specified**, returns events in reversed chronological order. Default is false.\n\nWARNING: If you are using <code>curl</code> with the <code>&</code> symbol, you should quote the entire URL by using single or double quotes.\n\nWARNING: Depending on the size of data, you may encounter timeout when querying with some of the above filters. 
Event server uses `entityType` and `entityId` as the key so any query without both `entityType` and `entityId` specified might result in a timeout.\n\nFor example, get all events of an app with `eventTime >= startTime`:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=<your_accessKey>&startTime=<time in ISO8601 format>\"\n```\n\nFor example, get all events of an app with `eventTime < untilTime`:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=<your_accessKey>&untilTime=<time in ISO8601 format>\"\n```\n\nFor example, get all events of an app with `eventTime >= startTime` and `eventTime < untilTime`:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=<your_accessKey>&startTime=<time in ISO8601 format>&untilTime=<time in ISO8601 format>\"\n```\n\nFor example, get all events of a specific entity with `eventTime < untilTime`:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=<your_accessKey>&entityType=<your_entityType>&entityId=<your_entityId>&untilTime=<time in ISO8601 format>\"\n```\n\n### Delete All Events of an App\n\nPlease use the following CLI command:\n\n```\n$ pio app data-delete <your_app_name>\n```\n\nINFO: See [here](/cli/#event-server-commands) to learn the details of the command-line interface for the event server.\n"
  },
  {
    "path": "docs/manual/source/datacollection/eventmodel.html.md.erb",
    "content": "---\ntitle: Events Modeling\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis section explains how to model your application data as events.\n\n**Entity**: it's the real world object involved in the events. The entity may perform the events, or interact with other entity (which became `targetEntity` in an event).\n\nFor example, your application may have users and some items which the user can interact with. Then you can model them as two entity types: **user** and **item** and the entityId can uniquely identify the entity within each entityType (e.g. user with ID 1, item with ID 1).\n\nAn entity may perform some events (e.g user 1 does something), and entity may have properties associated with it (e.g. user may have gender, age, email etc). Hence, **events** involve **entities** and there are three types of events, respectively:\n\n1. Generic events performed by an entity.\n2. Special events for recording changes of an entity's properties\n3. Batch events\n\nThey are explained in details below.\n\n## 1. Generic events performed by an entity\n\nWhenever the entity performs an action, you can describe such event as `entity \"verb\" targetEntity with \"some extra information\"`. 
The *\"targetEntity\"* and *\"some extra information\"* can be optional. The *\"verb\"* can be used as the name of the *\"event\"*. The *\"some extra information\"* can be recorded as `properties` of the event.\n\nThe following are some simple examples:\n\n  * user-1 signs-up\n\n```json\n{\n  \"event\" : \"sign-up\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"1\"\n}\n```\n\n  * user-1 views item-1 *(with targetEntity)*\n\n```json\n{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"1\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"1\"\n}\n```\n\n  * user-1 rates item-1 with rating of 4 stars *(with targetEntity and properties)*\n\n```json\n{\n  \"event\" : \"rate\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"1\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"1\",\n  \"properties\" : {\n    \"rating\" : 4\n  }\n}\n```\n\n## 2. Special events for recording changes of an entity's properties\n\nThe generic events described above are used to record general actions performed by the entity. However, an entity may have properties (or attributes) associated with it. Moreover, the properties of the entity may change over time (for example, user may have new address, item may have new categories). In order to record such changes of an entity's properties. Special events `$set` , `$unset` and `$delete` are introduced.\n\nThe following special events are reserved for updating entities and their properties:\n\n-  `\"$set\"` event: Set properties of an entity (also implicitly create the entity). To change properties of entity, you simply set the corresponding properties with value again. 
The `$set` events should be created only when:\n  *  The entity is *first* created (or re-create after `$delete` event), or\n  *  Set the entity's existing or new properties to new values (For example, user updates his email, user adds a phone number, item has a updated categories)\n-  `\"$unset\"` event: Unset properties of an entity. It means treating the specified properties as not existing anymore. Note that the field `properties` cannot be empty for `$unset` event.\n-  `\"$delete\"` event: delete the entity.\n\nThere is no `targetEntityId` for these special events.\n\nFor example, setting entity `user-1`'s properties `birthday` and `address`:\n\n```json\n{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"1\",\n  \"properties\" : {\n    \"birthday\" : \"1984-10-11\",\n    \"address\" : \"1234 Street, San Francisco, CA 94107\"\n  }\n}\n```\n\n**Note** that the properties values of the entity will be aggregated based on these special events and the eventTime. The state of the entity is different depending on the time you are looking at the data. In engine's DataSource, you can use [PEventStore.aggregateProperties() API](https://predictionio.apache.org/api/current/#org.apache.predictionio.data.store.PEventStore$) to retrieve the state of entity's properties (based on time).\n\nNOTE: Although it doesn't hurt to import duplicated special events for an entity (exactly same properties) into event server (it just means that the entity changes to the same state as before and new duplicated event provides no new information about the user), it could waste storage space.\n\nTo demonstrate the concept of these special events, we are going to import a sequence of events and see how it affects the retrieved entity's properties.\n\nAssuming you have created the App (named \"MyTestApp\") for testing and Event Server is started.\n\n#### Event 1\n\nFor example, on `2014-09-09T...`, a user with ID \"2\" is newly added in your application. 
Also, this user has properties a = 3 and b = 4. To record such event, we can create a `$set` event for the user.\n\nfor convenience, assign the ACCESS_KEY of your test app to the shell variable `ACCESS_KEY` and run following curl command to import the event:\n\n```bash\n$ ACCESS_KEY=\"<YOUR_ACCESS_KEY>\"\n\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"2\",\n  \"properties\" : {\n    \"a\" : 3,\n    \"b\" : 4\n  },\n  \"eventTime\" : \"2014-09-09T16:17:42.937-08:00\"\n}'\n```\n\nYou should see something like the following, meaning the events are imported successfully.\n\n```\nHTTP/1.1 201 Created\nServer: akka-http/10.1.5\nDate: Tue, 02 Jun 2015 23:13:58 GMT\nContent-Type: application/json; charset=UTF-8\nContent-Length: 57\n\n{\"eventId\":\"PVjOIP6AJ5PgsiGQW6pgswAAAUhc7EwZpCfSj5bS5yg\"}\n```\n\nAfter this eventTime, user-2 is created and has properties of a = 3 and b = 4.\n\n#### Event 2\n\nThen, on `2014-09-10T...`, let's say the user has updated the properties b = 5 and c = 6. To record such property change, create another `$set` event. Run the following command:\n\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"2\",\n  \"properties\" : {\n    \"b\" : 5,\n    \"c\" : 6\n  },\n  \"eventTime\" : \"2014-09-10T13:12:04.937-08:00\"\n}'\n```\n\nAfter this eventTime, user-2 has properties of a = 3, b = 5 and c = 6. Note that property `b` is updated with latest value.\n\n\n#### Event 3\n\nThen, let's say on `2014-09-11T...`, the user's properties 'b' is removed for some reasons. 
To record such event, create `$unset` event for user-2 with properties b:\n\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$unset\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"2\",\n  \"properties\" : {\n    \"b\" : null\n  },\n  \"eventTime\" : \"2014-09-11T14:17:42.456-08:00\"\n}'\n```\n\nAfter this eventTime, user-2 has properties of a = 3, and c = 6. Note that property `b` is removed.\n\n#### Event 4\n\nThen, on `2014-09-12T...`, the user is removed from the application data. To record such event, create `$delete` event:\n\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$delete\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"2\",\n  \"eventTime\" : \"2014-09-12T16:13:41.452-08:00\"\n}'\n```\n\nAfter this eventTime, user-2 is removed.\n\n#### Event 5\n\nThen, on `2014-09-13T...`, let's say we want to add back the user-2 into the application again for some reasons. To record such event, create `$set` event for user-2 with empty properties:\n\n```bash\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"2\",\n  \"eventTime\" : \"2014-09-13T16:17:42.143-08:00\"\n}'\n```\nAfter this eventTime, user-2 is created again with empty properties.\n\nNote that all above events are recorded in Event Store. 
Let's query Event Server and see if these events are imported.\n\nGo to following URL with your browser:\n\n`http://localhost:7070/events.json?accessKey=<YOUR_ACCESS_KEY>`\n\nor run the following command in terminal:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=$ACCESS_KEY\"\n```\n\nNOTE: Note that you should quote the entire URL by using single or double quotes when you run the curl command.\n\nYou should see all events being created for this user-2.\n\nNow, let's retrieve the user-2's properties using the [PEventStore API](https://predictionio.apache.org/api/current/#org.apache.predictionio.data.store.PEventStore$).\n\nFirst, start `pio-shell` by running:\n\n```\n$ pio-shell --with-spark\n```\n\nYou should see the following output and shell prompt:\n\n```\n15/06/02 16:01:54 INFO SparkILoop: Created spark context..\nSpark context available as sc.\n15/06/02 16:01:54 INFO SparkILoop: Created sql context (with Hive support)..\nSQL context available as sqlContext.\n\nscala>\n```\n\nRun the following code in PIO shell (Replace `\"MyTestApp\"` with your app name):\n\n```scala\nscala> val appName=\"MyTestApp\"\nscala> import org.apache.predictionio.data.store.PEventStore\nscala> PEventStore.aggregateProperties(appName=appName, entityType=\"user\")(sc).collect()\n```\n\nThis command is using PEventStore to aggregate the user properties as a Map of user Id and the PropertyMap. `collect()` will return the data as array. You should see the following output at the end, which indicates there is user id 2 with empty properties because that's the state of user 2 with all imported events taken into account.\n\n```\nres0: Array[(String, org.apache.predictionio.data.storage.PropertyMap)] =\nArray((2,PropertyMap(Map(), 2014-09-09T16:17:42.937-08:00, 2014-09-13T16:17:42.143-08:00)))\n```\n\nLet's say we want to retrieve the state of user 2 properties with only events 1 and event 2 imported. 
To do that, we can specify the untilTime (aggregate the user properties with events up to the specified time) in the API.\n\nRun the following in the pio-shell. The untilTime is set to DateTime(2014, 9, 11, 0, 0) which is the time right before event 3.\n\n```\nscala> import org.joda.time.DateTime\nscala> PEventStore.aggregateProperties(appName=appName, entityType=\"user\", untilTime=Some(new DateTime(2014, 9, 11, 0, 0)))(sc).collect()\n```\n\nYou should see the following output and the aggregated properties match what we expected as described earlier (right before event 3): user-2 has properties of a = 3, b = 5 and c = 6.\n\n```\nres2: Array[(String, org.apache.predictionio.data.storage.PropertyMap)] =\nArray((2,PropertyMap(Map(b -> JInt(5), a -> JInt(3), c -> JInt(6)), 2014-09-09T16:17:42.937-08:00, 2014-09-10T13:12:04.937-08:00)))\n```\n\nAs you have seen in the example above, the state of user-2 is different depending on the available events or the time you are looking at the data. Recording events in a logging fashion allows us to reconstruct the state of the entity according to the time.\n\n## 3. Batch Events to the EventServer\n\nUsing a different REST address on the usual EventServer port, as of PredictionIO 0.9.5 you can send batches of up to 50 events at a time. The format is as described above but the JSON payload is packaged as an array of Event objects.\n\n**Response:**\n\n* Status:\n    * 200 on success if we can return an array of data in the response even when some events fail (e.g. because of ill-format). Client needs to check each individual dictionary to verify all events were successfully created.\n    * 400 otherwise. 
Perhaps exceeded 50 events?\n* Data: an array of dictionaries, each of which contains some of the following keys:\n    * \"status\": 201 if the event was successfully created; otherwise, 400.\n    * \"eventId\": the ID of the event, if it was successfully created.\n    * \"message\": the error message string, if any error occurred during creation.\n\nThe order of the response array corresponds to the order of the request array. However, the events might be imported in any order.\n\n### Sample Request:\n\n    curl -i -X POST http://localhost:7070/batch/events.json?accessKey=... \\\n    -H \"Content-Type: application/json\" -d '\n    [\n        {\n            \"event\": \"$create\",\n            \"entityType\": \"user\",\n            \"entityId\": \"uid\",\n            \"properties\": {\n                ...\n            }\n        },\n        {\n            \"event\": \"like\",\n            \"entityType\": \"user\",\n            \"entityId\": \"uid\",\n            \"targetEntityType\": \"item\",\n            \"targetEntityId\": \"iid\",\n            \"properties\": {\n                ...\n            },\n            \"eventTime\": \"2004-12-13T21:39:45.618-07:00\"\n        },\n    \t...\n    ]'\n\n\n### Sample Response:\n\n    HTTP/1.1 200 Successful\n    Server: akka-http/10.1.5\n    Date: Wed, 10 Sep 2014 22:51:33 GMT\n    Content-Type: application/json; charset=UTF-8\n    Content-Length: 41\n    [\n        {\"eventId\":\"AAAABAAAAQDP3-jSlTMGVu0waj8\"},\n        {\n            \"status\": 201,\n            \"eventId\": \"AAAABAAAAQDP3-jSlTMGVu0waj8\"\n        },\n        {\n            \"status\": 201,\n            \"eventId\":\"AAAABAAAAQDP3-jSlTMGVu0waj9\"\n        },\n         …\n        {\n            \"status\": 400,\n            \"message\":\"Required entityType is missing\"\n        },\n\t\t…\n    ]\n\n\nNotice that each subrequest receives a status response. 
The limit of 50 events per batch request is in line with Facebook, Mixpanel, SegmentIO and other event syncs that accept batches.\n"
  },
  {
    "path": "docs/manual/source/datacollection/index.html.md",
    "content": "---\ntitle: Event Server Overview\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nApache PredictionIO offers an Event Server that collects data in an\nevent-based style via a RESTful API. By default, Event Server uses Apache HBase\nas data store.\n\n![EventServer Highlight](/images/eventserver-overview.png)\n\n\n## What data should I collect?\n\nThe Event Server can collect and store arbitrary events. At the beginning of\nyour project, it is recommended to collect as much data as you can. Later on,\nyou can exclude data that are not relevant to your predictive model in Data\nPreparator.\n\n### Recommendation Engine\n\nWith Collaborative Filtering based Recommendation Engine, a common pattern is\n\n```\nuser -- action -- item\n```\n\nwhere users and items have properties associated with them.\n\nFor example, for personalized book recommendation, some events to collect would\nbe\n\n- User 1 purchased product X\n- User 2 viewed product Y\n- User 1 added product Z in the cart\n\nUser properties can be gender, age, location, etc. 
Item properties can be genre,\nauthor, and other attributes that may be related to the user's preference.\n\nData collection varies quite a bit based on your application and your prediction\ngoal. We are happy to [assist you with your\nquestions](mailto:support@prediction.io).\n"
  },
  {
    "path": "docs/manual/source/datacollection/plugin.html.md",
    "content": "---\ntitle: Event Server Plugin\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can write event server plugins to handle input data. For example, a plugin can block invalid data, log events, collect statistics, or forward events to other processing systems. There are two types of event server plugin.\n\n- `Input Blocker`: When these plugins are present, events coming into the event server will be passed through all loaded and active plugins before reaching the actual event store. The order of processing is not defined, so events can go through these plugins in arbitrary order. One use case is validating input data and throwing exceptions to prevent bad data from going in. These plugins cannot transform the event.\n- `Input Sniffer`: When these are present, events will be broadcast to these plugins in parallel. They do not block the event from reaching the event store. 
They are useful for logging, statistics, and forwarding to other processing systems.\n\n## Create an event server plugin\n\nAt first, create a sbt project with following `build.sbt`:\n\n```scala\nname := \"pio-plugin-example\"\nversion := \"1.0\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies += \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\"\n```\n\nEvent server plug-ins must extend `EventServerPlugin`. Here is an example of event server plug-in:\n\n```scala\npackage com.example\n\nimport org.apache.predictionio.data.api._\n\nclass MyEventServerPlugin extends EventServerPlugin {\n  val pluginName = \"my-eventserver-plugin\"\n  val pluginDescription = \"an example of event server plug-in\"\n  \n  // inputBlocker or inputSniffer\n  val pluginType = EventServerPlugin.inputBlocker\t\n  \n  // Plug-in can handle input data in this method.\n  // If plug-in found invalid data, it's possible to block them \n  // by throwing an exception in this method.\n  override def process(\n      eventInfo: EventInfo, \n      context: EventServerPluginContext): Unit = {\n    println(eventInfo)\n  }\n\n  // Plug-in can handle requests to /plugins/<pluginType>/<pluginName>/* \n  // on the event server in this method.\n  override def handleREST(\n      appId: Int, \n      channelId: Option[Int], \n      arguments: Seq[String]): String = {\n    \"\"\"{\"pluginName\": \"my-eventserver-plugin\"}\"\"\"\n  }\n}\n```\n\nPlug-ins are loaded by `ServiceLoader`, so you must create `META-INF/services/org.apache.predictionio.data.api.EventServerPlugin` with a following content:\t\n\n```\ncom.example.MyEventServerPlugin\n```\n\nFinally, run `sbt package` to package plugin as a jar file. 
In this case, the plugin jar file is generated at `target/scala-2.11/pio-plugin-example_2.11-1.0.jar`, so copy this file to `PIO_HOME/plugins`.\n\nWhen you start (or restart) the event server, this plugin should be enabled.\n\n## Plugin APIs of event server\n\nThe event server has some plugin-related APIs:\n\n- `/plugins.json`: Show all enabled plugins.\n- `/plugins/inputblocker/<pluginName>/*`: Handled by a corresponding input blocker plugin.\n- `/plugins/inputsniffer/<pluginName>/*`: Handled by a corresponding input sniffer plugin.\n\nFor example, if you send the following request to the event server:\n\t\n```\ncurl -XGET http://localhost:7070/plugins.json?accessKey=$ACCESS_KEY\n```\n\nThe event server should respond with the following JSON:\n\t\n```json\n{\n  \"plugins\": {\n    \"inputblockers\": {\n      \"my-eventserver-plugin\": {\n        \"name\": \"my-eventserver-plugin\",\n        \"description\": \"an example of event server plug-in\",\n        \"class\": \"com.example.MyEventServerPlugin\"\n      }\n    },\n    \"inputsniffers\": {}\n  }\n}\n```"
  },
  {
    "path": "docs/manual/source/datacollection/webhooks.html.md.erb",
    "content": "---\ntitle: Unifying Multichannel Data with Webhooks\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nWebhooks are used to collect data for your application from multiple channels automatically.\n\nApache PredictionIO offers webhook connectors for Segment.io and MailChimp\n(backend only). Several users have also expressed interest in Magento, Shopify\nand KeenIO so we'd be happy to help if anyone wishes to implement these.\n"
  },
  {
    "path": "docs/manual/source/demo/index.html.md.erb",
    "content": "---\ntitle: Demos\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Current Demos\n\n[Text Classification](/demo/textclassification/) - an official demo of Apache PredictionIO.\n"
  },
  {
    "path": "docs/manual/source/demo/textclassification.html.md.erb",
    "content": "---\ntitle: Text Classification Engine Tutorial\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n(Updated for Text Classification Template version 3.1)\n\n## Introduction\n\nIn the real world, there are many applications that collect text as data. For example, spam detectors take email and header content to automatically determine what is or is not spam; applications can gauge the general sentiment in a geographical area by analyzing Twitter data; and news articles can be automatically categorized based solely on the text content. There is a wide array of machine learning models you can use to create, or train, a predictive model to assign an incoming article, or query, to an existing category. Before you can use these techniques you must first transform the text data (in this case the set of news articles) into numeric vectors, or feature vectors, that can be used to train your model.\n\nThe purpose of this tutorial is to illustrate how you can go about doing this using PredictionIO's platform. 
The advantages of using this platform include: a dynamic engine that responds to queries in real-time; [separation of concerns](http://en.wikipedia.org/wiki/Separation_of_concerns), which offers code re-use and maintainability, and distributed computing capabilities for scalability and efficiency. Moreover, it is easy to incorporate non-trivial data modeling tasks into the DASE architecture allowing Data Scientists to focus on tasks related to modeling. This tutorial will exemplify some of these ideas by guiding you through PredictionIO's [text classification template](/gallery/template-gallery/#natural-language-processing).\n\n\n\n## Prerequisites\n\nBefore getting started, please make sure that you have the latest version of\nApache PredictionIO\n[installed](http://predictionio.apache.org/install/). We emphasize\nhere that this is an engine template written in **Scala** and can be more\ngenerally thought of as an SBT project containing all the necessary components.\n\nYou should also download the engine template named Text Classification Engine\nthat accompanies this tutorial by cloning the template repository:\n\n```\ngit clone https://github.com/apache/predictionio-template-text-classifier.git < Your new engine directory >\n```\n\n\n## Engine Overview\n\nThe engine follows the DASE architecture which we briefly review here. As a user, you are tasked with collecting data for your web or application, and importing it into PredictionIO's Event Server. Once the data is in the server, it  can be read and processed by the engine via the Data Source and Preparation components, respectively. The Algorithm component uses the processed, or prepared, data to train a set of predictive models. Once you have trained these models, you are ready to deploy your engine and respond to real-time queries via the Serving component which combines the results from different fitted models. 
The Evaluation component is used to compute an appropriate metric to test the performance of a fitted model, as well as aid in the tuning of model hyper parameters.\n\nThis engine template is meant to handle text classification which means you will be working with text data. This means that a query, or newly observed documents, will be of the form:\n\n`{text : String}`.\n\nIn the running example, a query would be an incoming news article. Once the engine is deployed it can process the query, and then return a Predicted Result of the form\n\n`{category : String, confidence : Double}`.\n\nHere category is the model's class assignment for this new text document (i.e. the best guess for this article's categorization), and confidence, a value between 0 and 1 representing your confidence in the category prediction (0 meaning you have no confidence in the prediction). The Actual Result is of the form\n\n`{category : String}`.\n\nThis is used in the evaluation stage when estimating the performance of your predictive model (how well does the model predict categories). Please refer to the [following tutorial](https://predictionio.apache.org/customize/) for a more detailed explanation of how your engine will interact with your web application, as well as an in depth-overview of DASE.\n\n\n## Quick Start\n\nThis is a quick start guide in case you want to start using the engine right away. Sample email data for spam classification will be used. For more detailed information, read the subsequent sections.\n\n\n### 1. Create a new application.\n\nAfter the application is created, you will be given an access key and application ID for the application.\n\n```\n$ pio app new MyTextApp\n```\n\n### 2. Import the tutorial data.\n\nThere are three different data sets available, each giving a different use case for this engine. Please refer to the **Data Source: Reading Event Data** section to see how to appropriate modify the `DataSource` class for use with each respective data set. 
The default data set is an e-mail spam data set.\n\nThese data sets have already been processed and are ready for [batch import](/datacollection/batchimport/). Replace `***` with your actual application ID.\n\n```\n$ pio import --appid *** --input data/stopwords.json\n\n$ pio import --appid *** --input data/emails.json\n```\n\n### 3. Set the engine parameters in the file `engine.json`.\n\nThe default settings are shown below. By default, it uses the algorithm name \"lr\" which is logistic regression. Please see later section for more detailed explanation of engine.json setting.\n\nMake sure the \"appName\" is same as the app you created in step1.\n\n\n```\n{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.example.textclassification.TextClassificationEngine\",\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"MyTextApp\"\n    }\n  },\n  \"preparator\": {\n    \"params\": {\n      \"nGram\": 1,\n      \"numFeatures\": 500,\n      \"SPPMI\": false\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"lr\",\n      \"params\": {\n        \"regParam\": 0.00000005\n      }\n    }\n  ]\n}\n\n```\n\n### 4. Build your engine.\n\n```\n$ pio build --verbose\n```\n\nThis command should take few minutes for the first time; all subsequent builds should be less than a minute. You can also run it without `--verbose` if you don't want to see all the log messages.\n\nUpon successful build, you should see a console message similar to the following\n\n```\n[INFO] [RegisterEngine$] Registering engine 6wxDy2hxLbvaMJra927ahFdQHDIVXeQz 266bae678c570dee58154b2338cef7aa1646e0d3\n[INFO] [Console$] Your engine is ready for training.\n```\n\n### 5.a. Train your model and deploy.\n\n```\n$ pio train\n```\n\nWhen your engine is trained successfully, you should see a console message similar to the following.\n\n```\n[INFO] [CoreWorkflow$] Training completed successfully.\n```\n\nNow your engine is ready to deploy. 
Run:\n\n```\n$ pio deploy\n```\n\nWhen the engine is deployed successfully and running, you should see a console message similar to the following:\n\n```\n[INFO] [HttpListener] Bound to /0.0.0.0:8000\n[INFO] [MasterActor] Engine is deployed and running. Engine API is live at http://0.0.0.0:8000.\n```\n\nNow you can send query to the engine. Open another terminal and send the following http request to the deployed engine:\n\n```\n$ curl -H \"Content-Type: application/json\" -d '{ \"text\":\"I like speed and fast motorcycles.\" }' http://localhost:8000/queries.json\n```\n\nyou should see following outputs returned by the engine:\n\n```\n{\"category\":\"not spam\",\"confidence\":0.852619510921587}\n```\n\nTry another query:\n\n```\n$ curl -H \"Content-Type: application/json\" -d '{ \"text\":\"Earn extra cash!\" }' http://localhost:8000/queries.json\n```\n\nyou should see following outputs returned by the engine:\n\n```\n{\"category\":\"spam\",\"confidence\":0.5268770133242983}\n```\n\n\n### 5.b.Evaluate your training model and tune parameters.\n\n```\n$ pio eval org.example.textclassification.AccuracyEvaluation org.example.textclassification.EngineParamsList\n```\n\n**Note:** Training and evaluation stages are generally different stages of engine development. Evaluation is there to help you choose the best [algorithm parameters](/evaluation/paramtuning/) to use for training an engine that is to be deployed as a web service.\n\nDepending on your needs, in steps (5.x.) above, you can configure your Spark settings by typing a command of the form:\n\n```\n$ pio command command_parameters -- --master url --driver-memory {0}G --executor-memory {1}G --conf spark.akka.framesize={2} --total_executor_cores {3}\n```\n\nOnly the latter commands are listed as these are some of the more commonly\nmodified values. 
See the [Spark\ndocumentation](https://spark.apache.org/docs/latest/spark-standalone.html) and\nthe [PredictionIO\nFAQ's](http://predictionio.apache.org/resources/faq/) for more\ninformation.\n\n**Note:** We recommend you set your driver memory to `1G` or `2G` as the data size when dealing with text can be very large.\n\n\n# Detailed Explanation of DASE\n\n## Importing Data\n\nIn the quick start, email spam classification is used. This template can easily be modified for other types text classification.\n\nIf you want to import different sets of data, follow the Quick Start instructions to import data from different files. Make sure that the Data Source is modified accordingly to match the `event`, `entityType`, and `properties` fields set for the specific dataset. The following section explains this in more detail.\n\n## Data Source: Reading Event Data\n\nNow that the data has been imported into PredictionIO's Event Server, it needs to be read from storage to be used by the engine. This is precisely what the DataSource engine component is for, which is implemented in the template script `DataSource.scala`. The class `Observation` serves as a wrapper for storing the information about a news document needed to train a model. The attribute label refers to the label of the category a document belongs to, and text, stores the actual document content as a string. The class TrainingData is used to store an RDD of Observation objects along with the set of stop words.\n\nThe class `DataSourceParams` is used to specify the parameters needed to read and prepare the data for processing. This class is initialized with two parameters `appName` and `evalK`. The first parameter specifies your application name (i.e. MyTextApp), which is needed so that the DataSource component knows where to pull the event data from. 
The second parameter is used for model evaluation and specifies the number of folds to use in [cross-validation](http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29) when estimating a model performance metric.\n\nThe final and most important ingredient is the DataSource class. This is initialized with its corresponding parameter class, and extends `PDataSource`. This **must** implement the method `readTraining` which returns an instance of type TrainingData. This method completely relies on the defined private methods readEventData and readStopWords. Both of these functions read data observations as Event instances, create an RDD containing these events and finally transforms the RDD of events into an object of the appropriate type as seen below:\n\n```scala\n...\nprivate def readEventData(sc: SparkContext) : RDD[Observation] = {\n    //Get RDD of Events.\n    PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"content\"), // specify data entity type\n      eventNames = Some(List(\"e-mail\")) // specify data event name\n\n      // Convert collected RDD of events to and RDD of Observation\n      // objects.\n    )(sc).map(e => {\n      val label : String = e.properties.get[String](\"label\")\n      Observation(\n        if (label == \"spam\") 1.0 else 0.0,\n        e.properties.get[String](\"text\"),\n        label\n      )\n    }).cache\n  }\n\n  // Helper function used to store stop words from\n  // event server.\n  private def readStopWords(sc : SparkContext) : Set[String] = {\n    PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"resource\"),\n      eventNames = Some(List(\"stopwords\"))\n\n    //Convert collected RDD of strings to a string set.\n    )(sc)\n      .map(e => e.properties.get[String](\"word\"))\n      .collect\n      .toSet\n  }\n...\n```\n\nNote that `readEventData` and `readStopWords` use different entity types and event names, but use the same application name. 
This is because the sample import script imports two different data types, documents and stop words. These field distinctions are required for distinguishing between the two. The method `readEval` is used to prepare the different cross-validation folds needed for evaluating your model and tuning hyperparameters.\n\nNow, the default dataset used for training is contained in the file `data/emails.json` and contains a set of e-mail spam data. If we want to switch over to one of the other data sets we must make sure that the `eventNames` and `entityType` fields are changed accordingly.\n\nIn the data/ directory, you will find different sets of data files for different types of text classification applications. The following shows one observation from each of the provided data files:\n\n- `emails.json`:\n\n```\n{\"eventTime\": \"2015-06-08T16:45:00.590+0000\", \"entityId\": 1, \"properties\": {\"text\": \"Subject: dobmeos with hgh my energy level has gone up ! stukm\\nintroducing\\ndoctor - formulated\\nhgh\\nhuman growth hormone - also called hgh\\nis referred to in medical science as the master hormone . it is very plentiful\\nwhen we are young , but near the age of twenty - one our bodies begin to produce\\nless of it . 
by the time we are forty nearly everyone is deficient in hgh ,\\nand at eighty our production has normally diminished at least 90 - 95 % .\\nadvantages of hgh :\\n- increased muscle strength\\n- loss in body fat\\n- increased bone density\\n- lower blood pressure\\n- quickens wound healing\\n- reduces cellulite\\n- improved vision\\n- wrinkle disappearance\\n- increased skin thickness texture\\n- increased energy levels\\n- improved sleep and emotional stability\\n- improved memory and mental alertness\\n- increased sexual potency\\n- resistance to common illness\\n- strengthened heart muscle\\n- controlled cholesterol\\n- controlled mood swings\\n- new hair growth and color restore\\nread\\nmore at this website\\nunsubscribe\\n\", \"label\": \"spam\"}, \"event\": \"e-mail\", \"entityType\": \"content\"}\n\n```\n\n- `20newsgroups.json`:\n\n```\n{\"entityType\": \"source\", \"eventTime\": \"2015-06-08T18:01:55.003+0000\", \"event\": \"documents\", \"entityId\": 1, \"properties\": {\"category\": \"sci.crypt\", \"text\": \"From: rj@ri.cadre.com (Rob deFriesse)\\nSubject: Can DES code be shipped to Canada?\\nArticle-I.D.: fripp.1993Apr22.125402.27561\\nReply-To: rj@ri.cadre.com\\nOrganization: Cadre Technologies Inc.\\nLines: 13\\nNntp-Posting-Host: 192.9.200.19\\n\\nSomeone in Canada asked me to send him some public domain DES file\\nencryption code I have.  Is it legal for me to send it?\\n\\nThanx.\\n--\\nEschew Obfuscation\\n\\nRob deFriesse                    Mail:  rj@ri.cadre.com\\nCadre Technologies Inc.          Phone:  (401) 351-5950\\n222 Richmond St.                 
Fax:    (401) 351-7380\\nProvidence, RI  02903\\n\\nI don't speak for my employer.\\n\", \"label\": 11.0}}\n```\n\n- `sentimentanalysis.json`:\n\n```\n{\"eventTime\": \"2015-06-08T16:58:14.278+0000\", \"entityId\": 23714, \"entityType\": \"source\", \"properties\": {\"phrase\": \"Tosca 's intoxicating ardor\", \"sentiment\": 3}, \"event\": \"phrases\"}\n```\n\nNow, note that the `entityType`, `event`, and `properties`  fields for the `20newsgroups.json` dataset differ from the default `emails.json` set. Default DataSource implementation is to read from `email.json` data set. If you want to use others such as newsgroups data set, the engine's Data Source component must be modified accordingly. To do this, you need only modify the method `readEventData` as follows:\n\n### Modify DataSource to Read `20newsgroups.json`\n\n```scala\nprivate def readEventData(sc: SparkContext) : RDD[Observation] = {\n    //Get RDD of Events.\n    PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"source\"), // specify data entity type\n      eventNames = Some(List(\"documents\")) // specify data event name\n\n      // Convert collected RDD of events to and RDD of Observation\n      // objects.\n    )(sc).map(e => {\n\n      Observation(\n        e.properties.get[Double](\"label\"),\n        e.properties.get[String](\"text\"),\n        e.properties.get[String](\"category\")\n      )\n    }).cache\n  }\n```\n\n### Modify DataSource to Read `sentimentanalysis.json`\n\n```scala\nprivate def readEventData(sc: SparkContext) : RDD[Observation] = {\n    //Get RDD of Events.\n    PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"source\"), // specify data entity type\n      eventNames = Some(List(\"phrases\")) // specify data event name\n\n      // Convert collected RDD of events to and RDD of Observation\n      // objects.\n    )(sc).map(e => {\n      val label = e.properties.get[Double](\"sentiment\")\n\n      Observation(\n        label,\n       
 e.properties.get[String](\"phrase\"),\n        label.toString\n      )\n    }).cache\n  }\n```\n\nNote that `event` field in the json file refers to the `eventNames` field in the `readEventData` method. When using this engine with a custom data set, you need to make sure that the respective json fields match with the corresponding fields in the DataSource component. We have included a data sanity check with this engine component that lets you know if your data is actually being read in. If you have 0 observations being read, you should see the following output when your training process performs the Training Data sanity check:\n\n`Data set is empty, make sure event fields match imported data.`\n\nThis data sanity check is a PredictionIO feature available for your `TrainingData` and `PreparedData` classes. The following code block demonstrates how the sanity check is implemented:\n\n```scala\nclass TrainingData(\n  val data : RDD[Observation],\n  val stopWords : Set[String]\n) extends Serializable with SanityCheck {\n\n  // Sanity check to make sure your data is being fed in correctly.\n\n  def sanityCheck {\n    try {\n      val obs : Array[Double] = data.takeSample(false, 5).map(_.label)\n\n      println()\n      (0 until 5).foreach(\n        k => println(\"Observation \" + (k + 1) +\" label: \" + obs(k))\n      )\n      println()\n    } catch {\n      case (e : ArrayIndexOutOfBoundsException) => {\n        println()\n        println(\"Data set is empty, make sure event fields match imported data.\")\n        println()\n      }\n    }\n\n  }\n\n}\n```\n\n## Preparator : Data Processing With DASE\n\nRecall that the Preparator stage is used for doing any prior data processing needed to fit a predictive model. In line with the separation of concerns, the Data Model implementation, PreparedData, is built to do the heavy lifting needed for this data processing. The Preparator must simply implement the prepare method which outputs an object of type PreparedData. 
This requires you to specify two n-gram window components, and two inverse i.d.f. window components (these terms will be defined in the following section). Therefore a custom class of parameters for the Preparator component, PreparatorParams, must be incorporated. The code defining the full Preparator component is given below:\n\n```scala\n// 1. Initialize Preparator parameters. Recall that for our data\n// representation we are only required to input the n-gram window\n// components.\n\ncase class PreparatorParams(\n  nGram: Int,\n  numFeatures: Int = 5000,\n  SPPMI: Boolean\n) extends Params\n\n\n\n// 2. Initialize your Preparator class.\n\nclass Preparator(pp: PreparatorParams) extends PPreparator[TrainingData, PreparedData] {\n\n  // Prepare your training data.\n  def prepare(sc : SparkContext, td: TrainingData): PreparedData = {\n    new PreparedData(td, pp.nGram)\n  }\n}\n\n```\n\nThe simplicity of this stage implementation truly exemplifies one of the benefits of using the PredictionIO platform. For developers, it is easy to incorporate different classes and tools into the DASE framework so that the process of creating an engine is greatly simplified which helps increase your productivity. For data scientists, the load of implementation details you need to worry about is minimized so that you can focus on what is important to you: training a good predictive model.\n\nThe following subsection explains the class PreparedData, which actually handles the transformation of text documents to feature vectors.\n\n### PreparedData: Text Vectorization and Feature Reduction\n\nThe Scala class PreparedData which takes the parameters td, nGram, where td is an object of class TrainingData. 
The other parameter specifies the n-gram parametrization which will be described shortly.\n\nIt will be easier to explain the preparation process with an example, so consider the document \\\\(d\\\\):\n\n`\"Hello, my name is Marco.\"`\n\nThe first thing you need to do is break up \\\\(d\\\\) into an array of \"allowed tokens.\" You can think of a token as a terminating sequence of characters that exist in a document (think of a word in a sentence). For example, the list of tokens that appear in \\\\(d\\\\) is:\n\n```scala\nval A = Array(\"Hello\", \",\", \"my\",  \"name\", \"is\", \"Marco\", \".\")\n```\n\nRecall that a set of stop words was also imported in the previous sections. This set of stop words contains all the words (or tokens) that you do not want to include once documents are tokenized. Those tokens that appear in \\\\(d\\\\) and are not contained in the set of stop words will be called allowed tokens. So, if the set of stop words is `{\"my\", \"is\"}`, then the list of allowed tokens appearing in \\\\(d\\\\) is:\n\n```scala\nval A = Array(\"Hello\", \",\",  \"name\", \"Marco\", \".\")\n```\n\nThe next step in the data representation is to take the array of allowed tokens and extract a set of n-grams and a corresponding value indicating the number of times a given n-gram appears. The set of n-grams for n equal to 1 and 2 in the running example is the set of elements of the form `[A(`\\\\(i\\\\)`)]` and `[A(`\\\\(j\\\\)`), A(`\\\\(j + 1\\\\)`)]`, respectively. In the general case, the set of n-grams extracted from an array of allowed tokens `A` will be of the form `[A(`\\\\(i\\\\)`), A(`\\\\(i + 1\\\\)`), ..., A(`\\\\(i + n - 1\\\\)`)]` for \\\\(i = 0, 1, 2, ...,\\\\) `A.size` \\\\(- n\\\\). 
You can set `n` with the `nGram` parameter option in your `PreparatorParams`.\n\nWe use MLLib's `HashingTF` class to implement the conversion from text to term frequency vectors, and can be seen in the following method of the class `PreparedData`:\n\n```scala\n...\n   // 1. Hashing function: Text -> term frequency vector.\n\n  private val hasher = new HashingTF()\n\n  private def hashTF (text : String) : Vector = {\n    val newList : Array[String] = text.split(\" \")\n    .sliding(nGram)\n    .map(_.mkString)\n    .toArray\n\n    hasher.transform(newList)\n  }\n\n  // 2. Term frequency vector -> t.f.-i.d.f. vector.\n\n  val idf : IDFModel = new IDF().fit(td.data.map(e => hashTF(e.text)))\n...\n```\n\nThe next step is, once all of the observations have been hashed, to collect all n-grams and compute their corresponding [t.f.-i.d.f. value](http://en.wikipedia.org/wiki/Tf%E2%80%93idf). The t.f.-i.d.f. transformation is defined for n-grams, and helps to give less weight to those n-grams that appear with high frequency across all documents, and vice versa. This helps to leverage the predictive power of those words that appear rarely, but can make a big difference in the categorization of a given text document. This is implemented using MLLib's `IDF` and `IDFModel` classes:\n\n```scala\n// 2. Term frequency vector -> t.f.-i.d.f. vector.\n\n  val idf : IDFModel = new IDF().fit(td.data.map(e => hashTF(e.text)))\n```\n\n\nThe last two functions that will be mentioned are the methods you will actually use for the data transformation. The method transform takes a document and outputs a sparse vector (MLLib implementation). The transformData method simply transforms the TrainingData input (a corpus of documents) into a set of vectors that can now be used for training. The method transform is used both to transform the training data and future queries.\n\n```scala\n...\n// 3. 
Document Transformer: text => tf-idf vector.\n\n  def transform(text : String): Vector = {\n    // Map(n-gram -> document tf)\n    idf.transform(hashTF(text))\n  }\n\n\n  // 4. Data Transformer: RDD[documents] => RDD[LabeledPoints]\n\n  val transformedData: RDD[(LabeledPoint)] = {\n    td.data.map(e => LabeledPoint(e.label, transform(e.text)))\n  }\n```\n\nThe last and final object implemented in this class simply creates a Map with keys being class labels and values, the corresponding category.\n\n```scala\n // 5. Finally extract category map, associating label to category.\n  val categoryMap = td.data.map(e => (e.label, e.category)).collectAsMap\n```\n\n\n## Algorithm Component\n\nThe algorithm components in this engine, `NBAlgorithm` and `LRAlgorithm`, actually follows a very general form. Firstly, a parameter class must again be initialized to feed in the corresponding Algorithm model parameters. For example, NBAlgorithm incorporates NBAlgorithmParams which holds the appropriate additive smoothing parameter lambda for the Naive Bayes model.\n\n\nThe main class of interest in this component is the class that extends [P2LAlgorithm](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.P2LAlgorithm). This class must implement a method named train which will output your predictive model (as a concrete object, this will be implemented via a Scala  class). It must also implement a predict method that transforms a query to an appropriate feature vector, and uses this to predict with the fitted model. The vectorization function is implemented by a PreparedData object, and the categorization (prediction) is handled by an instance of the NBModel implementation. 
Again, this demonstrates the facility with which different models can be incorporated into PredictionIO's DASE architecture.\n\nThe model class itself will be discussed in the following section, however, turn your attention to the TextManipulationEngine object defined in the script `Engine.scala`. You can see here that the engine is initialized by specifying the DataSource, Preparator, and Serving classes, as well as a Map of algorithm names to Algorithm classes. This tells the engine which algorithms to run. In practice, you can have as many statistical learning models as you'd like, you simply have to implement a new algorithm component to do this. However, this general design form will persist, and the main meat of the work should be in the implementation of your model class.\n\nThe following subsection will go over our Naive Bayes implementation in NBModel.\n\n\n### Naive Bayes Classification\n\nThis Training Model class only uses the Multinomial Naive Bayes [implementation](https://spark.apache.org/docs/latest/mllib-naive-bayes.html) found in the Spark MLLib library. However, recall that the predicted results required in the specifications listed in the overview are of the form:\n\n\n`{category: String, confidence: Double}`.\n\nThe confidence value should really be interpreted as the probability that a document belongs to a category given its vectorized form. Note that MLLib's Naive Bayes model has the class members pi (\\\\(\\pi\\\\)), and theta (\\\\(\\theta\\\\)). \\\\(\\pi\\\\) is a vector of log prior class probabilities, which shows your prior beliefs regarding the probability that an arbitrary document belongs in a category. \\\\(\\theta\\\\) is a C \\\\(\\times\\\\) D matrix, where C is the number of classes, and D, the number of features, giving the log probabilities that parametrize the Multinomial likelihood model assumed for each class. 
The multinomial model is easiest to think about as a problem of randomly throwing balls into bins, where the ball lands in each bin with a certain probability. The model treats each n-gram as a bin, and the corresponding t.f.-i.d.f. value as the number of balls thrown into it. The likelihood is the probability of observing a (vectorized) document given that it comes from a particular class.\n\nNow, letting \\\\(\\mathbf{x}\\\\) be a vectorized text document, then it can be shown that the vector\n\n$$\n\\frac{\\exp\\left(\\pi + \\theta\\mathbf{x}\\right)}{\\left|\\left|\\exp\\left(\\pi + \\theta\\mathbf{x}\\right)\\right|\\right|}\n$$\n\nis a vector with C components that represent the posterior class membership probabilities for the document given \\\\(\\mathbf{x}\\\\). That is, the update belief regarding what category this document belongs to after observing its vectorized form. This is the motivation behind defining the class NBModel which uses Spark MLLib's NaiveBayesModel, but implements a separate prediction method.\n\nThe private methods innerProduct and getScores are implemented to do the matrix computation above.\n\n```scala\n...\n // 2. Set up linear algebra framework.\n\n  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {\n    x.zip(y).map(e => e._1 * e._2).sum\n  }\n\n  val normalize = (u: Array[Double]) => {\n    val uSum = u.sum\n\n    u.map(e => e / uSum)\n  }\n\n\n\n  // 3. 
Given a document string, return a vector of corresponding\n  // class membership probabilities.\n\n  private def getScores(doc: String): Array[Double] = {\n    // Helper function used to normalize probability scores.\n    // Returns an object of type Array[Double]\n\n    // Vectorize query,\n    val x: Vector = pd.transform(doc)\n\n    normalize(\n      nb.pi\n      .zip(nb.theta)\n      .map(\n      e => exp(innerProduct(e._2, x.toArray) + e._1))\n    )\n  }\n...\n```\n\n\nOnce you have a vector of class probabilities, you can classify the text document to the category with highest posterior probability, and, finally, return both the category as well as the probability of belonging to that category (i.e. the confidence in the prediction) given the observed data. This is implemented in the method predict.\n\n```scala\n...\n  // 4. Implement predict method for our model using\n  // the prediction rule given in tutorial.\n\n  def predict(doc : String) : PredictedResult = {\n    val x: Array[Double] = getScores(doc)\n    val y: (Double, Double) = (nb.labels zip x).maxBy(_._2)\n    PredictedResult(pd.categoryMap.getOrElse(y._1, \"\"), y._2)\n  }\n```\n\n### Logistic Regression Classification\n\nTo use the alternative multinomial logistic regression algorithm change your `engine.json` as follows:\n\n```json\n  {\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.example.textclassification.TextClassificationEngine\",\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"MyTextApp\"\n    }\n  },\n  \"preparator\": {\n    \"params\": {\n      \"nGram\": 2\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"regParam\",\n      \"params\": {\n        \"regParam\": 0.1\n      }\n    }\n  ]\n}\n```\n\n\n## Serving: Delivering the Final Prediction\n\nThe serving component is the final stage in the engine, and in a sense, the most important. 
This is the final stage in which you combine the results obtained from the different models you choose to run. The Serving class extends the [LServing](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.LServing) class which must implement a method called serve. This takes a query and an associated sequence of predicted results, which contains the predicted results from the different algorithms that are implemented in your engine, and combines the results to yield a final prediction.  It is this final prediction that you will receive after sending a query.\n\nFor example, you could choose to slightly modify the implementation to return class probabilities coming from a mixture of model estimates for class probabilities, or any other technique you could conceive for combining your results. The default engine setting has this set to yield the label from the model predicting with greater confidence.\n\n\n\n## Evaluation: Model Assessment and Selection\n\n A predictive model needs to be evaluated to see how it will generalize to future observations. PredictionIO uses cross-validation to perform model performance metric estimates needed to assess your particular choice of model. The script `Evaluation.scala` available with the engine template exemplifies what a usual evaluator setup will look like. First, you must define an appropriate metric. In the engine template, since the topic is text classification, the default metric implemented is category accuracy.\n\n Second you must define an evaluation object (i.e. extends the class [Evaluation](https://predictionio.apache.org/api/current/#org.apache.predictionio.controller.Evaluation)).\nHere, you must specify the actual engine and metric components that are to be used for the evaluation. In the engine template, the specified engine is the TextManipulationEngine object, and metric, Accuracy. Lastly, you must specify the parameter values that you want to test in the cross validation. 
You see in the following block of code:\n\n```scala\nobject EngineParamsList extends EngineParamsGenerator {\n\n  // Set data source and preparator parameters.\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appName = \"marco-MyTextApp\", evalK = Some(5)),\n    preparatorParams = PreparatorParams(nMin = 1, nMax = 2)\n  )\n\n  // Set the algorithm params for which we will assess an accuracy score.\n  engineParamsList = Seq(\n    baseEP.copy(algorithmParamsList = Seq((\"nb\", NBAlgorithmParams(0.5)))),\n    baseEP.copy(algorithmParamsList = Seq((\"nb\", NBAlgorithmParams(1.5)))),\n    baseEP.copy(algorithmParamsList = Seq((\"nb\", NBAlgorithmParams(5))))\n  )\n```\n\n\n## Engine Deployment\n\nOnce an engine is ready for deployment it can interact with your web application in real-time. This section will cover how to send and receive queries from your engine, gather more data, and re-training your model with the newly gathered data.\n\n### Sending Queries\n\nRecall that one of the greatest advantages of using the PredictionIO platform is that once your engine is deployed, you can respond to queries in real-time. Recall that our queries are of the form\n\n`{\"text\" : \"...\"}`.\n\nTo actually send a query you can use our REST API by typing in the following shell command:\n\n```\ncurl -H \"Content-Type: application/json\" -d '{ \"text\":\"I like speed and fast motorcycles.\" }' http://localhost:8000/queries.json\n```\n\nThere are a number of [SDK's](https://github.com/PredictionIO) you can use to send your queries and obtain a response. 
Recall that our predicted response is of the form\n\n```\n{\"category\" : \"class\", \"confidence\" : 1.0}\n```\n\nwhich is what you should see upon inputting the latter command for querying.\n\n### Gathering More Data and Retraining Your Model\n\nThe importing data section that is included in this tutorial uses a sample data set for illustration purposes, and uses the PredictionIO Python SDK to import the data. However, there are a variety of ways that you can [import](<%= url_root %>/datacollection/eventapi/) your collected data (via REST or other SDKs).\n\n\nAs you continue to collect your data, it is quite easy to retrain your model once you actually import your data into the Event Server. You simply repeat the steps listed in the Quick Start guide. We re-list them here again:\n\n\n**1.** Build your engine.\n\n```\n$ pio build\n```\n\n**2.a.** Evaluate your training model and tune parameters.\n\n```\n$ pio eval org.example.textclassification.AccuracyEvaluation org.example.textclassification.EngineParamsList\n```\n\n**2.b.** Train your model and deploy.\n\n```\n$ pio train\n$ pio deploy\n```\n"
  },
  {
    "path": "docs/manual/source/deploy/engineparams.html.md",
    "content": "---\ntitle: Setting Engine Parameters\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n(coming soon)\n"
  },
  {
    "path": "docs/manual/source/deploy/enginevariants.html.md",
    "content": "---\ntitle: Deploying Multiple Engine Variants\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n(coming soon)\n"
  },
  {
    "path": "docs/manual/source/deploy/index.html.md",
    "content": "---\ntitle: Deploying an Engine\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nAn engine must be **built** (i.e. `pio build`) and **trained** (i.e. `pio\ntrain`)  before it can be deployed as a web service.\n\nWARNING: The engine server is not protected by authentication, and the\ninstructions below assume deployment in a trusted environment.\n\n## Deploying an Engine the First Time\n\nAfter you have [downloaded an Engine Template](/start/download/),  you can deploy it with these steps:\n\n1. Run `pio app new **your-app-name-here**` and specify the `appName` used in the template's *engine.json* file (you can set it there to your preference).\n2. Run `pio build` to update the engine\n3. Run `pio train` to train a predictive model with training data\n4. Run `pio deploy` to deploy the engine as a service\n\nINFO: See [here](/cli/#engine-commands) to know details of command-line interface for the engine server.\n\nA deployed engine listens to port 8000 by default. Your application can [send query to retrieve prediction](/appintegration/) in real-time through the REST interface.\n\n**Note**: a new engine deployed as above will have no data to start with. 
Your engine may come with a `data/` directory with some sample data that you can import, but not all templates do. Check the quickstart instructions for your template.\n\n## Update Model with New Data\n\nYou probably want to update the trained predictive model with newly collected data regularly.\nTo do so, run the `pio train` and `pio deploy` commands again:\n\n```\n$ pio train\n$ pio deploy\n```\n\nFor example, if you want to re-train the model every day, you may add this to your *crontab*:\n\n```\n0 0 * * *   $PIO_HOME/bin/pio train; $PIO_HOME/bin/pio deploy\n```\nwhere *$PIO_HOME* is the installation path of PredictionIO. See [Retrain and Deploy Script](#retrain-and-deploy-script) below for a script ready for customization.\n\n\n## Specify a Different Engine Port\n\nBy default, `pio deploy` deploys an engine on **port 8000**.\n\nYou can specify another port with a *--port* argument. For example, to deploy on port 8123\n\n```\npio deploy --port 8123\n```\n\nYou can also specify the binding IP with *--ip*, which is set to *localhost* if not specified. For example:\n\n```\npio deploy --port 8123 --ip 1.2.3.4\n```\n\n## Retrain and Deploy Script\n\nA retrain and deploy script is available [in the *examples/redeploy-script*\ndirectory](https://github.com/apache/predictionio/tree/develop/examples/redeploy-script).\n\nTo use the script, copy *local.sh.template* as *local.sh*, *redeploy.sh* as (say) *MyEngine_Redeploy_(production).sh* (Name of the script will appear as title of email) and put both files under the *scripts/* directory of your engine.\nThen, modify the settings inside both files, filling in details like `PIO_HOME`, `LOG_DIR`, `TARGET_EMAIL`, `ENGINE_JSON` and others.\nYou need to do `pio build` once before using this script. 
This script only trains and deploys.\nIf `pio train` or `pio deploy` fails for some reason, the running engine stays put in most cases.\nIf engine is retrained and deployed successfully, the email sent will have *Normal* in the title so you can set filtering rules.\n\n`mailutils` is used in this script. For Ubuntu, you can do `sudo update-alternatives --config mailx` and see if `/usr/bin/mail.mailutils` is selected.\nIf you are using a server that blocks email, you will need to use services like SendGrid.\n\nThis script does not guarantee no down time since at some point during `pio deploy` the original engine is shut down.\nThe down time is usually not more than a few seconds though it can be more.\n\nThe last thing to do is to add this to your *crontab*:\n\n```\n0 0 * * *   /path/to/script >/dev/null 2>/dev/null # mute both stdout and stderr to suppress email sent from cron\n```\n"
  },
  {
    "path": "docs/manual/source/deploy/monitoring.html.md",
    "content": "---\ntitle: Monitoring an Engine\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nIf you're using PredictionIO in a production setting, you'll want some way to make sure it is always up. [Monit](https://mmonit.com/monit/) is a tool which will monitor important processes and programs. This guide will show how to set up monit on your PredictionIO server to keep an engine always up and running.\n\nYou can install monit on ubuntu with\n\n```bash\nsudo apt-get install monit\n```\n\n##Configure Basics\nNow we can configure monit by the configuration file  `/etc/monit/monitrc` with your favorite editor. You will notice that this file contains quite a bit already, most of which is commented instructions/examples.\n\nFirst, choose the interval on which you want monit to check the status of your system. 
Use the `set daemon` command for this, it should already exist in the configuration file.\n\n```\nset daemon 60 #checks at 1-minute intervals\n```\n\nThe `check system` block should also already be present, under the services block.\n\n```\n  check system 127.0.0.1\n    if memory usage > 75% then alert\n    if swap usage > 25% then alert\n    if loadavg (1min) > 4 then alert\n    if loadavg (5min) > 2 then alert\n    if cpu usage (user) > 70% then alert\n    if cpu usage (system) > 30% then alert\n    if cpu usage (wait) > 20% then alert\n```\n\nYou might also want to configure the built-in web server.\n\n```\nset httpd port 2812\n     allow admin:yourpassword      # require user 'admin' with password 'yourpassword'\n```\nMore examples on configuring the web server are included in the default config file.\n\nConfiguration blocks for common services like apache, nginx, or PostgreSQL can be found [here](http://www.stuartellis.eu/articles/monit/)\n\n##Configure for PredictionIO\n###Event Server\nNow the interesting stuff, let's add monitoring for the event server.\n\n```     \ncheck process eventserver\n\tmatching \"Console eventserver\"\n        start program = \"/etc/monit/modebug /home/ubuntu/event_scripts.sh start\"\n        stop program = \"/etc/monit/modebug /home/ubuntu/event_scripts.sh stop\"\n        if cpu usage > 95% for 10 cycles then restart\n```\nThis block references a script, event_scripts.sh. This script tells monit how to restart the engine and event server if they go down.\n\nThe script might differ slightly depending on your environment but it should look something like what is shown below. 
Assume SimilarProduct is your pio app directory.\n\n```bash\n#!/bin/bash\n case $1 in\n    start)\n       cd /home/ubuntu/SimilarProduct/\n       nohup /opt/PredictionIO/bin/pio eventserver > /home/ubuntu/events.log &\n       ;;\n     stop)\n       event_pid=`pgrep -f \"Console eventserver\"`\n       kill \"$event_pid\"\n       ;;\n     *)\n esac\n exit 0\n```\nNote that this is dumping output to an events log at `/home/ubuntu/events.log`. Also, be sure that this file is executable with `sudo chmod +x event_scripts.sh`\n\n###Engine\nThe first step here is similar to checking the event server process.\n\n```\ncheck process pioengine\n        matching \"Console deploy\"\n        start program = \"/etc/monit/modebug /home/ubuntu/engine_scripts.sh start\"\n        stop program = \"/etc/monit/modebug /home/ubuntu/engine_scripts.sh stop\"\n        if cpu usage > 95% for 10 cycles then restart\n```\nBe sure to adjust your deploy command to your environment (driver-memory, postgres jar path)\n\n```bash\n#!/bin/bash\n case $1 in\n    start)\n       cd /home/ubuntu/SimilarProduct/\n       nohup /opt/PredictionIO/bin/pio deploy -- --driver-class-path /home/ubuntu/postgresql-9.4.1208.jre6.jar --driver-memory 16G > /home/ubuntu/deploy.log &\n       ;;\n     stop)\n       deploy_pid=`pgrep -f \"Console deploy\"`\n       kill \"$deploy_pid\"\n       ;;\n     *)\n esac\n exit 0\n```\n\nHowever, there can be cases when the process is running but the engine is down. 
If the Akka HTTP REST API used by PredictionIO crashes, the engine process continues but the engine fails when queried.\n\nThis sort of crash can be taken care of by using monit's `check program` capability.\n\n```\ncheck program pioengine-http with path \"/etc/monit/bin/check_engine.sh\"\n        start program = \"/etc/monit/modebug /home/ubuntu/engine_scripts.sh start\"\n        stop program = \"/etc/monit/modebug /home/ubuntu/engine_scripts.sh stop\"\n\tif status != 1\n\tthen restart\n```\nThis block executes the script at /etc/monit/bin/check_engine.sh and reads the exit status. Depending on the exit status, the block can run a restart script. The restart script can be the same as what is used in the process monitor, but we need a check_engine script.\n\n```bash\n#!/bin/bash\n# source: /etc/monit/bin/check_engine.sh\nurl=\"http://127.0.0.1:8000/queries.json\"\ncheck_string=\"itemScores\"\nresponse=$(curl -H \"Content-Type: application/json\" -d '{ \"user\": \"1\", \"num\": 0}' $url)\n\nif [[ \"$response\" =~ \"$check_string\" ]]\nthen\n  exit 1\nelse\n  exit 0\nfi\n```\nThis script does a curl request and checks the response. Note that it exits with status 1 when the check succeeds, which matches the `if status != 1` rule above. In this example, a user known to exist is queried, and the script then checks that the JSON returned contains \"itemScores\". This can vary between use cases but the idea should be similar.\n\nAgain, make sure this file is executable.\n\n##Start it All Up\nNow we can get monit running with\n\n```bash\nsudo service monit restart\n```\n\nNavigate to http://\\<your ip\\>:2812/ to check out your status page\n\n![monit screen](/images/monit.png)\n\n##Testing\nTo test, try killing your deployed engine or event server and see if monit brings it back up. You can even use the scripts we described above to do this\n\n```\nsudo ./engine_scripts.sh stop\n```\n\nRemember that monit checks only as often as you tell it to, so it may need a few minutes.\n"
  },
  {
    "path": "docs/manual/source/deploy/plugin.html.md",
    "content": "---\ntitle: Engine Server Plugin\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can write engine server plugins to handle output data. For example, it's able to transform or log prediction result. There are two types of engine server plugin.\n\n- `Output Blocker`: Before predictions go out, they will be processed through all loaded and active plugins. The order of processing is not defined. They are useful for transforming prediction results (e.g. if you do not have access to engine source code).\n- `Output Sniffer`: These should have similar benefits with event server sniffers.\n\n## Create an engine server plugin\n\nAt first, create a sbt project with following `build.sbt`:\n\n```scala\nname := \"pio-plugin-example\"\nversion := \"1.0\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies += \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\"\n```\n\nEngine server plug-ins must extend `EngineServerPlugin`. 
Here is an example of an engine server plug-in:\n\n```scala\npackage com.example\n\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.workflow._\nimport org.json4s.JValue\n\nclass MyEngineServerPlugin extends EngineServerPlugin {\n  val pluginName = \"my-engineserver-plugin\"\n  val pluginDescription = \"an example of engine server plug-in\"\n  \n  // outputBlocker or outputSniffer\n  val pluginType = EngineServerPlugin.outputBlocker\t\n  \n  // Plug-in can handle output data in this method.\n  override def process(\n      engineInstance: EngineInstance,\n      query: JValue,\n      prediction: JValue,\n      context: EngineServerPluginContext): JValue = {\n    println(prediction)\n    prediction\n  }\n\n  // Plug-in can handle requests to /plugins/<pluginType>/<pluginName>/* \n  // on the engine server in this method.\n  override def handleREST(arguments: Seq[String]): String = {\n     \"\"\"{\"pluginName\": \"my-engineserver-plugin\"}\"\"\"\n  }\n}\n```\n\nPlug-ins are loaded by `ServiceLoader`, so you must create `META-INF/services/org.apache.predictionio.workflow.EngineServerPlugin` with the following content:\t\n\n```\ncom.example.MyEngineServerPlugin\n```\n\nThen, run `sbt package` to package the plugin as a jar file. In this case, the plugin jar file is generated at `target/scala-2.11/pio-plugin-example_2.11-1.0.jar`, so copy this file to `PIO_HOME/plugins`.\n\nTo enable plugins, you have to modify `engine.json` in the root directory of your engine as follows. 
Defined plugins parameters can be accessed via `EngineServerPluginContext` in plugins.\n\n```json\n{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.example.recommendation.RecommendationEngine\",\n  \"plugins\": {\n    \"my-engineserver-plugin\": {\n      \"enabled\": true\n    }\n  },\n  ...\n}\n```\n\nWhen you start (or restart) the engine server, this plugin should be enabled.\n\n## Plugin APIs of engine server\n\nThe engine server has some plugins related APIs:\n\n- `/plugins.json`: Show all enabled plugins.\n- `/plugins/outputblocker/<pluginName>/*`: Handled by a corresponding output blocker plugin.\n- `/plugins/outputsniffer/<pluginName>/*`: Handled by a corresponding output sniffer plugin.\n\nFor example, if you send following request to the engine server:\n\t\n```\ncurl -XGET http://localhost:7070/plugins.json?accessKey=$ACCESS_KEY\n```\n\nThe engine server should respond following JSON response:\n\t\n```json\n{\n  \"plugins\": {\n    \"outputblockers\": {\n      \"my-engineserver-plugin\": {\n        \"name\": \"my-engineserver-plugin\",\n        \"description\": \"an example of engine server plug-in\",\n        \"class\": \"com.example.MyEngineServerPlugin\",\n        \"params\": {\n          \"enabled\": true\n        }\n      }\n    },\n    \"outputsniffers\": {}\n  }\n}\n```"
  },
  {
    "path": "docs/manual/source/evaluation/evaluationdashboard.html.md",
    "content": "---\ntitle: Evaluation Dashboard\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nWARNING: This is an experimental development tool, which exposes environment variables and other sensitive information about the PredictionIO application (e.g. storage configs, credentials etc.). It is not recommended to be run in production.\n\nPredictionIO provides a web dashboard which allows you to see previous\nevaluation and a drill down page about each evaluation. It is particularly\nuseful when we ran multiple [hyperparameter tunings](/evaluation/paramtuning/)\nas we may easily lose track of all the engine variants evaluated.\n\nWe can start the dashboard with the following command:\n\n```\n$ pio dashboard\n```\n\nThe dashboard lists out all completed evaluations in a reversed chronological\norder. A high level description of each evaluation can be seen directly from the\ndashboard. We can also click on the *HTML* button to see the evaluation drill\ndown page.\n\n*Note:* The dashboard server has SSL enabled and is authenticated by a key passed as a query string param `accessKey`. The configuration is in `conf/server.conf`\n"
  },
  {
    "path": "docs/manual/source/evaluation/history.html.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n"
  },
  {
    "path": "docs/manual/source/evaluation/index.html.md",
    "content": "---\ntitle: Tuning and Evaluation\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nPredictionIO's evaluation module allows you to streamline the process of\ntesting lots of knobs in engine parameters and deploy the best one out\nof it using statistically sound cross-validation methods.\n\nThere are two key components:\n\n### Engine\n\nIt is our evaluation target. During evaluation, in addition to\nthe *train* and *deploy* mode we describe in earlier sections,\nthe engine also generates a list of testing data points. These data\npoints are a sequence of *Query* and *Actual Result* tuples. 
*Queries* are\nsent to the engine and the engine responds with a *Predicted Result*,\nin the same way as the engine serves a query.\n\n### Evaluator\n\nThe evaluator joins the sequence of *Query*, *Predicted Result*, and *Actual Result*\ntogether and evaluates the quality of the engine.\nPredictionIO enables you to implement any metric with just a few lines of code.\n\n![PredictionIO Evaluation Overview](/images/engine-evaluation.png)\n\nWe will discuss various aspects of evaluation with PredictionIO.\n\n- [Hyperparameter Tuning](/evaluation/paramtuning/) - it is an end-to-end example\n  of using the PredictionIO evaluation module to select and deploy the best engine\n  parameters.\n- [Evaluation Dashboard](/evaluation/evaluationdashboard/) - it is the dashboard\n  where you can see a detailed breakdown of all previous evaluations.\n- [Choosing Evaluation Metrics](/evaluation/metricchoose/) - we cover some basic\n  machine learning metrics.\n- [Building Evaluation Metrics](/evaluation/metricbuild/) - we illustrate how to\n  implement a custom metric with as few as one line of code (plus some\n  boilerplate).\n"
  },
  {
    "path": "docs/manual/source/evaluation/metricbuild.html.md",
    "content": "---\ntitle: Building Evaluation Metrics\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nPredictionIO enables developer to implement evaluation custom evaluation\nmetric with just a few lines of code.\nWe illustrate it with [the classification\ntemplate](/templates/classification/quickstart/).\n\n## Overview\n\nA simplistic form of metric is a function which takes a\n`(Query, PredictedResult, ActualResult)`-tuple (*QPA-tuple*) as input\nand return a score.\nExploiting this properties allows us to implement custom metric with a single\nline of code (plus some boilerplates). We demonstrate this with two metrics:\naccuracy and precision.\n\n<!--\n(Note: This simple form may not be able to handle metrics which require\nmulti-stage computation, for example root-mean-square-error.)\n-->\n\n\n## Example 1: Accuracy Metric\n\nAccuracy is a metric capturing\nthe portion of correct prediction among all test data points. 
A way\nto model this is for each correct QPA-tuple, we give a score of 1.0 and\notherwise 0.0, then we take an average of all tuple scores.\n\nPredictionIO has a [[AverageMetric]] helper class which provides this feature.\nThis class takes 4 type parameters, [[EvalInfo]], [[Query]],\n[[PredictedResult]], and\n[[ActualResult]], these types can be found from the engine's signature.\nLine 5 below is the custom calculation.\n\n```scala\ncase class Accuracy\n  extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Double =\n    (if (predicted.label == actual.label) 1.0 else 0.0)\n}\n```\n\nOnce we define a metric, we tell PredictionIO we are using it in the `Evaluation`\nobject. We can run the following command to kick start the evaluation.\n\n```\n$ pio build\n...\n$ pio eval org.example.classification.AccuracyEvaluation org.example.classification.EngineParamsList\n...\n```\n\n(See MyClassification/src/main/scala/***Evaluation.scala*** for full usage.)\n\n\n## Example 2: Precision Metric\n\nPrecision is a metric for binary classifier\ncapturing the portion of correction prediction among\nall *positive* predictions.\nWe don't care about the cases where the QPA-tuple gives a negative prediction.\n(Recall that a binary classifier only provide two output values: *positive* and\n*negative*.)\nThe following table illustrates all four cases:\n\n| PredictedResult | ActualResult | Value |\n| :----: | :----: | :----: |\n| Positive | Positive | 1.0 |\n| Positive | Negative | 0.0 |\n| Negative | Positive | Don't care |\n| Negative | Negative | Don't care |\n\nCalculating the precision metric is a slightly more involved procedure than\ncalculating the accuracy metric as we have to specially handle the *don't care*\nnegative cases.\n\nPredictionIO provides a helper class `OptionAverageMetric` allows user to\nspecify *don't care* values as `None`. 
It only aggregates the non-None values.\nLines 3 to 4 is the method signature of `calculate` method. The key difference\nis that the return value is a `Option[Double]`, in contrast to `Double` for\n`AverageMetric`. This class only computes the average of `Some(.)` results.\nLines 5 to 13 are the actual logic. The first `if` factors out the\npositively predicted case, and the computation is similar to the accuracy\nmetric. The negatively predicted case are the *don't cares*, which we return\n`None`.\n\n```scala\ncase class Precision(label: Double)\n  extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Option[Double] = {\n    if (predicted.label == label) {\n      if (predicted.label == actual.label) {\n        Some(1.0)  // True positive\n      } else {\n        Some(0.0)  // False positive\n      }\n    } else {\n      None  // Unrelated case for calcuating precision\n    }\n  }\n}\n```\n\nWe define a new `Evaluation` object to tell PredictionIO how to use this\nnew precision metric.\n\n```\nobject PrecisionEvaluation extends Evaluation {\n  engineMetric = (ClassificationEngine(), new Precision(label = 1.0))\n}\n```\n\nWe can kickstarts the evaluation with the following command, notice that\nwe are reusing the same engine params list as before. 
This addresses the\nseparation of concerns when we conduct hyperparameter tuning.\n\n```\n$ pio build\n...\n$ pio eval org.example.classification.PrecisionEvaluation org.example.classification.EngineParamsList\n...\n[INFO] [CoreWorkflow$] Starting evaluation instance ID: SMhzYbJ9QgKkD0fQzTA7MA\n...\n[INFO] [MetricEvaluator] Iteration 0\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":10.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.8846153846153846,List())\n[INFO] [MetricEvaluator] Iteration 1\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":100.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.7936507936507936,List())\n[INFO] [MetricEvaluator] Iteration 2\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":1000.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.37593984962406013,List())\n[INFO] [CoreWorkflow$] Updating evaluation instance with result: MetricEvaluatorResult:\n  # engine params evaluated: 3\nOptimal Engine Params:\n  {\n  \"dataSourceParams\":{\n    \"\":{\n      \"appId\":19,\n      \"evalK\":5\n    }\n  },\n  \"preparatorParams\":{\n    \"\":{\n\n    }\n  },\n  \"algorithmParamsList\":[\n    {\n      \"naive\":{\n        \"lambda\":10.0\n      }\n    }\n  ],\n  \"servingParams\":{\n    \"\":{\n\n    }\n  }\n}\nMetrics:\n  org.example.classification.Precision: 0.8846153846153846\n```\n\n(See MyClassification/src/main/scala/***PrecisionEvaluation.scala*** for\nthe full usage.)\n"
  },
  {
    "path": "docs/manual/source/evaluation/metricchoose.html.md",
    "content": "---\ntitle: Choosing Evaluation Metrics\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThe [hyperparameter tuning module](/evaluation/paramtuning/) allows us to select\nthe optimal engine parameter defined by a `Metric`.\n`Metric` determines the quality of an engine variant.\nWe have skimmed through the process of choosing the right `Metric` in previous\nsections.\n\nThis section discusses basic evaluation metrics commonly used for\nclassification problems.\nIf you are more interested in knowing how to *implement* a custom metric, please\nskip to [the next section](/evaluation/metricbuild/).\n\n## Defining Metric\n\nMetric evaluates the quality of an engine by comparing engine's output\n(predicted result) with the original label (actual result).\nA engine serving better prediction should yield a higher metric score,\nthe tuning module returns the engine parameter with the highest score.\nIt is sometimes called [*loss\nfunction*](http://en.wikipedia.org/wiki/Loss_function) in literature, where the\ngoal is to minimize the loss function.\n\nDuring tuning, it is important for us to understand the definition of the\nmetric, to make sure it is aligned with the prediction engine's goal.\n\nIn the classification template, 
we use *Accuracy* as our metric.\n*Accuracy* is defined as:\nthe percentage\nof queries which the engine is able to predict the correct label.\n\n## Common Metrics\n\nWe illustrate the choice of metric with the following confusion matrix. Row\nrepresents the engine predicted label, column represents the actual label.\nThe second row means that of the 200 testing data points,\nthe engine predicted 60 (15 + 35 + 10) of them as label 2.0,\namong which 35 are correct prediction (i.e. actual label is 2.0, matches with\nthe prediction), and 25 are wrong.\n\n|                | Actual = 1.0 | Actual = 2.0 | Actual = 3.0 |\n| :--------------: | :----------: | :----------: | :----------: |\n| **Predicted = 1.0** | 30 | 0 | 60 |\n| **Predicted = 2.0** | 15 | 35 | 10 |\n| **Predicted = 3.0** | 0 | 0 | 50 |\n\n### Accuracy\n\nAccuracy means that how many data points are predicted correctly.\nIt is one of the simplest form of evaluation metrics.\nThe accuracy score is # of correct points / # total = (30 + 35 + 50) / 200 =\n0.575.\n\n### Precision\n\nPrecision is a metric for binary classifier\nwhich measures the correctness among all positive labels.\nA binary classifier gives only two\noutput values (i.e. positive and negative).\nFor problem where there are multiple values (3 in our example),\nwe first have to transform our problem into\na binary classification problem. For example, we can have problem whether\nlabel = 1.0. The confusion matrix now becomes:\n\n|   | Actual = 1.0 | Actual != 1.0 |\n| :-----: | :-----: | :-----: |\n| **Predicted = 1.0** | 30 |  60 |\n| **Predicted != 1.0** | 15 | 95 |\n\nPrecision is the ratio between the number of correct positive answer\n(true positive)\nand the sum of correct positive answer (true positive) and wrong but positively\nlabeled answer (false positive). 
In this case, the precision is 30 / (30 + 60) =\n~0.3333.\n\n### Recall\n\nRecall is a metric for binary classifiers\nwhich measures how many positive labels are successfully predicted amongst\nall positive labels.\nFormally, it is the ratio between the number of correct positive answers\n(true positives) and the sum of correct positive answers (true positives) and\nwrongly negatively labeled answers (false negatives).\nIn this case, the recall is 30 / (30 + 15) = ~0.6667.\n\n\nNow that we have discussed several common metrics for classification problems,\nwe can implement them using the `Metric` class in [the next\nsection](/evaluation/metricbuild/).\n"
  },
  {
    "path": "docs/manual/source/evaluation/paramtuning.html.md",
    "content": "---\ntitle: Hyperparameter Tuning\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nA PredictionIO engine is instantiated by a set of parameters. These parameters\ndefine which algorithm is to be used, as well supply the parameters for the algorithm itself. This naturally raises the question of how to choose the best set of parameters.\nThe evaluation module streamlines the process of *tuning* the engine to the best\nparameter set and deploys it.\n\n## Quick Start\n\nWe demonstrate the evaluation with [the classification template]\n(/templates/classification/quickstart/).\nThe classification template uses a naive bayesian algorithm that has a smoothing\nparameter. 
We evaluate the prediction quality against different parameter values\nto find the best parameter values, and then deploy it.\n\n### Edit the AppId\n\nEdit MyClassification/src/main/scala/***Evaluation.scala*** to specify the\n*appId* you used to import the data.\n\n```scala\nobject EngineParamsList extends EngineParamsGenerator {\n  ...\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appId = <YOUR_APP_ID>, evalK = Some(5)))\n  ...\n}\n```\n\n### Build and run the evaluation\nTo run an evaluation, the command `pio eval` is used. It takes two\nmandatory parameter,\n1. the `Evaluation` object, which tells PredictionIO the engine and metric we use\n   for the evaluation; and\n2. the `EngineParamsGenerator`, which contains a list of engine params to test\n   against.\nThe following command kickstarts the evaluation\nworkflow for the classification template.\n\n```\n$ pio build\n...\n$ pio eval org.example.classification.AccuracyEvaluation org.example.classification.EngineParamsList\n```\n\nYou will see the following output:\n\n```\n...\n[INFO] [CoreWorkflow$] runEvaluation started\n...\n[INFO] [MetricEvaluator] Iteration 0\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":10.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.9281045751633987,List())\n[INFO] [MetricEvaluator] Iteration 1\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":100.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.9150326797385621,List())\n[INFO] [MetricEvaluator] Iteration 2\n[INFO] [MetricEvaluator] EngineParams: 
{\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":1000.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.4444444444444444,List())\n[INFO] [MetricEvaluator] Writing best variant params to disk...\n[INFO] [CoreWorkflow$] Updating evaluation instance with result: MetricEvaluatorResult:\n  # engine params evaluated: 3\nOptimal Engine Params:\n  {\n  \"dataSourceParams\":{\n    \"\":{\n      \"appId\":19,\n      \"evalK\":5\n    }\n  },\n  \"preparatorParams\":{\n    \"\":{\n\n    }\n  },\n  \"algorithmParamsList\":[\n    {\n      \"naive\":{\n        \"lambda\":10.0\n      }\n    }\n  ],\n  \"servingParams\":{\n    \"\":{\n\n    }\n  }\n}\nMetrics:\n  org.example.classification.Accuracy: 0.9281045751633987\nThe best variant params can be found in best.json\n[INFO] [CoreWorkflow$] runEvaluation completed\n```\n\nThe console prints out the evaluation metric score of each engine params, and\nfinally pretty print the optimal engine params.\nAmongst the 3 engine params we evaluate, *lambda = 10.0* yields the highest\naccuracy score of ~0.9281.\n\n### Deploy the best engine parameter\n\nThe evaluation module also writes out the best engine parameter to disk at\n`best.json`. We can train and deploy this specify engine variant using the\nextra parameter `-v`. For example:\n\n```bash\n$ pio train -v best.json\n...\n[INFO] [CoreWorkflow$] Training completed successfully.\n$ pio deploy -v best.json\n...\n[INFO] [HttpListener] Bound to localhost/127.0.0.1:8000\n[INFO] [MasterActor] Bind successful. Ready to serve.\n```\n\nAt this point, we have successfully deployed the best engine variant we found\nthrough the evaluation process.\n\n\n## Detailed Explanation\n\nAn engine often depends on a number of parameters, for example, the naive bayesian\nclassification algorithm has a smoothing parameter to make the model more\nadaptive to unseen data. 
Compared with parameters which are *learnt* by the\nmachine learning algorithm, this smoothing parameter *teaches* the algorithm\nhow to work. Therefore, such parameters are usually called *hyperparameters*.\n\nIn PredictionIO, we always take a holistic view of an engine. An engine is\ncomprised of a set of ***DAS*** controllers, as well as the necessary parameters for the\ncontrollers themselves.\nIn the evaluation, we attempt to find out the best hyperparameters for an\n*engine*, which we call ***engine params***. Using engine params we can\ndeploy a complete engine.\n\nThis section demonstrates how to select the optimal engine params\nwhilst ensuring the model doesn't overfit using PredictionIO's evaluation\nmodule.\n\n## The Evaluation Design\n\nThe PredictionIO evaluation module tests for the best engine params for an\nengine.\n\nGiven a set of engine params, we instantiate an engine and evaluate it with existing data.\nThe data is split into two sets, a training set and a validation set.\nThe training set is used to train the engine, which is deployed using the same steps described in earlier sections.\nWe query the engine with the test set data, and compare the predicted values in the response\nwith the actual data contained in the validation set.\nWe define a ***metric*** to compare ***predicted result*** returned from\nthe engine with the ***actual result*** which we obtained from the test data.\nThe goal is to maximize the metric score.\n\nThis process is repeated many times with a series of engine params.\nAt the end, PredictionIO returns the best engine params.\n\nWe demonstrate the evaluation with [the classification template]\n(/templates/classification/quickstart/).\n\n## Evaluation Data Generation\n\nIn evaluation data generation, the goal is to generate a sequence of (training,\nvalidation) data tuple. A common way is to use a *k-fold* generation process.\nThe data set is split into *k folds*. 
We generate k tuples of training and\nvalidation sets, for each tuple, the training set takes *k - 1* of the folds and\nthe validation set takes the remaining fold.\n\nTo enable evaluation data generation, we need to define the ***actual result***\nand implement the method for generating the (training, validation) data tuple.\n\n### Actual Result\n\nIn MyClassification/src/main/scala/***Engine.scala***, the `ActualResult` class\ndefines the ***actual result***:\n\n```scala\nclass ActualResult(\n  val label: Double\n) extends Serializable\n```\n\nThis class is used to store the actual label of the data (contrast to\n`PredictedResult` which is output of the engine).\n\n### Implement Data Generation Method in DataSource\n\nIn MyClassification/src/main/scala/***DataSource.scala***, the method\n`readEval` reads and selects data from datastore and returns a\nsequence of (training, validation) data.\n\n```scala\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, ActualResult] {\n\n  ...\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalK.isEmpty, \"DataSourceParams.evalK must not be None\")\n\n    // The following code reads the data from data store. It is equivalent to\n    // the readTraining method. 
We copy-and-paste the exact code here for\n    // illustration purpose, a recommended approach is to factor out this logic\n    // into a helper function and have both readTraining and readEval call the\n    // helper.\n    val eventsDb = Storage.getPEvents()\n    val labeledPoints: RDD[LabeledPoint] = eventsDb.aggregateProperties(\n      appId = dsp.appId,\n      entityType = \"user\",\n      // only keep entities with these required properties defined\n      required = Some(List(\"plan\", \"attr0\", \"attr1\", \"attr2\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          LabeledPoint(properties.get[Double](\"plan\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"attr0\"),\n              properties.get[Double](\"attr1\"),\n              properties.get[Double](\"attr2\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. 
Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n    // End of reading from data store\n\n    // K-fold splitting\n    val evalK = dsp.evalK.get\n    val indexedPoints: RDD[(LabeledPoint, Long)] = labeledPoints.zipWithIndex\n\n    (0 until evalK).map { idx =>\n      val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1)\n      val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1)\n\n      (\n        new TrainingData(trainingPoints),\n        new EmptyEvaluationInfo(),\n        testingPoints.map {\n          p => (new Query(p.features.toArray), new ActualResult(p.label))\n        }\n      )\n    }\n  }\n}\n```\n\nThe `readEval` method returns a sequence of (`TrainingData`, `EvaluationInfo`,\n`RDD[(Query, ActualResult)]`.\n`TrainingData` is the same class we use for deploy,\n`RDD[(Query, ActualResult)]` is the\nvalidation set, `EvaluationInfo` can be used to hold some global evaluation data\n; it is not used in the current example.\n\nLines 11 to 41 is the logic of reading and transforming data from the\ndatastore; it is equivalent to the existing `readTraining` method. After line\n41, the variable `labeledPoints` contains the complete dataset with which we use\nto generate the (training, validation) sequence.\n\nLines 43 to 57 is the *k-fold* logic. Line 45 gives each data point a unique id,\nand we decide whether the point belongs to the training or validation set\ndepends on the *mod* of the id (lines 48 to 49).\nFor each point in the validation set, we construct the `Query` and\n`ActualResult` (line 55) which is used validate the engine.\n\n## Evaluation Metrics\n\nWe define a `Metric` which gives a *score* to engine params. 
The higher the\nscore, the better the engine params are.\nIn this template, we use accuracy score which measures\nthe portion of correct prediction among all data points.\n\nIn MyClassification/src/main/scala/**Evaluation.scala**, the class\n`Accuracy` implements the *accuracy* score.\nIt extends a base helper class `AverageMetric` which calculates the average\nscore overall *(Query, PredictionResult, ActualResult)* tuple.\n\n```scala\ncase class Accuracy\n  extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Double = (if (predicted.label == actual.label) 1.0 else 0.0)\n}\n```\n\nThen, implement a `Evaluation` object to define the engine and metric\nused in this evaluation.\n\n```scala\nobject AccuracyEvaluation extends Evaluation {\n  engineMetric = (ClassificationEngine(), new Accuracy())\n}\n```\n\n## Parameters Generation\nThe last component is to specify the list of engine params we want to evaluate.\nIn this guide, we discuss the simplest method. We specify an explicit list of\nengine params to be evaluated.\n\nIn MyClassification/src/main/scala/**Evaluation.scala**, the object\n`EngineParamsList` specifies the engine params list to be used.\n\n```scala\nobject EngineParamsList extends EngineParamsGenerator {\n  // Define list of EngineParams used in Evaluation\n\n  // First, we define the base engine params. It specifies the appId from which\n  // the data is read, and a evalK parameter is used to define the\n  // cross-validation.\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appId = 18, evalK = Some(5)))\n\n  // Second, we specify the engine params list by explicitly listing all\n  // algorithm parameters. 
In this case, we evaluate 3 engine params, each with\n  // a different algorithm params value.\n  engineParamsList = Seq(\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(10.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(100.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(1000.0)))))\n}\n```\n\nA good practice is to first define a base engine params, it contains the common\nparameters used in all evaluations (lines 7 to 8). With the base params, we\nconstruct the list of engine params we want to evaluation by\nadding or replacing the controller parameter. Lines 13 to 16 generate 3 engine\nparameters, each has a different smoothing parameters.\n\n\n\n## Running the Evaluation\n\nIt remains to run the evaluation. Let's recap the quick start section above.\nThe `pio eval` command kick starts the evaluation, and the result can be seen\nfrom the console.\n\n```\n$ pio build\n...\n$ pio eval org.example.classification.AccuracyEvaluation org.example.classification.EngineParamsList\n```\n\nYou will see the following output:\n\n```\n...\n[INFO] [CoreWorkflow$] runEvaluation started\n...\n[INFO] [MetricEvaluator] Iteration 0\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":10.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.9281045751633987,List())\n[INFO] [MetricEvaluator] Iteration 1\n[INFO] [MetricEvaluator] EngineParams: {\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":100.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.9150326797385621,List())\n[INFO] [MetricEvaluator] Iteration 2\n[INFO] [MetricEvaluator] EngineParams: 
{\"dataSourceParams\":{\"\":{\"appId\":19,\"evalK\":5}},\"preparatorParams\":{\"\":{}},\"algorithmParamsList\":[{\"naive\":{\"lambda\":1000.0}}],\"servingParams\":{\"\":{}}}\n[INFO] [MetricEvaluator] Result: MetricScores(0.4444444444444444,List())\n[INFO] [MetricEvaluator] Writing best variant params to disk...\n[INFO] [CoreWorkflow$] Updating evaluation instance with result: MetricEvaluatorResult:\n  # engine params evaluated: 3\nOptimal Engine Params:\n  {\n  \"dataSourceParams\":{\n    \"\":{\n      \"appId\":19,\n      \"evalK\":5\n    }\n  },\n  \"preparatorParams\":{\n    \"\":{\n\n    }\n  },\n  \"algorithmParamsList\":[\n    {\n      \"naive\":{\n        \"lambda\":10.0\n      }\n    }\n  ],\n  \"servingParams\":{\n    \"\":{\n\n    }\n  }\n}\nMetrics:\n  org.example.classification.Accuracy: 0.9281045751633987\nThe best variant params can be found in best.json\n[INFO] [CoreWorkflow$] runEvaluation completed\n```\n\n## Notes\n\n- We deliberately do not mention the ***test set*** in this hyperparameter tuning guide.\nIn machine learning literature, the ***test set*** is a separate piece of data\nwhich is used to evaluate the final engine params output by the evaluation\nprocess. This guarantees that no information in the training / validation set is\n*leaked* into the engine params, which would yield a biased outcome. With PredictionIO,\nthere are multiple ways of conducting robust tuning; we will cover this\ntopic in the coming sections.\n"
  },
  {
    "path": "docs/manual/source/gallery/templates.yaml",
    "content": "# Similarity\n\n- template:\n    name: Content Based SVD Item Similarity Engine\n    repo: \"https://github.com/alexice/template-scala-parallel-svd-item-similarity\"\n    description: |-\n      Template to calculate similarity between items based on their attributes&mdash;sometimes called content-based similarity. Attributes can be either numeric or categorical in the last case it will be encoded using one-hot encoder. Algorithm uses SVD in order to reduce data dimensionality. Cosine similarity is now implemented but can be easily extended to other similarity measures.\n    tags: [similarity]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://groups.google.com/forum/#!forum/actionml-user\">The Universal Recommender user group</a>'\n\n- template:\n    name: Cstablo-template-text-similarity-classification\n    repo: \"https://github.com/goliasz/pio-template-text-similarity\"\n    description: |-\n      Text similarity engine based on Word2Vec algorithm. Builds vectors of full documents in training phase. 
Finds similar documents in query phase.\n    tags: [similarity, nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.5\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/goliasz/pio-template-text-similarity/issues\">Github issues</a>'\n\n# Clustering\n\n- template:\n    name: MLlibKMeansClustering\n    repo: \"https://github.com/sahiliitm/predictionio-MLlibKMeansClusteringTemplate\"\n    description: |-\n      This is a template which demonstrates the use of K-Means clustering algorithm which can be deployed on a spark-cluster using prediction.io.\n    tags: [clustering]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: '-'\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/sahiliitm/predictionio-MLlibKMeansClusteringTemplate/issues\">Github issues</a>'\n\n- template:\n    name: Topic Model (LDA)\n    repo: \"https://github.com/EmergentOrder/template-scala-topic-model-LDA\"\n    description: |-\n      A PredictionIO engine template using Latent Dirichlet Allocation to learn a topic model from raw text\n    tags: [clustering]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.4\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/EmergentOrder/template-scala-topic-model-LDA/issues\">Github issues</a>'\n\n- template:\n    name: KMeans-Clustering-Template\n    repo: \"https://github.com/singsanj/KMeans-parallel-template\"\n    description: |-\n      forked from PredictionIO/template-scala-parallel-vanilla. It implements the KMeans Algorithm. 
Can be extended to mainstream implementation with minor changes.\n    tags: [clustering]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/singsanj/KMeans-parallel-template/issues\">Github issues</a>'\n\n- template:\n    name: Topic Labelling with Wikipedia\n    repo: \"https://github.com/rajdeepd/template-Labelling-Topics-with-wikipedia\"\n    description: |-\n      This template will label topics (e.g. topic generated through LDA topic modeling) with relevant category by referring to Wikipedia as a knowledge base.\n    tags: [clustering, nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.10.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"https://github.com/peoplehum/template-Labelling-Topics-with-wikipedia/issues\">Github issues</a>'\n\n- template:\n    name: Bayesian Nonparametric Chinese Restaurant Process Clustering\n    repo: \"https://github.com/jirotubuyaki/predictionio-template-crp-clustering\"\n    description: |-\n      Chinese restaurant process is stochastic process for statistical inference. The clustering which uses Chinese restaurant process does not need to decide the number of clusters in advance. 
This algorithm automatically adjusts it.\n    tags: [clustering]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.10.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"https://github.com/jirotubuyaki/predictionio-template-crp-clustering/issues\">Github issues</a>'\n\n# Recommenders\n\n- template:\n    name: The Universal Recommender\n    repo: \"https://github.com/actionml/universal-recommender\"\n    description: |-\n      Use for:\n      <ul class=tab-list>\n      <li class=tab-list-element>Personalized recommendations&mdash;user-based</li>\n      <li class=tab-list-element>Similar items&mdash;item-based</li>\n      <li class=tab-list-element>Viewed this bought that&mdash;item-based cross-action</li>\n      <li class=tab-list-element>Popular Items and User-defined ranking</li>\n      <li class=tab-list-element>Item-set recommendations for complementary purchases or shopping carts&mdash;item-set-based</li>\n      <li class=tab-list-element>Hybrid collaborative filtering and content based recommendations&mdash;limited content-based</li>\n      <li class=tab-list-element>Business rules</li>\n      </ul>\n\n      <p>The name \"Universal\" refers to the use of this template in virtually any case that calls for recommendations - ecommerce, news, videos, virtually anywhere user behavioral data is known. This recommender uses the new <a href=\"http://mahout.apache.org/users/algorithms/intro-cooccurrence-spark.html\">Cross-Occurrence (CCO) algorithm</a> to auto-correlate different user actions (clickstream data), profile data, contextual information (location, device), and some content types to make better recommendations. 
It also implements flexible filters and boosts for implementing business rules.</p>\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.10.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"https://groups.google.com/forum/#!forum/actionml-user\">The Universal Recommender user group</a>'\n\n- template:\n    name: Recommendation\n    repo: \"https://github.com/apache/predictionio-template-recommender\"\n    description: |-\n      An engine template is an almost-complete implementation of an engine. PredictionIO's Recommendation Engine Template has integrated Apache Spark MLlib's Collaborative Filtering algorithm by default. You can customize it easily to fit your specific needs.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: E-Commerce Recommendation\n    repo: \"https://github.com/apache/predictionio-template-ecom-recommender\"\n    description: |-\n      This engine template provides personalized recommendation for e-commerce applications with the following features by default:\n\n      <ul class=tab-list>\n      <li class=tab-list-element>Exclude out-of-stock items</li>\n      <li class=tab-list-element>Provide recommendation to new users who sign up after the model is trained</li>\n      <li class=tab-list-element>Recommend unseen items only (configurable)</li>\n      <li class=tab-list-element>Recommend popular items if no information about the user is available (added in template version v0.4.0)</li>\n      </ul>\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 
2.0\"\n    status: alpha\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: Similar Product\n    repo: \"https://github.com/apache/predictionio-template-similar-product\"\n    description: |-\n       This engine template recommends products that are \"similar\" to the input product(s). Similarity is not defined by user or item attributes but by users' previous actions. By default, it uses 'view' action such that product A and B are considered similar if most users who view A also view B. The template can be customized to support other action types such as buy, rate, like..etc\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: E-Commerce Recommendation (Java)\n    repo: \"https://github.com/apache/predictionio-template-java-ecom-recommender\"\n    description: |-\n      This engine template provides personalized recommendation for e-commerce applications with the following features by default:\n\n      <ul class=tab-list>\n      <li class=tab-list-element>Exclude out-of-stock items</li>\n      <li class=tab-list-element>Provide recommendation to new users who sign up after the model is trained</li>\n      <li class=tab-list-element>Recommend unseen items only (configurable)</li>\n      <li class=tab-list-element>Recommend popular items if no information about the user is available</li>\n      </ul>\n    tags: [recommender]\n    type: Parallel\n    language: Java\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.11.0-incubating\n    
apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: Product Ranking\n    repo: \"https://github.com/PredictionIO/template-scala-parallel-productranking\"\n    description: |-\n      This engine template sorts a list of products for a user based on his/her preference. This is ideal for personalizing the display order of product page, catalog, or menu items if you have large number of options. It creates engagement and early conversion by placing products that a user prefers on the top.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n\n- template:\n    name: Complementary Purchase\n    repo: \"https://github.com/PredictionIO/template-scala-parallel-complementarypurchase\"\n    description: |-\n      This engine template recommends the complementary items which most user frequently buy at the same time with one or more items in the query.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n\n- template:\n    name: Music Recommendations\n    repo: \"https://github.com/vaibhavist/template-scala-parallel-recommendation\"\n    description: |-\n      This is very similar to music recommendations template. 
It is integrated with all the events a music application can have such as song played, liked, downloaded, purchased, etc.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/vaibhavist/template-scala-parallel-recommendation/issues\">Github issues</a>'\n\n- template:\n    name: Viewed This Bought That\n    repo: \"https://github.com/vngrs/template-scala-parallel-viewedthenbought\"\n    description: |-\n      This Engine uses co-occurrence algorithm to match viewed items to bought items. Using this engine you may predict which item the user will buy, given the item(s) browsed.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/vngrs/template-scala-parallel-viewedthenbought/issues\">Github issues</a>'\n\n- template:\n    name: Frequent Pattern Mining\n    repo: \"https://github.com/goliasz/pio-template-fpm\"\n    description: |-\n      Template uses FP Growth algorithm allowing to mine for frequent patterns. Template returns subsequent items together with confidence score. Sometimes used as a shopping cart recommender but has other uses.\n    tags: [recommender]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.5\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/goliasz/pio-template-fpm/issues\">Github issues</a>'\n\n- template:\n    name: Similar Product with Rating\n    repo: \"https://github.com/ramaboo/template-scala-parallel-similarproduct-with-rating\"\n    description: |-\n      Similar product template with rating support! 
Used for the MovieLens Demo.\n    tags: [recommender, similarity]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: beta\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/ramaboo/template-scala-parallel-similarproduct-with-rating/issues\">Github issues</a>'\n\n- template:\n    name: Frequent Pattern Mining\n    repo: \"https://github.com/goliasz/pio-template-fpm\"\n    description: |-\n      Template uses FP Growth algorithm allowing to mine for frequent patterns. Template returns subsequent items together with confidence score.\n    tags: [recommender, other]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.5\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/goliasz/pio-template-fpm/issues\">Github issues</a>'\n\n# classification\n\n- template:\n    name: Classification\n    repo: \"https://github.com/apache/predictionio-template-attribute-based-classifier\"\n    description: |-\n      An engine template is an almost-complete implementation of an engine. PredictionIO's Classification Engine Template has integrated Apache Spark MLlib's Naive Bayes algorithm by default.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: Classification\n    repo: \"https://github.com/haricharan123/PredictionIo-lingpipe-MultiLabelClassification\"\n    description: |-\n      This engine template is an almost-complete implementation of an engine meant to used with PredictionIO. 
This Multi-label Classification Engine Template has integrated LingPipe (http://alias-i.com/lingpipe/) algorithm by default.\n    tags: [classification]\n    type: Parallel\n    language: Java\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.5\n    apache_pio_convesion_required: \"already compatible\"\n\n- template:\n    name: Lead Scoring\n    repo: \"https://github.com/PredictionIO/template-scala-parallel-leadscoring\"\n    description: |-\n      This engine template predicts the probability that a user will convert (conversion event by user) in the current session.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n\n- template:\n    name: Text Classification\n    repo: \"https://github.com/apache/predictionio-template-text-classifier\"\n    description: |-\n      Use this engine for general text classification purposes. Uses OpenNLP library for text vectorization, includes t.f.-i.d.f.-based feature transformation and reduction, and uses Spark MLLib's Multinomial Naive Bayes implementation for classification.\n    tags: [classification, nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/apache/predictionio-template-text-classifier/issues\">Github issues</a>'\n\n- template:\n    name: Churn Prediction - H2O Sparkling Water\n    repo: \"https://github.com/andrewwuan/PredictionIO-Churn-Prediction-H2O-Sparkling-Water\"\n    description: |-\n      This is an engine template with Sparkling Water integration. 
The goal is to use Deep Learning algorithm to predict the churn rate for a phone carrier's customers.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/andrewwuan/PredictionIO-Churn-Prediction-H2O-Sparkling-Water/issues\">Github issues</a>'\n\n- template:\n    name: Classification Deeplearning4j\n    repo: \"https://github.com/detrevid/predictionio-template-classification-dl4j\"\n    description: |-\n      A classification engine template that uses Deeplearning4j library.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/detrevid/predictionio-template-classification-dl4j/issues\">Github issues</a>'\n\n- template:\n    name: Probabilistic Classifier (Logistic Regression w/ LBFGS)\n    repo: \"https://github.com/EmergentOrder/template-scala-probabilistic-classifier-batch-lbfgs\"\n    description: |-\n      A PredictionIO engine template using logistic regression (trained with limited-memory BFGS ) with raw (probabilistic) outputs.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"MIT License\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/EmergentOrder/template-scala-probabilistic-classifier-batch-lbfgs/issues\">Github issues</a>'\n\n- template:\n    name: Document Classification with OpenNLP\n    repo: \"https://github.com/chrischris292/template-classification-opennlp\"\n    description: |-\n      Document Classification template with OpenNLP GISModel.\n    tags: [classification]\n    type: Parallel\n    
language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/chrischris292/template-classification-opennlp/issues\">Github issues</a>'\n\n- template:\n    name: Circuit End Use Classification\n    repo: \"https://github.com/harry5z/template-circuit-classification-sparkling-water\"\n    description: |-\n      A classification engine template that uses machine learning models trained with sample circuit energy consumption data and end usage to predict the end use of a circuit by its energy consumption history.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.1\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/harry5z/template-circuit-classification-sparkling-water/issues\">Github issues</a>'\n\n- template:\n    name: GBRT_Classification\n    repo: \"https://github.com/ailurus1991/GBRT_Template_PredictionIO\"\n    description: |-\n      The Gradient-Boosted Regression Trees(GBRT) for classification.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/ailurus1991/GBRT_Template_PredictionIO/issues\">Github issues</a>'\n\n- template:\n    name: MLlib-Decision-Trees-Template\n    repo: \"https://github.com/mohanaprasad1994/PredictionIO-MLlib-Decision-Trees-Template\"\n    description: |-\n      An engine template is an almost-complete implementation of an engine. 
This is a classification engine template which has integrated Apache Spark MLlib's Decision tree algorithm by default.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/mohanaprasad1994/PredictionIO-MLlib-Decision-Trees-Template/issues\">Github issues</a>'\n\n- template:\n    name: Classification with MultiLayerNetwork\n    repo: \"https://github.com/jimmyywu/predictionio-template-classification-dl4j-multilayer-network\"\n    description: |-\n      This engine template integrates the MultiLayerNetwork implementation from the Deeplearning4j library into PredictionIO. In this template, we use PredictionIO to classify the widely-known IRIS flower dataset by constructing a deep-belief net.\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/jimmyywu/predictionio-template-classification-dl4j-multilayer-network/issues\">Github issues</a>'\n\n- template:\n    name: Deeplearning4j RNTN\n    repo: \"https://github.com/thomasste/template-scala-parallel-dl4j-rntn\"\n    description: |-\n      Recursive Neural Tensor Network algorithm is supervised learning algorithm used to predict sentiment of sentences. This template is based on deeplearning4j RNTN example: https://github.com/SkymindIO/deeplearning4j-nlp-examples/tree/master/src/main/java/org/deeplearning4j/rottentomatoes/rntn. 
Its goal is to show how to integrate deeplearning4j library with PredictionIO.\n    tags: [classification, nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/thomasste/template-scala-parallel-dl4j-rntn/issues\">Github issues</a>'\n\n- template:\n    name: classifier-kafka-streaming-template\n    repo: \"https://github.com/singsanj/classifier-kafka-streaming-template\"\n    description: |-\n      The template will provide a simple integration of DASE with kafka using spark streaming capabilities in order to play around with real time notification, messages ..\n    tags: [classification]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: \"-\"\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/singsanj/classifier-kafka-streaming-template/issues\">Github issues</a>'\n\n- template:\n    name: Sentiment Analysis - Bag of Words Model\n    repo: \"https://github.com/peoplehum/BagOfWords_SentimentAnalysis_Template\"\n    description: |-\n      This sentiment analysis template uses a bag of words model. 
Given text, the engine will return sentiment as 1.0 (positive) or 0.0 (negative) along with scores indicating how +ve or -ve it is.\n    tags: [classification, nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.10.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"https://github.com/peoplehum/BagOfWords_SentimentAnalysis_Template/issues\">Github issues</a>'\n\n# Regression\n\n- template:\n    name: Survival Regression\n    repo: \"https://github.com/goliasz/pio-template-sr\"\n    description: |-\n      Survival regression template is based on brand new Spark 1.6 AFT (accelerated failure time) survival analysis algorithm. There are interesting applications of survival analysis like:\n\n      <ul class=tab-list>\n        <li class=tab-list-element>Business Planning : Profiling customers who has a higher survival rate and make strategy accordingly.</li>\n        <li class=tab-list-element>Lifetime Value Prediction : Engage with customers according to their lifetime value</li>\n        <li class=tab-list-element>Active customers : Predict when the customer will be active for the next time and take interventions accordingly.     
</li>\n        <li class=tab-list-element>Campaign evaluation : Monitor effect of campaign on the survival rate of customers.</li>\n      </ul>\n\n      Source: http://www.analyticsvidhya.com/blog/2014/04/survival-analysis-model-you/\n    tags: [regression]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: beta\n    pio_min_version: 0.9.5\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"http://www.analyticsvidhya.com/blog/2014/04/survival-analysis-model-you/\">Blog post</a>'\n\n- template:\n    name: Sparkling Water-Deep Learning Energy Forecasting\n    repo: \"https://github.com/BensonQiu/predictionio-template-recommendation-sparklingwater\"\n    description: |-\n      This Engine Template demonstrates an energy forecasting engine. It integrates Deep Learning from the Sparkling Water library to perform energy analysis. We can query the circuit and time, and return predicted energy usage.\n    tags: [regression]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/BensonQiu/predictionio-template-recommendation-sparklingwater/issues\">Github issues</a>'\n\n- template:\n    name: Electric Load Forecasting\n    repo: \"https://github.com/detrevid/predictionio-load-forecasting\"\n    description: |-\n      This is a PredictionIO engine for electric load forecasting. 
The engine is using linear regression with stochastic gradient descent from Spark MLlib.\n    tags: [regression]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/detrevid/predictionio-load-forecasting/issues\">Github issues</a>'\n\n- template:\n    name: MLLib-LinearRegression\n    repo: \"https://github.com/RAditi/PredictionIO-MLLib-LinReg-Template\"\n    description: |-\n      This template uses the linear regression with stochastic gradient descent algorithm from MLLib to make predictions on real-valued data based on features (explanatory variables)\n    tags: [regression]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.1\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/RAditi/PredictionIO-MLLib-LinReg-Template/issues\">Github issues</a>'\n\n# NLP\n\n- template:\n    name: OpenNLP Sentiment Analysis Template\n    repo: \"https://github.com/infoquestsolutions/OpenNLP-SentimentAnalysis-Template\"\n    description: |-\n      Given a sentence, this engine will return a score between 0 and 4. This is the sentiment of the sentence. The lower the number the more negative the sentence is. 
It uses the OpenNLP library.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: beta\n    pio_min_version: \"0.10.0-incubating\"\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"https://github.com/infoquestsolutions/OpenNLP-SentimentAnalysis-Template/issues\">Github issues</a>'\n\n- template:\n    name: Sentiment analysis\n    repo: \"https://github.com/pawel-n/template-scala-cml-sentiment\"\n    description: |-\n      This template implements various algorithms for sentiment analysis, most based on recursive neural networks (RNN) and recursive neural tensor networks (RNTN)[1]. It uses an experimental library called Composable Machine Learning (CML) and the Stanford Parser. The example data set is the Stanford Sentiment Treebank.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/pawel-n/template-scala-cml-sentiment/issues\">Github issues</a>'\n\n- template:\n    name: Word2Vec\n    repo: \"https://github.com/pawel-n/template-scala-parallel-word2vec\"\n    description: |-\n      This template integrates the Word2Vec implementation from deeplearning4j with PredictionIO. The Word2Vec algorithm takes a corpus of text and computes a vector representation for each word. 
These representations can be subsequently used in many natural language processing applications.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/pawel-n/template-scala-parallel-word2vec/issues\">Github issues</a>'\n\n- template:\n    name: Spark Deeplearning4j Word2Vec\n    repo: \"https://github.com/thomasste/template-scala-spark-dl4j-word2vec\"\n    description: |-\n      This template shows how to integrate Deeplearning4j Spark API with PredictionIO on example of app which uses Word2Vec algorithm to predict nearest words.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/thomasste/template-scala-spark-dl4j-word2vec/issues\">Github issues</a>'\n\n- template:\n    name: Sentiment Analysis Template\n    repo: \"https://github.com/whhone/template-sentiment-analysis\"\n    description: |-\n      Given a sentence, return a score between 0 and 4, indicating the sentence's sentiment. 0 being very negative, 4 being very positive, 2 being neutral. 
The engine uses the stanford CoreNLP library and the Scala binding `gangeli/CoreNLP-Scala` for parsing.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: None\n    status: stable\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/whhone/template-sentiment-analysis/issues\">Github issues</a>'\n\n- template:\n    name: Recursive Neural Networks (Sentiment Analysis)\n    repo: \"https://github.com/thomasste/template-scala-rnn\"\n    description: |-\n      Predicting sentiment of phrases with use of Recursive Neural Network algorithm and OpenNLP parser.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.2\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/thomasste/template-scala-rnn/issues\">Github issues</a>'\n\n- template:\n    name: CoreNLP Text Classification\n    repo: \"https://github.com/Ling-Ling/CoreNLP-Text-Classification\"\n    description: |-\n      This engine uses CoreNLP to do text analysis in order to classify the category a string of text falls under.\n    tags: [nlp]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: alpha\n    pio_min_version: \"-\"\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/Ling-Ling/CoreNLP-Text-Classification/issues\">Github issues</a>'\n\n# other\n\n- template:\n    name: template-decision-tree-feature-importance\n    repo: \"https://github.com/anthill/template-decision-tree-feature-importance\"\n    description: |-\n      This template shows how to use Spark's decision tree. 
It enables : - both categorical and continuous features - feature importance calculation - tree output in json - reading training data from a csv file\n    tags: [other]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.9.0\n    apache_pio_convesion_required: \"requires conversion\"\n    support_link: '<a href=\"https://github.com/anthill/template-decision-tree-feature-importance/issues\">Github issues</a>'\n\n- template:\n    name: Skeleton\n    repo: \"https://github.com/apache/predictionio-template-skeleton\"\n    description: |-\n      Skeleton template is for developing new engine when you find other engine templates do not fit your needs. This template provides a skeleton to kick start new engine development.\n    tags: [other]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.11.0-incubating\n    apache_pio_convesion_required: \"already compatible\"\n    support_link: '<a href=\"http://predictionio.apache.org/support/\">Apache PredictionIO mailing lists</a>'\n\n- template:\n    name: Linear Regression BFGS\n    repo: \"https://github.com/mgcdanny/pio-linear-regression-bfgs\"\n    description: |-\n      Modeling the relationship between a dependent variable, y, and one or more explanatory variables, denoted X.\n    tags: [regression]\n    type: Parallel\n    language: Scala\n    license: \"Apache Licence 2.0\"\n    status: beta\n    pio_min_version: 0.10.0\n\n- template:\n    name: Classification template for Iris\n    repo: \"https://github.com/jpioug/predictionio-template-iris\"\n    description: |-\n      This is Python(PySpark) based classification example for Iris dataset.\n    tags: [classification]\n    type: Parallel\n    language: Python\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.12.0-incubating\n\n- template:\n    name: Regression template for Boston House Prices\n    
repo: \"https://github.com/jpioug/predictionio-template-boston-house-prices\"\n    description: |-\n      This is Python(PySpark) based regression example for Boston House Prices dataset.\n    tags: [regression]\n    type: Parallel\n    language: Python\n    license: \"Apache Licence 2.0\"\n    status: stable\n    pio_min_version: 0.12.0-incubating\n"
  },
  {
    "path": "docs/manual/source/github.html",
    "content": "---\nlayout: raw\n---\n\n<!--\n  Licensed to the Apache Software Foundation (ASF) under one or more\n  contributor license agreements.  See the NOTICE file distributed with\n  this work for additional information regarding copyright ownership.\n  The ASF licenses this file to You under the Apache License, Version 2.0\n  (the \"License\"); you may not use this file except in compliance with\n  the License.  You may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  See the License for the specific language governing permissions and\n  limitations under the License.\n-->\n\n<html><body><style type=\"text/css\">\nbody {\n  padding: 0;\n  margin: 0;\n  font: bold 11px/14px \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n  overflow: hidden;\n}\n.github-btn {\n  height: 20px;\n  overflow: hidden;\n}\n.gh-btn,\n.gh-count,\n.gh-ico {\n  float: left;\n}\n.gh-btn,\n.gh-count {\n  padding: 2px 5px 2px 4px;\n  color: #333;\n  text-decoration: none;\n  text-shadow: 0 1px 0 #fff;\n  white-space: nowrap;\n  cursor: pointer;\n  border-radius: 3px;\n}\n.gh-btn {\n  background-color: #e6e6e6;\n  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#fafafa), to(#eaeaea));\n  background-image: -webkit-linear-gradient(#fafafa, #eaeaea);\n  background-image: -moz-linear-gradient(top, #fafafa, #eaeaea);\n  background-image: -ms-linear-gradient(#fafafa, #eaeaea);\n  background-image: -o-linear-gradient(#fafafa, #eaeaea);\n  background-image: linear-gradient(#fafafa, #eaeaea);\n  background-repeat: no-repeat;\n  border: 1px solid #d4d4d4;\n  border-bottom-color: #bcbcbc;\n}\n.gh-btn:hover,\n.gh-btn:focus,\n.gh-btn:active {\n  color: #fff;\n  text-decoration: none;\n  text-shadow: 0 -1px 0 rgba(0,0,0,.25);\n  
border-color: #518cc6 #518cc6 #2a65a0;\n  background-color: #3072b3;\n}\n.gh-btn:hover,\n.gh-btn:focus {\n  background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#599bdc), to(#3072b3));\n  background-image: -webkit-linear-gradient(#599bdc, #3072b3);\n  background-image: -moz-linear-gradient(top, #599bdc, #3072b3);\n  background-image: -ms-linear-gradient(#599bdc, #3072b3);\n  background-image: -o-linear-gradient(#599bdc, #3072b3);\n  background-image: linear-gradient(#599bdc, #3072b3);\n}\n.gh-btn:active {\n  background-image: none;\n  -webkit-box-shadow: inset 0 2px 5px rgba(0,0,0,.10);\n  -moz-box-shadow: inset 0 2px 5px rgba(0,0,0,.10);\n  box-shadow: inset 0 2px 5px rgba(0,0,0,.10);\n}\n.gh-ico {\n  width: 14px;\n  height: 14px;\n  margin-right: 4px;\n  vertical-align: 3px;\n  background-image: url(data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4NCjwhLS0gR2VuZXJhdG9yOiBBZG9iZSBJbGx1c3RyYXRvciAxNy4xLjAsIFNWRyBFeHBvcnQgUGx1Zy1JbiAuIFNWRyBWZXJzaW9uOiA2LjAwIEJ1aWxkIDApICAtLT4NCjwhRE9DVFlQRSBzdmcgUFVCTElDICItLy9XM0MvL0RURCBTVkcgMS4xLy9FTiIgImh0dHA6Ly93d3cudzMub3JnL0dyYXBoaWNzL1NWRy8xLjEvRFREL3N2ZzExLmR0ZCI+DQo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxheWVyXzEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIHg9IjBweCIgeT0iMHB4Ig0KCSB3aWR0aD0iMTMycHgiIGhlaWdodD0iNjZweCIgdmlld0JveD0iMCAwIDEzMiA2NiIgZW5hYmxlLWJhY2tncm91bmQ9Im5ldyAwIDAgMTMyIDY2IiB4bWw6c3BhY2U9InByZXNlcnZlIj4NCjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBmaWxsPSIjMzMzMzMzIiBkPSJNMzMsMS44Yy0xNy43LDAtMzIsMTQuMy0zMiwzMmMwLDE0LjEsOS4yLDI2LjEsMjEuOSwzMC40DQoJYzEuNiwwLjMsMi4yLTAuNywyLjItMS41YzAtMC44LDAtMi44LDAtNS40Yy04LjksMS45LTEwLjgtNC4zLTEwLjgtNC4zYy0xLjUtMy43LTMuNi00LjctMy42LTQuN2MtMi45LTIsMC4yLTEuOSwwLjItMS45DQoJYzMuMiwwLjIsNC45LDMuMyw0LjksMy4zYzIuOSw0LjksNy41LDMuNSw5LjMsMi43YzAuMy0yLjEsMS4xLTMuNSwyLTQuM2MtNy4xLTAuOC0xNC42LTMuNi0xNC42LTE1LjhjMC0zLjUsMS4yLTYuMywzLjMtOC42DQoJYy0wLjMtMC44LTEuNC00LjEsMC4zLTguNWMwL
DAsMi43LTAuOSw4LjgsMy4zYzIuNi0wLjcsNS4zLTEuMSw4LTEuMWMyLjcsMCw1LjUsMC40LDgsMS4xYzYuMS00LjEsOC44LTMuMyw4LjgtMy4zDQoJYzEuNyw0LjQsMC42LDcuNywwLjMsOC41YzIuMSwyLjIsMy4zLDUuMSwzLjMsOC42YzAsMTIuMy03LjUsMTUtMTQuNiwxNS44YzEuMSwxLDIuMiwyLjksMi4yLDUuOWMwLDQuMywwLDcuNywwLDguOA0KCWMwLDAuOSwwLjYsMS45LDIuMiwxLjVDNTUuOCw1OS45LDY1LDQ3LjksNjUsMzMuOEM2NSwxNi4xLDUwLjcsMS44LDMzLDEuOHoiLz4NCjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBmaWxsPSIjRkZGRkZGIiBkPSJNOTksMS44Yy0xNy43LDAtMzIsMTQuMy0zMiwzMmMwLDE0LjEsOS4yLDI2LjEsMjEuOSwzMC40DQoJYzEuNiwwLjMsMi4yLTAuNywyLjItMS41YzAtMC44LDAtMi44LDAtNS40Yy04LjksMS45LTEwLjgtNC4zLTEwLjgtNC4zYy0xLjUtMy43LTMuNi00LjctMy42LTQuN2MtMi45LTIsMC4yLTEuOSwwLjItMS45DQoJYzMuMiwwLjIsNC45LDMuMyw0LjksMy4zYzIuOSw0LjksNy41LDMuNSw5LjMsMi43YzAuMy0yLjEsMS4xLTMuNSwyLTQuM2MtNy4xLTAuOC0xNC42LTMuNi0xNC42LTE1LjhjMC0zLjUsMS4yLTYuMywzLjMtOC42DQoJYy0wLjMtMC44LTEuNC00LjEsMC4zLTguNWMwLDAsMi43LTAuOSw4LjgsMy4zYzIuNi0wLjcsNS4zLTEuMSw4LTEuMWMyLjcsMCw1LjUsMC40LDgsMS4xYzYuMS00LjEsOC44LTMuMyw4LjgtMy4zDQoJYzEuNyw0LjQsMC42LDcuNywwLjMsOC41YzIuMSwyLjIsMy4zLDUuMSwzLjMsOC42YzAsMTIuMy03LjUsMTUtMTQuNiwxNS44YzEuMSwxLDIuMiwyLjksMi4yLDUuOWMwLDQuMywwLDcuNywwLDguOA0KCWMwLDAuOSwwLjYsMS45LDIuMiwxLjVjMTIuNy00LjIsMjEuOS0xNi4yLDIxLjktMzAuNEMxMzEsMTYuMSwxMTYuNywxLjgsOTksMS44eiIvPg0KPC9zdmc+DQo=);\n  background-size: 28px 14px;\n  background-repeat: no-repeat;\n  background-position: 0 0;\n}\n.gh-btn:hover .gh-ico,\n.gh-btn:focus .gh-ico,\n.gh-btn:active .gh-ico {\n  background-position: -14px 0;\n}\n.gh-count {\n  position: relative;\n  display: none; /* hidden to start */\n  margin-left: 4px;\n  background-color: #fafafa;\n  border: 1px solid #d4d4d4;\n}\n.gh-count:hover,\n.gh-count:focus {\n  color: #4183C4;\n}\n.gh-count:before,\n.gh-count:after {\n  content: '';\n  position: absolute;\n  display: inline-block;\n  width: 0;\n  height: 0;\n  border-color: transparent;\n  border-style: solid;\n}\n.gh-count:before {\n  top: 50%;\n  left: -3px;\n  margin-top: -4px;\n  border-width: 4px 4px 4px 
0;\n  border-right-color: #fafafa;\n}\n.gh-count:after {\n  top: 50%;\n  left: -4px;\n  z-index: -1;\n  margin-top: -5px;\n  border-width: 5px 5px 5px 0;\n  border-right-color: #d4d4d4;\n}\n.github-btn-large {\n  height: 30px;\n}\n.github-btn-large .gh-btn,\n.github-btn-large .gh-count {\n  padding: 3px 10px 3px 8px;\n  font-size: 16px;\n  line-height: 22px;\n  border-radius: 4px;\n}\n.github-btn-large .gh-ico {\n  width: 20px;\n  height: 20px;\n  background-size: 40px 20px;\n}\n.github-btn-large .gh-btn:hover .gh-ico,\n.github-btn-large .gh-btn:focus .gh-ico,\n.github-btn-large .gh-btn:active .gh-ico {\n  background-position: -20px 0;\n}\n.github-btn-large .gh-count {\n  margin-left: 6px;\n}\n.github-btn-large .gh-count:before {\n  left: -5px;\n  margin-top: -6px;\n  border-width: 6px 6px 6px 0;\n}\n.github-btn-large .gh-count:after {\n  left: -6px;\n  margin-top: -7px;\n  border-width: 7px 7px 7px 0;\n}\n</style>\n<span class=\"github-btn\" id=\"github-btn\">\n  <a class=\"gh-btn\" id=\"gh-btn\" href=\"#\" target=\"_blank\">\n    <span class=\"gh-ico\"></span>\n    <span class=\"gh-text\" id=\"gh-text\"></span>\n  </a>\n  <a class=\"gh-count\" id=\"gh-count\" href=\"#\" target=\"_blank\"></a>\n</span>\n<script>\n  // Read a page's GET URL variables and return them as an associative array.\n  // Source: http://jquery-howto.blogspot.com/2009/09/get-url-parameters-values-with-jquery.html\n  var params = function () {\n    var vars = [], hash;\n    var hashes = window.location.href.slice(window.location.href.indexOf('?') + 1).split('&');\n    for(var i = 0; i < hashes.length; i++) {\n      hash = hashes[i].split('=');\n      vars.push(hash[0]);\n      vars[hash[0]] = hash[1];\n    }\n    return vars;\n  }()\n  var user = params.user,\n  repo = params.repo,\n  type = params.type,\n  count = params.count,\n  size = params.size,\n  head = document.getElementsByTagName('head')[0],\n  button = document.getElementById('gh-btn'),\n  mainButton = 
document.getElementById('github-btn'),\n  text = document.getElementById('gh-text'),\n  counter = document.getElementById('gh-count');\n\n\n  // Add commas to numbers\n  function addCommas(n) {\n    return String(n).replace(/(\\d)(?=(\\d{3})+$)/g, '$1,')\n  }\n\n  function jsonp(path) {\n    var el = document.createElement('script');\n    el.src = path + '?callback=callback';\n    head.insertBefore(el, head.firstChild);\n  }\n\n  function callback(obj) {\n    if (type == 'watch') {\n      counter.innerHTML = addCommas(obj.data.watchers);\n    } else if (type == 'fork') {\n      counter.innerHTML = addCommas(obj.data.forks);\n    } else if (type == 'follow') {\n      counter.innerHTML = addCommas(obj.data.followers);\n    }\n\n    // Show the count if asked\n    if (count == 'true') {\n      counter.style.display = 'block';\n    }\n  }\n\n  // Set href to be URL for repo\n  button.href = 'https://github.com/' + user + '/' + repo + '/';\n\n  // Add the class, change the text label, set count link href\n  if (type == 'watch') {\n    mainButton.className += ' github-watchers';\n    text.innerHTML = 'Star';\n    counter.href = 'https://github.com/' + user + '/' + repo + '/stargazers';\n  } else if (type == 'fork') {\n    mainButton.className += ' github-forks';\n    text.innerHTML = 'Fork';\n    counter.href = 'https://github.com/' + user + '/' + repo + '/network';\n  } else if (type == 'follow') {\n    mainButton.className += ' github-me';\n    text.innerHTML = 'Follow @' + user;\n    button.href = 'https://github.com/' + user;\n    counter.href = 'https://github.com/' + user + '/followers';\n  }\n\n  // Change the size\n  if (size == 'large') {\n    mainButton.className += ' github-btn-large';\n  }\n\n  if (type == 'follow') {\n    jsonp('https://api.github.com/users/' + user);\n  } else {\n    jsonp('https://api.github.com/repos/' + user + '/' + repo);\n  }\n</script></body></html>\n"
  },
  {
    "path": "docs/manual/source/index.html.md.erb",
    "content": "---\ntitle: Welcome to Apache PredictionIO®!\ndescription: Apache PredictionIO® Open Source Machine Learning Server\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## What is Apache PredictionIO®?\n\nApache PredictionIO® is an **open source Machine Learning Server**\nbuilt on top of a state-of-the-art open source stack for developers and data\nscientists to create predictive engines for any machine learning task. 
It lets you:\n\n* quickly build and deploy an engine as a web service on production with\n  [customizable templates](/gallery/template-gallery);\n* respond to dynamic queries in **real-time** once deployed as a web service;\n* evaluate and tune multiple engine variants systematically;\n* unify data from multiple platforms in batch or in real-time for comprehensive\n  predictive analytics;\n* speed up machine learning modeling with systematic processes and pre-built\n  evaluation measures;\n* support machine learning and data processing libraries such as Spark MLLib and\n  OpenNLP;\n* implement your own machine learning models and seamlessly incorporate them\n  into your engine;\n* simplify data infrastructure management.\n\nApache PredictionIO® can be [installed](/install/) as a full machine\nlearning stack, bundled with **Apache Spark**, **MLlib**, **HBase**, **Akka HTTP**\nand **Elasticsearch**, which simplifies and accelerates scalable machine\nlearning infrastructure management.\n\n| Getting Started | Developer Guides | Machine Learning Education and Usage | PredictionIO SDKs |\n| --------------- | ---------------- | ------------------------------------ | ----------------- |\n| [Quick Intro](/start/) | [System Architecture](/system/) | [Demo: Recommending Comics](/demo/tapster/) | [Java](/sdk/java/) |\n| [Installation Guide](/install/) | [Event Server Overview](/datacollection/) | [Text Classification](/demo/textclassification/) | [PHP](/sdk/php/) |\n| [Downloading Template](/start/download/) | [Collecting Data](/datacollection/eventapi/) | [Community Contributed Demo](/community/projects.html#demos) | [Python](/sdk/python/) |\n| [Deploying an Engine](/start/deploy/) | [Learning DASE](/customize/) |[Dimensionality Reduction](/machinelearning/dimensionalityreduction/)| [Ruby](/sdk/ruby/) |\n| [Customizing an Engine](/start/customize/) | [Implementing DASE](/customize/dase/) ||[Community Contributed](/community/projects.html#sdks) |\n| [App Integration 
Overview](/appintegration/) | [Evaluation Overview](/evaluation/) |||\n|| [Intellij IDEA Guide](/resources/intellij/) |||\n|| [Scala API](/api/current/#package) |||\n\n\n## Release Notes\n\nA summary of changes in each release can be found\n[here](https://github.com/apache/predictionio/blob/livedoc/RELEASE.md).\n\n## Licensing\n\nApache PredictionIO® is licensed under the Apache License, Version\n2.0. See\n[LICENSE](https://github.com/apache/predictionio/blob/master/LICENSE.txt)\nfor the full license text.\n"
  },
  {
    "path": "docs/manual/source/install/index.html.md.erb",
    "content": "---\ntitle: Installing Apache PredictionIO®\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Prerequisites\n\nIt is **very important** to meet the version of the following\ntechnologies that power Apache PredictionIO®.\n\n* Java SE Development Kit 8\n* Apache Spark 2.0+\n* Apache Hadoop 2.6, 2.7\n\nand one of the following sets:\n\n* PostgreSQL 9.6 or MySQL 5.1\n* Apache HBase 1.2\n* Elasticsearch 6.x, 5.6(deprecated)\n\n## Installation\n\nPre-built for the following versions\n\n* Scala 2.11\n* Apache Spark 2.4\n* Apache Hadoop 2.7\n* Elasticsearch 6.8\n\n* [Downloading Binary Distribution](install-sourcecode.html#downloading-binary-distribution)\n\nBuilding Apache PredictionIO\n\n* [Downloading Source Code](install-sourcecode.html#downloading-source-code)\n\nDocker\n\n* [Installing Apache PredictionIO with Docker](install-docker.html)\n"
  },
  {
    "path": "docs/manual/source/install/install-docker.html.md.erb",
    "content": "---\ntitle: Installing Apache PredictionIO® with Docker\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Download and Start Docker\n\nDocker is a widely used container solution. Please download and start Docker by following their [guide](https://www.docker.com/get-started).\n\n## Get PredictionIO and Dependencies Configuration\n\nStarting from v0.13.0, Apache PredictionIO® starts to provide docker support for the production environment. `Dockerfile` and dependencies configuration can be found in the `docker` folder in the [git repository](https://github.com/apache/predictionio/tree/develop/docker).\n\n```bash\ngit clone https://github.com/apache/predictionio.git\ncd predictionio/docker\n```\n\nINFO: In this installation, we only need the `docker` sub-directory in the repository. One can use other tools to get the folder without cloning the whole project.\n\n## Build Docker Image\n\nTo build PredictionIO docker image, `Dockerfile` is provided in sub-directory `pio`.\n\n```\ndocker build -t predictionio/pio pio\n```\n\nOne will be able to build an image with tag `prediction/pio:latest` using the above command.\n\nWARNING: People can get PredictionIO image from Dockerhub through `docker pull predictionio/pio`. 
However, since the image cannot run without a properly configured storage, please follow the following steps to complete the installation.\n\nWARNING: Image `predictionio/pio` hosted on Dockerhub is **NOT** regarded as an official ASF release and might provide a different PredictionIO version from your desired PredictionIO version. It is recommended to build the image locally rather than pulling directly from Dockerhub.\n\n## Pull Images and Start\n\nIn this repository, PostgreSQL, MySQL, Elasticsearch, and local file system are supported with their corresponding configuration.\n\n### Supported storages are as below:\n\nEvent Storage\n\n - PostgreSQL, MySQL, Elasticsearch\n\nMetadata Storage\n\n - PostgreSQL, MySQL, Elasticsearch\n\nModel Storage\n\n - PostgreSQL, MySQL, LocalFS\n\nOne can use `docker-compose -f` to pull and start the corresponding services. More details are provided in [this document](https://github.com/apache/predictionio/blob/develop/docker/README.md#run-predictionio-with-selectable-docker-compose-files).\n\n### Service Starting Sample\n\n```\ndocker-compose -f docker-compose.yml \\\n    -f pgsql/docker-compose.base.yml \\\n    -f pgsql/docker-compose.meta.yml \\\n    -f pgsql/docker-compose.event.yml \\\n    -f pgsql/docker-compose.model.yml \\\n    up\n```\n\nIn this example, we pull and start the `predictionio/pio` image with `docker-compose.yml`.\n\nAnd pull the `postgres:9` image with `pgsql/docker-compose.base.yml`.\n\nAnd configure PostgreSQL to store our metadata, event, and model with `pgsql/docker-compose.meta.yml`, `pgsql/docker-compose.event.yml`, and `pgsql/docker-compose.model.yml`.\n\nAfter pulling the images, the script will start PostgreSQL, Apache PredictionIO, and Apache Spark. 
The event server should be ready at port `7070`, and one should see these logs in the command line interface.\n\n```\n...\npio_1       | [INFO] [Management$] Your system is all ready to go.\npio_1       | [INFO] [Management$] Creating Event Server at 0.0.0.0:7070\npio_1       | [INFO] [HttpListener] Bound to /0.0.0.0:7070\npio_1       | [INFO] [EventServerActor] Bound received. EventServer is ready.\n```\n\n## Verifying Service\n\nA command tool `pio-docker` is provided to invoke `pio` command in the PredictionIO container. Set `pio-docker` to default execution path and use `status` to check the current PredictionIO service with the following script.\n\n```bash\n$ export PATH=`pwd`/bin:$PATH\n$ pio-docker status\n```\n\nOne should be able to see the corresponding log in the following structure, and your system is ready to go!\n\n```\n[INFO] [Management$] Inspecting PredictionIO...\n[INFO] [Management$] PredictionIO 0.13.0 is installed at /usr/share/predictionio\n[INFO] [Management$] Inspecting Apache Spark...\n[INFO] [Management$] Apache Spark is installed at /usr/share/spark-2.2.2-bin-hadoop2.7\n[INFO] [Management$] Apache Spark 2.2.2 detected (meets minimum requirement of 1.3.0)\n[INFO] [Management$] Inspecting storage backend connections...\n[INFO] [Storage$] Verifying Meta Data Backend (Source: PGSQL)...\n[INFO] [Storage$] Verifying Model Data Backend (Source: PGSQL)...\n[INFO] [Storage$] Verifying Event Data Backend (Source: PGSQL)...\n[INFO] [Storage$] Test writing to Event Store (App Id 0)...\n[INFO] [Management$] Your system is all ready to go.\n```\n\nINFO: After the service is up, one can continue by changing `pio` to `pio-docker` for further deployment. More details are provided in [this document](https://github.com/apache/predictionio/tree/develop/docker#tutorial).\n\n## Community Docker Support\n\n[More PredictionIO Docker packages supported by our great community](/community/projects.html#docker-images).\n"
  },
  {
    "path": "docs/manual/source/install/install-sourcecode.html.md.erb",
    "content": "---\ntitle: Installing Apache PredictionIO®\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nINFO: Assuming you are following the directory structure in the following,\nreplace `/home/abc` with your own home directory wherever you see it.\n\n## Downloading Binary Distribution\n\nDownload [binary release from an Apache\nmirror](https://www.apache.org/dyn/closer.lua/predictionio/<%= data.versions.pio\n%>/apache-predictionio-<%= data.versions.pio %>-bin.tar.gz).\n\n### Verifying Release\n\nVerify binary release using the [signatures and checksums]\n(https://archive.apache.org/dist/predictionio/<%= data.versions.pio %>/)\nand [project release KEYS](https://www.apache.org/dist/predictionio/KEYS).\n\n```\n$ gpg --import KEYS\n$ gpg --verify apache-predictionio-<%= data.versions.pio %>-bin.tar.gz.asc apache-predictionio-<%= data.versions.pio %>-bin.tar.gz\n```\n\nYou should see something like this.\n\n```\ngpg: Signature made Tue Sep 26 22:55:22 2017 PDT\ngpg:                using RSA key 7E2363D84719A8F4\ngpg: Good signature from \"Chan Lee <chanlee@apache.org>\" [ultimate]\n```\n\nFor further information, the [official guide from\nApache](https://www.apache.org/info/verification.html) has the most up-to-date\nand complete 
information.\n\n### Installation\n\nExtract the binary distribution and proceed to\n[Installing Dependencies](#installing-dependencies).\n\n```\n$ tar zxvf apache-predictionio-<%= data.versions.pio %>-bin.tar.gz\n```\n\n## Downloading Source Code\n\nDownload [source release from an Apache\nmirror](https://archive.apache.org/dist/predictionio/<%= data.versions.pio\n%>/apache-predictionio-<%= data.versions.pio %>.tar.gz).\n\n### Verifying Release\n\nVerify source release using [signatures and checksums](https://archive.apache.org/dist/predictionio/<%= data.versions.pio %>/)\nand [project release KEYS](https://www.apache.org/dist/predictionio/KEYS).\n\n```\n$ gpg --import KEYS\n$ gpg --verify apache-predictionio-<%= data.versions.pio %>.tar.gz.asc apache-predictionio-<%= data.versions.pio %>.tar.gz\n```\n\nYou should see something like this.\n\n```\ngpg: Signature made Tue Sep 26 22:55:22 2017 PDT\ngpg:                using RSA key 7E2363D84719A8F4\ngpg: Good signature from \"Chan Lee <chanlee@apache.org>\" [ultimate]\n```\n\nFor further information, the [official guide from\nApache](https://www.apache.org/info/verification.html) has the most up-to-date\nand complete information.\n\n### Building\n\nRun the following at the directory where you downloaded the source code to build\nApache PredictionIO®. 
By default, the build will be against\n\n* Scala 2.11.8\n* Spark 2.1.1\n* Hadoop 2.7.7\n* Elasticsearch 5.6.9\n\n\n```\n$ tar zxvf apache-predictionio-<%= data.versions.pio %>.tar.gz\n$ cd apache-predictionio-<%= data.versions.pio %>\n$ ./make-distribution.sh -Dscala.version=2.11.12 -Dspark.version=2.4.0 -Delasticsearch.version=6.4.2\n```\n\nYou should see something like the following when it finishes building\nsuccessfully.\n\n```\n...\nPredictionIO-<%= data.versions.pio %>/sbt/sbt\nPredictionIO-<%= data.versions.pio %>/conf/\nPredictionIO-<%= data.versions.pio %>/conf/pio-env.sh\nPredictionIO binary distribution created at PredictionIO-<%= data.versions.pio %>.tar.gz\n```\n\nExtract the binary distribution you have just built.\n\n```\n$ tar zxvf PredictionIO-<%= data.versions.pio %>.tar.gz\n```\n\n## Installing Dependencies\n\nLet us install dependencies inside a subdirectory of the Apache PredictionIO\ninstallation. By following this convention, you can use\nApache PredictionIO's default configuration as is.\n\n```\n$ mkdir PredictionIO-<%= data.versions.pio %>/vendors\n```\n\n\n### Spark Setup\n\n<%= partial 'shared/install/spark' %>\n\n\n### Storage Setup\n\n#### <a name=\"pgsql\"></a>PostgreSQL Setup\n\nWARNING: You may skip this section if you are not using PostgreSQL.\n\n<%=partial 'shared/install/postgres' %>\n\n#### HBase and Elasticsearch Setup\n\n##### Elasticsearch Setup\n\nWARNING: You may skip this section if you are not using Elasticsearch.\n\n<%= partial 'shared/install/elasticsearch' %>\n\n\n##### <a name=\"hbase\"></a>HBase Setup\n\nWARNING: You may skip this section if you are not using HBase.\n\n<%= partial 'shared/install/hbase' %>\n\n\nIn addition, you must set your environment variable `JAVA_HOME`. 
For example, in\n`/home/abc/.bashrc` add the following line:\n\n```\nexport JAVA_HOME=/usr/lib/jvm/java-8-oracle\n```\n\n\n<%= partial 'shared/install/dependent_services' %>\n\n\n\n\n\nNow you have installed everything you need!\n\n<%= partial 'shared/install/proceed_template' %>\n"
  },
  {
    "path": "docs/manual/source/install/sdk.html.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n"
  },
  {
    "path": "docs/manual/source/javascripts/application.js",
    "content": "//= require 'jquery'\n//= require 'Tabslet'\n//= require 'jcarousel'\n\n// Licensed to the Apache Software Foundation (ASF) under one or more\n// contributor license agreements.  See the NOTICE file distributed with\n// this work for additional information regarding copyright ownership.\n// The ASF licenses this file to You under the Apache License, Version 2.0\n// (the \"License\"); you may not use this file except in compliance with\n// the License.  You may obtain a copy of the License at\n//\n//    http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\nwindow.onresize = function() {\n  adjustContentImageWidth();\n}\n\n$(document).ready(function() {\n\n  adjustContentImageWidth();\n\n  // header menu toggler\n  $('#drawer-toggle').click(function() {\n    toggleDrawer(\n      document.getElementById('drawer-toggle'),\n      document.getElementById('menu-wrapper')\n    );\n  })\n\n  // mobile nav menu toggler\n  $(\".mobile-left-menu-toggler\").click(function() {\n    var isActive = $('#left-menu-indicator').hasClass('active');\n    if (isActive) {\n    $('#left-menu-indicator').attr(\"src\", '/images/icons/down-arrow.png');\n    } else {\n      $('#left-menu-indicator').attr(\"src\", '/images/icons/up-arrow.png');\n    }\n    $('#left-menu-wrapper').toggleClass('active');\n    $('#left-menu-indicator').toggleClass('active');\n  })\n\n  // search box toggler\n  $('.search-box-toggler').click(function() {\n    $('.search-form').toggleClass('active');\n    $('.st-search-input').focus();\n  })\n\n  $('.st-search-input').focusout(function() {\n    $('.search-form').toggleClass('active');\n  })\n\n  var toggleDrawer = function(icon, 
menu){\n    if (menu.classList.contains(\"active\")) {\n      icon.classList.remove(\"active\");\n      menu.classList.remove(\"active\");\n    } else {\n      icon.classList.add(\"active\");\n      menu.classList.add(\"active\");\n    }\n  }\n\n  // mobile search box toggler\n  $('.mobile-search-bar-toggler').click(function() {\n    $('.swiftype-wrapper').addClass('active');\n    $('.st-search-input').focus();\n  });\n\n  $('.swiftype-row-hider').click(function() {\n    $('.swiftype-wrapper').removeClass('active');\n  })\n\n  // add function call to subscription form\n  $( \"form.ajax-form\" ).each(function( index ) {\n    $(this).ajaxForm();\n  });\n\n  function navExpand(link) {\n    link.removeClass('expandible').addClass('collapsible');\n    link.children('i').removeClass('fa-caret-right').addClass('fa-caret-down');\n    link.next('ul').show();\n  }\n\n  function navCollapse(link) {\n    link.removeClass('collapsible').addClass('expandible');\n    link.children('i').removeClass('fa-caret-down').addClass('fa-caret-right');\n    link.next('ul').hide();\n  }\n\n  // Main Navigation\n  $('#nav-main a').on('click', function(event) {\n    var $this = $(this);\n\n    if ($this.hasClass('expandible')) {\n      navExpand($this);\n      event.preventDefault();\n    } else if ($this.hasClass('collapsible')) {\n      navCollapse($this);\n      event.preventDefault();\n    }\n  });\n\n  $('#nav-main .active').parentsUntil('#nav-main').each(function() {\n    $(this).children('.expandible').each(function() {\n      var $this = $(this);\n      navExpand($this);\n    });\n  });\n\n  $('#content').on('click', function(event) {\n    $('body').removeClass('active-navigation')\n  });\n\n  $('#active-navigation').on('click', function(event) {\n    event.preventDefault();\n    $('body').toggleClass('active-navigation')\n  });\n\n  if ($('#table-of-contents').is(':empty')) {\n    $('#table-of-contents').addClass('empty')\n  }\n\n  // Tabslet\n  $('.tabs').tabslet();\n\n  // Tab 
Syncing\n  $('.control li').on('mousedown', function(event) {\n    lang = $(this).data('lang')\n    $('.control li[data-lang=\"' + lang + '\"]').each(function() {\n      $(this).children('a:first').trigger('click')\n    });\n  });\n\n  // External Links\n  $(\"a[href^='http']\").each(function() {\n    $(this).click(function(event) {\n      event.preventDefault();\n      window.open(this.href);\n    }).addClass('external');\n  });\n});\n\n//ajax form submit\n$.fn.ajaxForm = function() {\n  var $form, request, $result, emailRegex, $submitInput;\n  $form = $(this);\n  $result = $form.find('.result');\n  $submitInput = $form.find(\"input[type=submit]\");\n  emailRegex = /^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;\n\n  $form.submit(function(event) {\n    event.preventDefault();\n    var validationMessage, valid = true;\n\n    $.each([ '.required', 'input[type=email]', '[data-match-string]'], function( i, value ) {\n      $form.find(value).each(function(j){\n        $(this).removeClass('error');\n        var result = null;\n        switch(i) {\n          case 0:\n            result = validateRequired($(this));\n            break;\n          case 1:\n            result = validateEmail($(this));\n            break;\n          case 2:\n            result = validateMatch($(this));\n            break;\n          default:\n            break;\n        }\n        if (result && result['pass'] === false) {\n          $(this).addClass('error');\n          valid = false;\n          validationMessage = result['errorMessage'];\n        }\n      });\n      return valid;\n    });\n\n    if (!valid) {\n      $result.addClass('error');\n      $result.text(validationMessage);\n      return;\n    }\n\n    $result.removeClass('error');\n    $result.text('');\n    if (request) {\n      request.abort();\n    }\n\n    var $inputs = $form.find(\"input, select, button, textarea\");\n    var 
serializedData = $form.serialize();\n    $submitInput.val($submitInput.data('state-loading'));\n    disableForm();\n    request = $.ajax({\n      url: $form.attr('action'),\n      type: \"POST\",\n      dataType: \"jsonp\",\n      crossDomain: true,\n      data: serializedData+ \"&prefix=formCallBack\",\n      jsonpCallback: \"formCallBack\",\n      success: function(data) {\n        if (data && data.result == \"success\") {\n          onFormSubmitSuccess();\n        } else {\n          onFormSubmitError();\n          console.error(\"error: \", data);\n        }\n      },\n      error: function(jqXHR, textStatus, errorThrown) {\n        onFormSubmitError();\n        console.error(\n          \"error: \"+ textStatus, errorThrown\n        );\n      }\n    });\n\n    function enableForm() {\n      $inputs.prop(\"disabled\", false);\n    };\n\n    function disableForm() {\n      $inputs.prop(\"disabled\", true);\n    };\n\n    function onFormSubmitSuccess() {\n      $submitInput.val($submitInput.data('state-sucess'));\n    };\n\n    function onFormSubmitError() {\n      $submitInput.val($submitInput.data('state-normal'));\n      enableForm();\n      $result.addClass('error');\n      $result.html('Oops! 
An error has occurred.');\n    }\n  });\n\n  function validateRequired($input) {\n    if (!$input.val()) {\n      return {\n        'pass': false,\n        'errorMessage': 'Please fill out all required fields.'\n      }\n    } else {\n      return {\n        'pass': true\n      }\n    }\n  };\n\n  function validateEmail($input) {\n    if ($input.val() && !emailRegex.test($input.val())) {\n      return {\n        'pass': false,\n        'errorMessage': 'Please input valid email address.'\n      }\n    } else {\n      return {\n        'pass': true\n      }\n    }\n  };\n\n  function validateMatch($input) {\n    if ($input.val() !== $input.data('match-string')) {\n      return {\n        'pass': false,\n        'errorMessage': \"Input doesn't match.\"\n      }\n    } else {\n      return {\n        'pass': true\n      }\n    }\n  };\n};\n\nfunction formCallBack(data) {};\n\nvar adjustContentImageWidth = function() {\n  // prevent image in place of table of content getting squeezed to next row\n  var tableOfContent = document.getElementById('table-of-content-wrapper');\n  var rect = tableOfContent.getBoundingClientRect();\n\n  $('.content img').each(function() {\n    var withinTableOfContentRow = this.getBoundingClientRect().top > rect.bottom;\n    if (withinTableOfContentRow) {\n      $(this).addClass('default-width');\n    } else {\n      $(this).removeClass('default-width');\n    }\n  })\n}\n"
  },
  {
    "path": "docs/manual/source/javascripts/tryit.js",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one or more\n// contributor license agreements.  See the NOTICE file distributed with\n// this work for additional information regarding copyright ownership.\n// The ASF licenses this file to You under the Apache License, Version 2.0\n// (the \"License\"); you may not use this file except in compliance with\n// the License.  You may obtain a copy of the License at\n//\n//    http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n$(document).ready(function() {\n  // Carousel Initialization\n  $('.jcarousel')\n    .jcarousel({\n      // Options go here\n    });\n\n  // Prev Control Initialization\n  $('.jcarousel-control-prev')\n    .on('jcarouselcontrol:active', function() {\n      $(this).removeClass('inactive');\n    })\n    .on('jcarouselcontrol:inactive', function() {\n      $(this).addClass('inactive');\n    })\n    .jcarouselControl({\n      // Options go here\n      target: '-=1'\n    });\n\n  // Next Control Initialization\n  $('.jcarousel-control-next')\n    .on('jcarouselcontrol:active', function() {\n      $(this).removeClass('inactive');\n    })\n    .on('jcarouselcontrol:inactive', function() {\n      $(this).addClass('inactive');\n    })\n    .jcarouselControl({\n      // Options go here\n      target: '+=1'\n    });\n\n  // Pagination Initialization\n  $('.jcarousel-pagination')\n    .on('jcarouselpagination:active', 'li', function() {\n      $(this).addClass('active');\n    })\n    .on('jcarouselpagination:inactive', 'li', function() {\n      $(this).removeClass('active');\n    })\n    .jcarouselPagination({\n      // Options go here\n      'item': 
function(page, carouselItems) {\n        return '<li><a href=\"#' + page + '\">' + page + '</a></li>';\n      }\n    });\n\n  $('#tryit-start').on('click', function() {\n    $('.jcarousel').jcarousel('scroll', 1);\n  });\n});\n"
  },
  {
    "path": "docs/manual/source/layouts/layout.html.slim",
    "content": "doctype html\nhtml\n  head\n    = partial 'head/base'\n  body\n    #global\n      = partial 'header'\n      = partial 'search_bar'\n      #page.container-fluid\n        .row\n          #left-menu-wrapper.col-md-3\n            = partial 'nav/main'\n          .col-md-9.col-sm-12\n            .content-header.hidden-md.hidden-lg\n              = breadcrumbs\n              #page-title\n                = page_title\n\n            = partial 'table_of_content'\n\n            .content-header.hidden-sm.hidden-xs\n              = breadcrumbs\n              #page-title\n                = page_title\n            .content\n              = yield\n      = partial 'footer'\n    = partial 'swiftype'\n    = javascript_include_tag  'application'\n"
  },
  {
    "path": "docs/manual/source/layouts/tryit.html.slim",
    "content": "doctype html\nhtml\n  head\n    = partial 'head/base'\n  body.tryit\n    #global\n      = partial 'header'\n      #page.container\n        main#main role=\"main\" data-swiftype-index=\"true\"\n          #content\n            = yield\n      = partial 'footer'\n    = partial 'swiftype'\n    = javascript_include_tag  'application'\n    = javascript_include_tag  'tryit'\n"
  },
  {
    "path": "docs/manual/source/machinelearning/dimensionalityreduction.html.md",
    "content": "---\ntitle: Dimensionality Reduction With PredictionIO\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThe purpose of this guide is to teach developers how to incorporate \"dimensionality reduction\" into a PredictionIO engine [Principal Component Analysis](https://en.wikipedia.org/wiki/Principal_component_analysis) (PCA) on the [MNIST digit recognition dataset](https://www.kaggle.com/c/digit-recognizer). To do this, you will be modifying the PredictionIO [classification engine template](/gallery/template-gallery/#classification). This guide will demonstrate how to import the specific data set in batch, and also how to change the engine components in order to incorporate the new sample data and implement PCA.\n\nIn machine learning, specifically in [supervised learning](http://en.wikipedia.org/wiki/Supervised_learning), the general problem at hand is to predict a numeric outcome \\\\(y\\\\) from a numeric vector \\\\(\\bf{x}\\\\). The different components of \\\\(\\bf{x}\\\\) are called **features**, and usually represent observed values such as a hospital patient's age, weight, height, sex, etc. There are subtle issues that begin to arise as the number of features contained in each feature vector increases. 
We briefly list some of the issues that arise as the number of features grows:\n\n\n- **Computation:** The time complexity of machine learning algorithms often depends on the number of features used. That is, the more features one uses for prediction, the more time it takes to train a model.\n\n- **Prediction Performance:** Oftentimes there will be features that, when used in training, will actually decrease the predictive performance of a particular algorithm.  \n\n- **Curse of Dimensionality:** It is harder to make inference and predictions in high dimensional spaces simply due to the fact that we need to sample a lot more observations. Think about it this way: suppose that we sample 100 points lying on a flat solid square, and 100 points in a solid cube. The 100 points from the square will likely take up a larger proportion of its area, in comparison to the proportion of the cube's volume that the points sampled from it occupy. Hence we would need to sample more points from the cube in order to get better estimates of the different properties of the cube, such as height, length, and width. This is shown in the following figure:\n\n| 100 Points Sampled From Unit Square                      | 100 Points Sampled From Unit Cube                    |\n| -------------------------------------------------------- | ---------------------------------------------------- |\n|                                                          |                                                      |\n| ![Square Samples](/images/machinelearning/featureselection/square100.png) | ![Cube Samples](/images/machinelearning/featureselection/cube100.png) |\n|                                                          |                                                      |\n\nDimensionality reduction is the process of applying a transformation to your feature vectors in order to produce a vector with the same number of features or fewer. 
Principal Component Analysis (PCA) is a technique for dimensionality reduction. This can be treated as a data processing technique, and so with respect to the [DASE](/customize/) framework, it will fall into the Data Preparator engine component.\n\nThis guide will also help to solidify the concept of taking an engine template and customizing it for a particular use case: hand-written numeric digit recognition.\n\n## Data Example\n\nAs a guiding example, a base data set, the [MNIST digit recognition dataset](https://www.kaggle.com/c/digit-recognizer/data), is used. This is a perfect data set for dimensionality reduction, for, in this data set, the features that will be used for learning are pixel entries in a \\\\(28 \times 28\\\\) pixel image. There is really no direct interpretation of any one feature, so that you do not lose anything in applying a transformation that will treat the features as [linear combinations](https://en.wikipedia.org/wiki/Linear_combination) of some set of \"convenient\" vectors.\n\nNow, we first pull the [classification engine template](/gallery/template-gallery/#classification) via the following bash command:\n\n```\ngit clone https://github.com/apache/predictionio-template-attribute-based-classifier.git <Your new engine directory>\n```\n\nYou should immediately be prompted with the following message:\n\n```\nPlease enter the template's Scala package name (e.g. com.mycompany):\n```\n\nGo ahead and input `FeatureReduction`, and feel free to just press enter for the remaining message prompts. For the remainder of this guide, you will be working in your new engine directory, so go ahead and `cd` into your new engine directory. At this point, go ahead and run the command:\n\n```\npio build\n```\n\nThis will make sure that the PredictionIO dependency version for your project matches the version installed on your computer. 
Now, download the MNIST `train.csv` data set from the link above, and put this file in the `data` directory contained in the new engine directory.\n\n### **Optional**: Visualizing Observations\n\nIf you want to actually convert the observation pixel data to an image, go ahead and create a Python script called `picture_processing.py` in your `data` directory, and copy and paste the following code into the script:\n\n```python\nfrom PIL import Image\nimport sys\n\nobs_num = int(sys.argv[1])\n\nf = open('./data/train.csv', 'r').read().split('\\n')\nvar_names = f[0].split(',')\nf = f[1 : -1]\nf = [list(map(int, x[1 : ])) for x in (y.split(\",\") for y in f)]\n\n\ndef create_image(pixel_array):\n    img = Image.new('RGB', (28, 28))\n    pixels = img.load()\n    count = 0\n    for i in range(img.size[0]):\n        for j in range(img.size[1]):\n            pixels[i, j] = (i, j, pixel_array[count])\n            count += 1\n    return img\n\ncreate_image(f[obs_num]).show()\n```\nTo use this, run the following command:\n\n```\npython data/picture_processing.py k\n```\n\nwhere you will replace `k` with an integer between 0 and 41999 (referring to an observation number). This script uses the [Python pillow](https://python-pillow.github.io/) library, and, if you have it installed, the above command should open up a window with an image of a hand-written numerical digit.\n\n### Importing the Data\n\nYou will use the [PredictionIO Python SDK](/sdk/python/) to prepare the data for batch import. 
Go ahead and create a Python script called `export_events.py` in the same `data` directory, and copy and paste the following code:\n\n```python\n\"\"\"\nImport digit recognition data.\n\"\"\"\n\nimport predictionio\nimport argparse\nimport pytz\nfrom datetime import datetime\n\n### Remove the variable name line, and last line.\nf = open(\"./data/train.csv\", \"r\").read().split(\"\\n\")[1 : -1]\n\n### Separate your observations into a tuple (label, pixel list).\nf = [(int(x[0]), list(map(int, x[1 : ]))) for x in (y.split(\",\") for y in f)]\n\n### JSON event exporter.\nexporter = predictionio.FileExporter(\"./data/digits.json\")\n\n\ncount = 0\nprint(\"Exporting events to JSON batch file........\")\nfor elem in f:\n  exporter.create_event(\n    event=\"digitData\",\n    entity_type=\"digit\",\n    entity_id=str(count), # use the count num as user ID\n    properties= {\n      \"label\":elem[0],\n      \"features\":str(elem[1])[1 : -1]\n    },\n    event_time = datetime.now(pytz.utc)\n  )\n  count += 1\nprint(\"Exported {} events.\".format(str(count)))\n\n```\n\nThis script exports the data as events to a JSON batch file, formatted in a manner that will facilitate its processing in the Classification engine once it has been imported into the [event server](/datacollection/), although you will also need to modify the engine accordingly. In your new engine directory, run the above script via the following:\n\n```\npython data/export_events.py\n```\n\nThis will create a file `digits.json` in your engine `data` directory. We will create a new application called `FeatureReduction` via the command:\n\n```\npio app new FeatureReduction\n```\n\nThis will create an application associated with an application ID and an access key. 
To import the data, you use the command in your engine directory:\n\n```\npio import --appid <Your application ID> --input data/digits.json\n```\n\nIf the data has been successfully imported, you should see output of the form:\n\n```\n...\n[INFO] [Remoting] Starting remoting\n[INFO] [Remoting] Remoting started; listening on addresses :[akka.tcp://sparkDriver@10.0.0.30:65523]\n[INFO] [FileToEvents$] Events are imported.                                     \n[INFO] [FileToEvents$] Done.\n```\n\nThe data is now in the event server.\n\n\n## Principal Component Analysis\n\n\nPCA begins with the data matrix \\\\(\\bf X\\\\) whose rows are feature vectors corresponding to a set of observations. In our case, each row represents the pixel information of the corresponding hand-written numeric digit image. The model then computes the [covariance matrix](https://en.wikipedia.org/wiki/Covariance_matrix) estimated from the data matrix \\\\(\\bf X\\\\). The algorithm then takes the covariance matrix and computes the [eigenvectors](https://en.wikipedia.org/wiki/Eigenvalues_and_eigenvectors) that correspond to its \\\\(k\\\\) (some integer) largest [eigenvalues](https://en.wikipedia.org/wiki/Eigenvalues_and_eigenvectors). The data matrix is then mapped to the space generated by these \\\\(k\\\\) vectors, which are called the \\\\(k\\\\) **principal components** of \\\\(\\bf X\\\\). What this is doing is mapping the data observations into a lower-dimensional space that explains the largest variability in the data (contains the most information). 
The algorithm for implementing PCA is listed as follows:\n\n### PCA Algorithm\n\n**Input:** \\\\(N \times p\\\\) data matrix \\\\(\bf X\\\\); \\\\(k \leq p\\\\), the number of desired features.\n\n**1.** For each column in the data matrix: compute the average of all the entries contained in the column, and then subtract this average from each of the column entries.\n\n**2.** Compute the \\\\(k\\\\) eigenvectors corresponding to the \\\\(k\\\\) largest eigenvalues of the covariance matrix of the centered matrix obtained in the first step.\n\n**Output:** \\\\(p \times k\\\\) matrix \\\\(P\\\\) whose \\\\(k\\\\) columns are the eigenvectors computed in the second step.\n\nNow, to transform a \\\\(p \times 1\\\\) feature vector \\\\(\bf {x}\\\\), you multiply by the matrix \\\\(P\^T\\\\). The resulting vector \\\\(P\^T {\bf x}\\\\) is a feature vector with only \\\\(k\\\\) components, which accomplishes the desired dimensionality reduction. Also, as a side note, the first step in the algorithm reduces the covariance matrix computation to that of only performing [SVD](https://spark.apache.org/docs/1.3.1/mllib-dimensionality-reduction.html#singular-value-decomposition-svd) on the matrix obtained from step 1, which is numerically preferred, and necessary to extract the required eigenvectors.\n\n\n## Modifying the Engine Template\n\nWe will be modifying the engine template by first re-defining our `Query` class located in the `Engine.scala` script as follows:\n\n```scala\nclass Query(\n  val features : String\n) extends Serializable\n```\n\nWe will continue to make the required engine modifications by following the [DASE](/customize/) workflow. The next step is then to modify the engine's `DataSource` class which is the engine component in charge of reading the data from the event server.\n\n### Data Source Modifications\n\nThe following changes will be made to the `DataSource` class. 
We will redefine the method `readTraining` as follows:\n\n```scala\n...\n\noverride\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val data : RDD[Observation] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"digit\"),\n      eventNames = Some(List(\"digitData\"))\n    )(sc).map(e => Observation(\n      e.properties.get[Double](\"label\"),\n      e.properties.get[String](\"features\")\n    ))\n\n    new TrainingData(data)\n  }\n\n...\n```\n\nThis is essentially just making sure that the `entityType`, `eventName`, and `properties` fields match those specified in the script `export_events.py`. Also, a new class is introduced called `Observation` to serve as a wrapper for each data point's response and feature attributes, and the `TrainingData` is modified to hold an RDD of type `Observation` (instead of `LabeledPoints`):\n\n```scala\ncase class Observation (\n  label : Double,\n  features : String\n)\n\nclass TrainingData(\n  val observations: RDD[Observation]\n) extends Serializable\n```\n\n\nThis also means that the `readEval` method must be redefined in a similar fashion:\n\n```scala\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(dsp.evalK.nonEmpty, \"DataSourceParams.evalK must not be None\")\n\n    // The following code reads the data from data store. It is equivalent to\n    // the readTraining method. 
We copy-and-paste the exact code here for\n    // illustration purpose, a recommended approach is to factor out this logic\n    // into a helper function and have both readTraining and readEval call the\n    // helper.\n    val data : RDD[Observation] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"digit\"),\n      eventNames = Some(List(\"digitData\"))\n    )(sc).map(e => Observation(\n      e.properties.get[Double](\"label\"),\n      e.properties.get[String](\"features\")\n    )).cache\n    // End of reading from data store\n\n    // K-fold splitting\n    val evalK = dsp.evalK.get\n    val indexedPoints: RDD[(Observation, Long)] = data.zipWithIndex()\n\n    (0 until evalK).map { idx =>\n      val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1)\n      val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1)\n\n      (\n        new TrainingData(trainingPoints),\n        new EmptyEvaluationInfo(),\n        testingPoints.map {\n          p => (new Query(p.features), new ActualResult(p.label))\n        }\n      )\n    }\n  }\n```\n\nThe motivation for defining the `Observation` class is to make it easy to maintain the format of the data as it was imported, and to help you look at each RDD element as a data observation in its original format. All of the data processing will be taken care of via the `Preparator` class.\n\n### Preparator Modifications\n\nRemember that the Data Preparator is the engine component that takes care of the necessary data processing prior to the fitting of a predictive model in the Algorithm component. 
Hence this stage is where you will implement PCA.\n\nTo make sure there is no confusion, replace the import statements in the `Preparator.scala` script with the following:\n\n```scala\nimport org.apache.predictionio.controller.{Params, PPreparator}\nimport org.apache.spark.SparkContext\nimport org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}\nimport org.apache.spark.mllib.linalg.distributed.RowMatrix\nimport org.apache.spark.mllib.linalg.{DenseVector, Vectors, Vector}\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.regression.LabeledPoint\n```\n\nAlso, note that the PCA algorithm requires you to specify the hyperparameter \\\\(k\\\\), or the desired number of features. Thus you will first define a parameter class `PreparatorParams`:\n\n```scala\ncase class PreparatorParams (\nnumFeatures : Int\n) extends Params\n```\n\nThe next step is to implement the algorithm discussed in the above digression. This will all be done in the `PreparedData` class.\n\nRemember that the classes `Observation` and `Query` store the pixel features as a string separated by `\", \"`. Hence, for data processing, you first need a function, `string2Vector`, that will transform the feature strings to vectors. Now, you will need a function, `scaler`, that centers your observations (step 1 in PCA algorithm). Luckily, the `StandardScaler` and `StandardScalerModel` classes implemented in Spark MLLib can easily take care of this for you. The last part will be to actually compute the SVD of the data matrix which can also be easily done in MLLib. 
All this will be implemented in the `PreparedData` class which you will redefine as follows:\n\n```scala\nclass PreparedData(\n  val data : RDD[Observation],\n  val pp : PreparatorParams\n) extends Serializable {\n\n\n  /// Data Transformation Tools\n\n  // Transform features string member to a MLLib Vector.\n  private val string2Vector : (String => Vector) = (e : String) => Vectors.dense(\n    e.split(\", \").map(_.toDouble)\n  )\n\n  // Create function for centering data.\n  private val scaler : StandardScalerModel = new StandardScaler(true, false).fit(\n    data.map(e => string2Vector(e.features))\n  )\n\n  // Compute PCA output matrix.\n  private val pcaMatrix = new RowMatrix(data.map(\n    e => string2Vector(e.features)\n  )).computePrincipalComponents(pp.numFeatures).transpose\n\n  /// Observation transformation.\n  def transform (features : String): Vector = {\n    pcaMatrix.multiply(\n      new DenseVector(scaler.transform(string2Vector(features)).toArray)\n    )\n  }\n\n  // Data for inputting into learning Algorithm.\n  val transformedData : RDD[LabeledPoint] = data.map(e => LabeledPoint(\n    e.label,\n    transform(e.features)\n  ))\n}\n```\n\nThe function `transform` takes the string features and outputs a post-PCA feature vector. This is not made a private class member since it must also be used in transforming future queries. The member `transformedData` is the data set represented as an object that can be simply thrown into a classification model!\n\nThe final step is to incorporate the `PreparatorParams` into the `Preparator` class. 
This requires very little editing:\n\n```scala\nclass Preparator (pp: PreparatorParams) extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(trainingData.observations, pp)\n  }\n}\n```\n\nThe Data Preparator engine component is now complete, and we can move on to the Algorithm component.\n\n### Algorithm Modifications\n\nThe default algorithm used in the classification template is Naive Bayes. Now, this is a [probabilistic classifier](https://en.wikipedia.org/wiki/Probabilistic_classification) that makes certain assumptions about the data that do not really match the format of the PCA-transformed data. In particular, it assumes that the vectors consist of counts, which means it assumes non-negative feature values. However, after applying PCA to the data, you have no guarantee that the features will be purely non-negative. Given this, you will delete the script `NaiveBayesAlgorithm.scala`, and create one called `LRAlgorithm.scala` (in the `src/main/scala/` directory) which implements [Multinomial Logistic Regression](https://en.wikipedia.org/wiki/Multinomial_logistic_regression).\n\nThe implementation details are not discussed in this guide, as the point of this guide is to show how to incorporate **dimensionality reduction** techniques by using PCA. The preceding paragraph is included in order to emphasize the fact that applying the PCA transformation (or possibly other dimensionality reduction techniques) will largely remove the interpretability of features, so that model assumptions relying on such interpretations may no longer be satisfied. This is just something to keep in mind.\n\nThe following code is taken from the [text classification engine template](/gallery/template-gallery/#classification) and adapted to match the project definitions.  
Copy and paste into the new scala script, `LRAlgorithm.scala`:\n\n```scala\npackage FeatureReduction\n\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.spark.SparkContext\nimport org.apache.spark.ml.classification.LogisticRegression\nimport org.apache.spark.sql.DataFrame\nimport org.apache.spark.sql.functions\nimport org.apache.spark.sql.SQLContext\nimport org.apache.spark.sql.UserDefinedFunction\nimport scala.math._\n\n\ncase class LRAlgorithmParams (\nregParam  : Double\n) extends Params\n\n\nclass LRAlgorithm(\nval sap: LRAlgorithmParams\n) extends P2LAlgorithm[PreparedData, LRModel, Query, PredictedResult] {\n\n  // Train your model.\n  def train(sc: SparkContext, pd: PreparedData): LRModel = {\n    new LRModel(sc, pd, sap.regParam)\n  }\n\n  // Prediction method for trained model.\n  def predict(model: LRModel, query: Query): PredictedResult = {\n    model.predict(query.features)\n  }\n}\n\nclass LRModel (\nsc : SparkContext,\npd : PreparedData,\nregParam : Double\n) extends Serializable {\n\n  // 1. Import SQLContext for creating DataFrame.\n  private val sql : SQLContext = new SQLContext(sc)\n  import sql.implicits._\n\n  // 2. 
Initialize logistic regression model with regularization parameter.\n  private val lr = new LogisticRegression()\n  .setMaxIter(100)\n  .setThreshold(0.5)\n  .setRegParam(regParam)\n\n  private val labels : Seq[Double] = pd.transformedData.map(e => e.label).distinct.collect.toSeq\n\n  private case class LREstimate (\n  coefficients : Array[Double],\n  intercept : Double\n  ) extends Serializable\n\n  private val data = labels.foldLeft(pd.transformedData.toDF)( //transform to Spark DataFrame\n\n    // Add the different binary columns for each label.\n    (data : DataFrame, label : Double) => {\n      // function: multiclass labels --> binary labels\n      val f : UserDefinedFunction = functions.udf((e : Double) => if (e == label) 1.0 else 0.0)\n\n      data.withColumn(label.toInt.toString, f(data(\"label\")))\n    }\n  )\n\n  // 3. Create a logistic regression model for each class.\n  private val lrModels : Seq[(Double, LREstimate)] = labels.map(\n    label => {\n      val lab = label.toInt.toString\n\n      val fit = lr.setLabelCol(lab).fit(\n        data.select(lab, \"features\")\n      )\n\n      // Return (label, feature coefficients, and intercept term.\n      (label, LREstimate(fit.weights.toArray, fit.intercept))\n\n    }\n  )\n\n  // 4. Enable vector inner product for prediction.\n\n  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {\n    x.zip(y).map(e => e._1 * e._2).sum\n  }\n\n  // 5. 
Define prediction rule.\n  def predict(text : String): PredictedResult = {\n    val x: Array[Double] = pd.transform(text).toArray\n\n    // Logistic Regression binary formula for positive probability.\n    // According to MLLib documentation, class labeled 0 is used as pivot.\n    // Thus, we are using:\n    // log(p1/p0) = log(p1/(1 - p1)) = b0 + xTb =: z\n    // p1 = exp(z) * (1 - p1)\n    // p1 * (1 + exp(z)) = exp(z)\n    // p1 = exp(z)/(1 + exp(z))\n    val pred = lrModels.map(\n      e => {\n        val z = exp(innerProduct(e._2.coefficients, x) + e._2.intercept)\n        (e._1, z / (1 + z))\n      }\n    ).maxBy(_._2)\n\n    new PredictedResult(pred._1)\n  }\n\n}\n```\n\n### Serving Modifications\n\nSince you did not make any modifications in the definition of the class `PredictedResult`, the Serving engine component does not need to be modified.\n\n### Evaluation Modifications\n\nHere the only modifications you need to make are in the `EngineParamsList` object:\n\n```scala\nobject EngineParamsList extends EngineParamsGenerator {\n  // Define list of EngineParams used in Evaluation\n\n  // First, we define the base engine params. It specifies the appId from which\n  // the data is read, and a evalK parameter is used to define the\n  // cross-validation.\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appName = \"FeatureReduction\", evalK = Some(3)),\n    preparatorParams = PreparatorParams(numFeatures = 250))\n\n  // Second, we specify the engine params list by explicitly listing all\n  // algorithm parameters. 
In this case, we evaluate 3 engine params, each with\n  // a different algorithm params value.\n  engineParamsList = Seq(\n    baseEP.copy(algorithmParamsList = Seq((\"lr\", LRAlgorithmParams(0.5)))),\n    baseEP.copy(algorithmParamsList = Seq((\"lr\", LRAlgorithmParams(2.5)))),\n    baseEP.copy(algorithmParamsList = Seq((\"lr\", LRAlgorithmParams(7.5)))))\n}\n```\n\nThe main modifications reflect the change in algorithm, and the addition of the `PreparatorParams` class. This concludes the modifications to the DASE components. There are only a few modifications left:\n\n### Other Engine Modifications\n\nThere are two last modifications before we have a working template. First, since you deleted the `NaiveBayesAlgorithm.scala` script and replaced it with the `LRAlgorithm.scala` script, you must modify the `ClassificationEngine` object:\n\n```scala\nobject ClassificationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"lr\" -> classOf[LRAlgorithm]\n      ), classOf[Serving]\n    )\n  }\n}\n```\n\nNext you will have to also modify the `engine.json` file, which is where you set the different component parameters:\n\n```json\n{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"FeatureReduction.ClassificationEngine\",\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"FeatureReduction\"\n    }\n  },\n  \"preparator\":{\n    \"params\": {\n      \"numFeatures\": 250\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"lr\",\n      \"params\": {\n        \"regParam\": 1.0\n      }\n    }\n  ]\n}\n```\n\n## Testing the Engine\n\nCongratulations, the engine is now ready to go. 
Firstly, go ahead and run the following command again:\n\n```\npio build\n```\n\nThe easiest way to begin testing it right away is to do an evaluation:\n\n```\npio eval FeatureReduction.AccuracyEvaluation FeatureReduction.EngineParamsList\n```\n\nGiven the current evaluation settings and logistic regression implementation (multinomial logistic regression from binary logistic regression): evalK = 3, 3 parameters being tested, and 10 different classes this will be creating a binary logistic regression model \\\\(3 \\times 3 \\times 10 = 90\\\\) times, so that it will take some time to run locally on your machine. You can decrease the latter number of models by: (a) decreasing evalK to 2, or (b) reduce the number of parameters being tested to one or two. You can also increase the driver and executor memory to increase performance:\n\n```\npio eval FeatureReduction.AccuracyEvaluation FeatureReduction.EngineParamsList -- --driver-memory xG --executor-memory yG\n```\n\nHere `x` and `y` should be replaced by whole numbers. Alternatively, you can train and deploy your engine as usual:\n\n```\npio train\npio deploy\n```\n\nTo query it, you will first need some test data. Go ahead and [download](https://www.kaggle.com/c/digit-recognizer/data) the `test.csv` file and place it in the `data` directory. This contains 28,000 unlabeled pixel images. 
Next create the Python script `query.py` in the same data directory, and copy and paste the following:\n\n```python\nfrom PIL import Image\nimport sys\nimport os\n\nobs_num = int(sys.argv[1])\n\nf = open('./data/test.csv', 'r').read().split('\\n')\nvar_names = f[0].split(',')\nf = f[1 : -1]\nf = [list(map(int, x)) for x in (y.split(\",\") for y in f)]\n\n\ndef create_image(pixel_array):\n    img = Image.new('RGB', (28, 28))\n    pixels = img.load()\n    count = 0\n    for i in range(img.size[0]):\n        for j in range(img.size[1]):\n            pixels[i, j] = (i, j, pixel_array[count])\n            count += 1\n    return img\n\ncreate_image(f[obs_num]).show()\n\nqry = \"curl -H 'Content-Type: application/json' -d '{\\\"features\\\":\\\"...\\\"}' localhost:8000/queries.json; echo ' '\"\n\nos.system(qry.replace(\"...\", str(f[obs_num])[1 : -1]))\n```\n\nIn your engine directory file, you can now use the following line to query the engine with a test observation by using the command\n\n```\npython data/query.py k\n```\n\nwhere you replace `k` with a number between 0 and 27,999 (corresponds to test observations). This will generate the digit image first, and then immediately return the predicted digit for your reference.\n"
  },
  {
    "path": "docs/manual/source/machinelearning/modelingworkflow.html.md",
    "content": "---\ntitle: Modeling Workflow and DASE\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nIn addition to the DASE components, we also introduce the Data Model and Training Model abstractions. The Data Model abstraction refers to the set of Scala classes dealing with the implementation of modeling choices relating to feature **extraction**, **preparation**, and/or **selection**. For this illustration, this only includes the vectorization of text and t.f.-i.d.f. processing which is entirely implemented in the PreparedData class. The Training Model abstraction refers to any set of classes that individually take in a set of feature observations and output a predictive model. This predictive model is leveraged by the Algorithm component to produce prediction results to queries in real-time. In the engine template, this abstraction is implemented in the NBModel class. 
**Please note that these are conceptual abstractions that are designed to make engine development easier by decoupling class functionality.** Keeping these abstractions in mind will help you in the future with debugging your code, and also make it easier to incorporate different modeling ideas into your engine.\n\nThe figure below shows a graphical representation of the engine architecture just described, as well as its interactions with your web/app and a provided Event Server:\n\n\n![Engine Overview](/images/demo/text_classification_template/engine_overview.png)\n\n## Training The Model\n\nThis section will guide you through the two Training Model implementations that come with this engine template. Recall that the Training Model abstraction refers to an arbitrary set of Scala classes that outputs a predictive model (i.e. implements some method that can be used for prediction). The general problem this engine template is tackling is text classification, so that our Training Model abstraction domain is restricted to implementations producing classifiers. In particular, the classification model that is implemented in this engine template is based on Multinomial Naive Bayes using t.f.-i.d.f. vectorized text.\n"
  },
  {
    "path": "docs/manual/source/partials/_action_call.html.slim",
    "content": ".action-call\n  .container\n    .row\n      .col-md-4.col-xs-12\n        .action-square\n          h1 Get Started\n          p\n            | Please take a look at the\n            = succeed \".\" do\n              a href=\"//predictionio.apache.org/\" target=\"blank\"  Docs\n            | It is helpful to read through a few examples.\n      .col-md-4.col-xs-12\n        .action-square\n          h1 Get Help\n          p\n            | Join our\n            a> href=\"//groups.google.com/forum/#!forum/predictionio-user\" target=\"blank\"  Forum\n            | to discuss, get help and help others in the PredictionIO community.\n      .col-md-4.col-xs-12\n        .action-square\n          h1 href=\"#\"  Get Involved\n          p\n            | Check out the source code on\n            a> href=\"//github.com/PredictionIO/PredictionIO\" target=\"blank\"  GitHub\n            | and report issues on the\n            = succeed \".\" do\n              a href=\"//github.com/PredictionIO/PredictionIO/issues\" target=\"blank\"  Bug Tracker\n"
  },
  {
    "path": "docs/manual/source/partials/_edit_page.html.slim",
    "content": "#edit-page\n  p\n    = link_to '<i class=\"fa fa-pencil\"></i> Edit Page', github_url\n"
  },
  {
    "path": "docs/manual/source/partials/_footer.html.slim",
    "content": "\n\nfooter\n  .container\n    .seperator\n    .row\n      .col-md-6.footer-link-column\n        .footer-link-column-row\n          h4 Community\n          ul\n            li: a href=\"//predictionio.apache.org/install/\" target=\"blank\" Download\n            li: a href=\"//predictionio.apache.org/\" target=\"blank\" Docs\n            li: a href=\"//github.com/apache/predictionio\" target=\"blank\" GitHub\n            li: a href=\"mailto:user-subscribe@predictionio.apache.org\" target=\"blank\" Subscribe to User Mailing List\n            li: a href=\"//stackoverflow.com/questions/tagged/predictionio\" target=\"blank\" Stackoverflow\n      .col-md-6.footer-link-column\n        .footer-link-column-row\n          h4 Contribute\n          ul\n            li: a href=\"//predictionio.apache.org/community/contribute-code/\" target=\"blank\" Contribute\n            li: a href=\"//github.com/apache/predictionio\" target=\"blank\" Source Code\n            li: a href=\"//issues.apache.org/jira/browse/PIO\" target=\"blank\" Bug Tracker\n            li: a href=\"mailto:dev-subscribe@predictionio.apache.org\" target=\"blank\" Subscribe to Development Mailing List\n    .row\n      .col-md-12.footer-link-column\n         p Apache PredictionIO, PredictionIO, Apache, the Apache feather logo, and the Apache PredictionIO project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.\n         p All other marks mentioned may be trademarks or registered trademarks of their respective owners.\n\n  #footer-bottom\n    .container\n      .row\n        .col-md-12\n          #footer-logo-wrapper\n            = image_tag 'logos/logo-white.png', alt: 'PredictionIO'\n            span ®\n          #social-icons-wrapper\n            a.github-button> href=\"https://github.com/apache/predictionio\" data-icon=\"octicon-star\" data-show-count=\"true\" aria-label=\"Star apache/predictionio on GitHub\" Star\n        
    a.github-button> href=\"https://github.com/apache/predictionio/fork\" data-icon=\"octicon-repo-forked\" data-show-count=\"true\" aria-label=\"Fork apache/predictionio on GitHub\" Fork\n            script#github-bjs(async defer src=\"https://buttons.github.io/buttons.js\")\n            a> href=\"https://twitter.com/predictionio\" target=\"blank\"\n              = image_tag 'icons/twitter.png', alt: 'PredictionIO on Twitter'\n            a> href=\"https://www.facebook.com/predictionio\" target=\"blank\"\n              = image_tag 'icons/facebook.png', alt: 'PredictionIO on Facebook'\n"
  },
  {
    "path": "docs/manual/source/partials/_header.html.slim",
    "content": "header\n  .container#header-wrapper\n    .row\n      .col-sm-12\n        #logo-wrapper\n          span#drawer-toggle\n          a href=\"#\"\n          = link_to 'http://predictionio.apache.org/' do\n            = image_tag 'logos/logo.png', alt: 'Apache PredictionIO', id: 'logo'\n          span ®\n        #menu-wrapper\n          #pill-wrapper\n            a.pill.left> href=\"/gallery/template-gallery\" TEMPLATES\n            a.pill.right href=\"//github.com/apache/predictionio/\" OPEN SOURCE\n        = image_tag 'icons/search-glass.png', class: 'mobile-search-bar-toggler hidden-md hidden-lg'\n"
  },
  {
    "path": "docs/manual/source/partials/_search_bar.html.slim",
    "content": "#search-bar-row-wrapper\n  .container-fluid#search-bar-row\n    .row\n      .col-md-9.col-sm-11.col-xs-11\n        .hidden-md.hidden-lg#mobile-page-heading-wrapper\n          p PredictionIO Docs\n          h4\n            = page_title_in_nav_menu data.nav.main.root\n        h4.hidden-sm.hidden-xs PredictionIO Docs\n      .col-md-3.col-sm-1.col-xs-1.hidden-md.hidden-lg\n        = image_tag 'icons/down-arrow.png', id: 'left-menu-indicator'\n      .col-md-3.col-sm-12.col-xs-12.swiftype-wrapper\n        = partial 'nav/swiftype'\n      .mobile-left-menu-toggler.hidden-md.hidden-lg"
  },
  {
    "path": "docs/manual/source/partials/_segment.html.slim",
    "content": "- if build?\n  javascript:\n    !function(){var analytics=window.analytics=window.analytics||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error(\"Segment snippet included twice.\");else{analytics.invoked=!0;analytics.methods=[\"trackSubmit\",\"trackClick\",\"trackLink\",\"trackForm\",\"pageview\",\"identify\",\"group\",\"track\",\"ready\",\"alias\",\"page\",\"once\",\"off\",\"on\"];analytics.factory=function(t){return function(){var e=Array.prototype.slice.call(arguments);e.unshift(t);analytics.push(e);return analytics}};for(var t=0;t<analytics.methods.length;t++){var e=analytics.methods[t];analytics[e]=analytics.factory(e)}analytics.load=function(t){var e=document.createElement(\"script\");e.type=\"text/javascript\";e.async=!0;e.src=(\"https:\"===document.location.protocol?\"https://\":\"http://\")+\"cdn.segment.com/analytics.js/v1/\"+t+\"/analytics.min.js\";var n=document.getElementsByTagName(\"script\")[0];n.parentNode.insertBefore(e,n)};analytics.SNIPPET_VERSION=\"3.0.1\";\n      analytics.load(\"YlF3updaI3DR96hnNgSGpR3PPBUGDzt8\");\n      analytics.page()\n      }}();\n"
  },
  {
    "path": "docs/manual/source/partials/_swiftype.html.slim",
    "content": "javascript:\n  (function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){\n  (w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);\n  e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);\n  })(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');\n\n  _st('install','HaUfpXXV87xoB_zzCQ45');\n"
  },
  {
    "path": "docs/manual/source/partials/_table_of_content.html.slim",
    "content": "- if table_of_contents(current_page)\n  #table-of-content-wrapper\n    h5 On this page\n    == table_of_contents(current_page)\n    hr\n    = link_to github_url, id: 'edit-page-link'\n      = image_tag 'icons/edit-pencil.png'\n      | Edit this page\n- else\n  #table-of-content-wrapper\n    = link_to github_url, id: 'edit-page-link'\n      = image_tag 'icons/edit-pencil.png'\n      | Edit this page"
  },
  {
    "path": "docs/manual/source/partials/head/_base.html.slim",
    "content": "title = rendered_title\n= partial 'head/meta'\n= partial 'head/favicon'\n= partial 'head/stylesheets'\n= partial 'head/javascripts'\n"
  },
  {
    "path": "docs/manual/source/partials/head/_favicon.html.slim",
    "content": "link href=\"/images/favicon/normal.png\" rel=\"shortcut icon\"\nlink href=\"/images/favicon/apple.png\" rel=\"apple-touch-icon\"\n"
  },
  {
    "path": "docs/manual/source/partials/head/_javascripts.html.slim",
    "content": "/[if lt IE 9]\n= javascript_include_tag '//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js'\n= javascript_include_tag '//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'\n\n/azo sans font\n= javascript_include_tag '//use.typekit.net/pqo0itb.js'\njavascript:\n  try{Typekit.load({ async: true });}catch(e){}\n"
  },
  {
    "path": "docs/manual/source/partials/head/_meta.html.slim",
    "content": "meta charset=\"utf-8\"\nmeta content=\"IE=edge,chrome=1\" http-equiv=\"X-UA-Compatible\"\nmeta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\"\nmeta class=\"swiftype\" name=\"title\" data-type=\"string\" content=rendered_title\n- if current_page.data.description\n  meta name=\"description\" content=current_page.data.description\n  meta class=\"swiftype\" name=\"body\" data-type=\"text\" content=current_page.data.description\nlink rel=\"canonical\" href=absolute_url(current_page.url)\n"
  },
  {
    "path": "docs/manual/source/partials/head/_stylesheets.html.slim",
    "content": "link href=\"//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800\" rel=\"stylesheet\"\nlink href=\"//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css\" rel=\"stylesheet\"\n\n= stylesheet_link_tag 'application'\n"
  },
  {
    "path": "docs/manual/source/partials/nav/_breadcrumbs.html.slim",
    "content": "- if crumbs\n  #breadcrumbs.hidden-sm.hidden.xs\n    ul\n      - crumbs.each do |node|\n        li\n          - if crumbs.last == node\n            span.last = node.body\n          - else\n            = link_to node.body, node.url, class: node.css\n            span.spacer &gt;"
  },
  {
    "path": "docs/manual/source/partials/nav/_header.html.slim",
    "content": "nav#nav-header\n  ul\n    li = link_to 'Docs', '/'\n    li = link_to 'Engine Templates', '/gallery/template-gallery'\n    li = link_to 'Community', '/community/'\n    li = link_to 'Blog', 'http://blog.prediction.io/'\n"
  },
  {
    "path": "docs/manual/source/partials/nav/_main.html.slim",
    "content": "nav#nav-main\n  ul\n    - data.nav.main.root.each do |node|\n      = partial 'nav/node', locals: { node: node, level: 1 }\n    - unless build?\n      - data.nav.build.root.each do |node|\n        = partial 'nav/node', locals: { node: node, level: 1 }\n"
  },
  {
    "path": "docs/manual/source/partials/nav/_node.html.slim",
    "content": "li class=\"level-#{level}\"\n  - icon_or_nil = node.children ? 'caret-right' : nil\n  - css = node.children ? 'expandible' : 'final'\n\n  = link_to_with_active \"<span>#{node.body}</span>\".html_safe, node.url, class: \"#{node.css} #{css}\".strip\n\n  - if node.children\n    ul\n      - node.children.each do |child|\n        = partial 'nav/node', locals: { node: child, level: level + 1 }\n"
  },
  {
    "path": "docs/manual/source/partials/nav/_page.html.slim",
    "content": "nav#nav-page\n  = link_to '<i class=\"fa fa-pencil\"></i>', github_url, class: 'github'\n"
  },
  {
    "path": "docs/manual/source/partials/nav/_swiftype.html.slim",
    "content": ".swiftype\n  form.search-form\n    = image_tag 'icons/search-glass.png', class: 'search-box-toggler hidden-xs hidden-sm'\n    .search-box\n      = image_tag 'icons/search-glass.png'\n      input type=\"text\" id=\"st-search-input\" class=\"st-search-input\" placeholder=\"Search Doc...\"\n    = image_tag 'icons/drawer-toggle-active.png', class: 'swiftype-row-hider hidden-md hidden-lg'"
  },
  {
    "path": "docs/manual/source/partials/shared/dase/_dase.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nPredictionIO's DASE architecture brings the separation-of-concerns design\nprinciple to predictive engine development. DASE stands for the following\ncomponents of an engine:\n\n* **D**ata - includes Data Source and Data Preparator\n* **A**lgorithm(s)\n* **S**erving\n* **E**valuator\n\nLet's look at the code and see how you can customize the engine you built from \nthe <%= template_name %>.\n\n<% if defined?(evaluation_link) %>\nNOTE: Evaluator will not be covered in this tutorial. \nPlease visit [evaluation explained](<%= evaluation_link%>) for using evaluation.\n<% else %>\nNOTE: Evaluator will not be covered in this tutorial.\n<% end %>\n"
  },
  {
    "path": "docs/manual/source/partials/shared/datacollection/_parquet.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Export Events to Apache Parquet\n\nPredictionIO supports exporting your events to [Apache\nParquet](http://parquet.apache.org/), a columnar storage format that\nallows you to query quickly.\n\nLet's export the data we imported in [Recommendation Engine Template Quick\nStart](/templates/recommendation/quickstart/#import-sample-data), and assume the\nApp ID is 1.\n\n```\n$ $PIO_HOME/bin/pio export --appid 1 --output /tmp/movies --format parquet\n```\n\nAfter the command has finished successfully, you should see something similar to\nthe following.\n\n```\nroot\n |-- creationTime: string (nullable = true)\n |-- entityId: string (nullable = true)\n |-- entityType: string (nullable = true)\n |-- event: string (nullable = true)\n |-- eventId: string (nullable = true)\n |-- eventTime: string (nullable = true)\n |-- properties: struct (nullable = true)\n |    |-- rating: double (nullable = true)\n |-- targetEntityId: string (nullable = true)\n |-- targetEntityType: string (nullable = true)\n```\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_dependent_services.html.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n### Start PredictionIO and Dependent Services\n\nWARNING: If you are using PostgreSQL or MySQL, skip `pio-start-all` and\n`pio-stop-all`, and do `PredictionIO-<%= data.versions.pio %>/bin/pio eventserver &`\ninstead.\n\nSimply do `PredictionIO-<%= data.versions.pio %>/bin/pio-start-all` and you\nshould see something similar to the following:\n\n```\n$ PredictionIO-<%= data.versions.pio %>/bin/pio-start-all\nStarting Elasticsearch...\nStarting HBase...\nstarting master, logging to /home/abc/PredictionIO-<%= data.versions.pio %>/vendors/<%= data.versions.hbase_basename %>/bin/../logs/hbase-abc-master-yourhost.local.out\nWaiting 10 seconds for HBase to fully initialize...\nStarting PredictionIO Event Server...\n$\n```\n\nYou may use `jps` to verify that you have everything started:\n\n```\n$ jps -l\n15344 org.apache.hadoop.hbase.master.HMaster\n15409 org.apache.predictionio.tools.console.Console\n15256 org.elasticsearch.bootstrap.Elasticsearch\n15469 sun.tools.jps.Jps\n$\n```\n\nA running setup will have these up and running:\n\n- org.apache.predictionio.tools.console.Console\n- org.apache.hadoop.hbase.master.HMaster\n- org.elasticsearch.bootstrap.Elasticsearch\n\nAt any time, you 
can run `PredictionIO-<%= data.versions.pio %>/bin/pio status`\nto check the status of the dependencies.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_elasticsearch.html.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[Elasticsearch](https://www.elastic.co/) can be used as a storage backend for\nthe meta data repository.\n\nINFO: Starting from 0.11.0, if you build PredictionIO against Elasticsearch 5+,\nyou may also use it as a backend for the event data repository.\n\n```\n$ wget https://artifacts.elastic.co/downloads/elasticsearch/<%= data.versions.elasticsearch_download_filename %>.tar.gz\n$ tar zxvfC <%= data.versions.elasticsearch_download_filename %>.tar.gz PredictionIO-<%= data.versions.pio %>/vendors\n```\n\nINFO: If you decide to install Elasticsearch to another location, you must edit\n`PredictionIO-<%= data.versions.pio %>/conf/pio-env.sh` and change the\n`PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME` variable to point to your own\nElasticsearch installation.\n\nINFO: If you are using a shared network, change the `network.host` line in\n`PredictionIO-<%= data.versions.pio %>/vendors/<%=\ndata.versions.elasticsearch_download_filename %>/config/elasticsearch.yml` to\n`network.host: 127.0.0.1` because by default, Elasticsearch looks for other\nmachines on the network upon setup and you may run into weird errors if there\nare other machines that are also running Elasticsearch.\n\nIf you are 
not using the default setting at `localhost`, you may change the\nfollowing in `PredictionIO-<%= data.versions.pio %>/conf/pio-env.sh` to fit your\nsetup.\n\n```\nPIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch\nPIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost\nPIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200\n```\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_hbase.html.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[HBase](http://hbase.apache.org) can be used as the backend of the event data\nrepository.\n\nDownload HBase from a [mirror](http://www.apache.org/dyn/closer.cgi/hbase/<%=\ndata.versions.hbase_version %>/<%= data.versions.hbase_basename %>-<%=\ndata.versions.hbase_variant %>.tar.gz). Extract HBase by following the example\nbelow.\n\n```\n$ tar zxvfC <%= data.versions.hbase_basename %>-<%= data.versions.hbase_variant %>.tar.gz PredictionIO-<%= data.versions.pio %>/vendors\n```\n\nINFO: If you decide to install HBase to another location, you must edit\n`PredictionIO-<%= data.versions.pio %>/conf/pio-env.sh` and change the\n`PIO_STORAGE_SOURCES_HBASE_HOME` variable to point to your own HBase\ninstallation.\n\nYou will need to at least add a minimal configuration to HBase to start it in\nstandalone mode. Details can be found\n[here](http://hbase.apache.org/book/quickstart.html). 
Here, we are showing a\nsample minimal configuration.\n\nINFO: For production deployment, run a fully distributed HBase configuration.\n\nEdit `PredictionIO-<%= data.versions.pio %>/vendors/<%=\ndata.versions.hbase_basename %>/conf/hbase-site.xml`.\n\n```\n<configuration>\n  <property>\n    <name>hbase.rootdir</name>\n    <value>file:///home/abc/PredictionIO-<%= data.versions.pio %>/vendors/<%= data.versions.hbase_basename %>/data</value>\n  </property>\n  <property>\n    <name>hbase.zookeeper.property.dataDir</name>\n    <value>/home/abc/PredictionIO-<%= data.versions.pio %>/vendors/<%= data.versions.hbase_basename %>/zookeeper</value>\n  </property>\n</configuration>\n```\n\nINFO: HBase will create `hbase.rootdir` automatically to store its data.\n\nEdit `PredictionIO-<%= data.versions.pio %>/vendors/<%=\ndata.versions.hbase_basename %>/conf/hbase-env.sh` to set `JAVA_HOME` for the\ncluster. For example:\n\n```\nexport JAVA_HOME=/usr/lib/jvm/java-8-oracle/jre\n```\n\nFor Mac users, use this instead (change `1.8` to `1.7` if you have Java 7\ninstalled):\n\n```\nexport JAVA_HOME=`/usr/libexec/java_home -v 1.8`\n```\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_postgres.html.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[PostgreSQL](https://www.postgresql.org/) can be used by PredictionIO as a\nstorage backend for all 3 repositories (event data, meta data, and model data).\nThis is perhaps the easiest route if you are trying PredictionIO for the first\ntime.\n\nMake sure you have PostgreSQL installed. For Mac Users,\n[Homebrew](https://brew.sh/) is recommended and can be used as\n\n```\n$ brew install postgresql\n```\nor on Ubuntu:\n\n```\n$ apt-get install postgresql\n```\n\nNow that PostgreSQL is installed use the following commands\n\n```\n$ createdb pio\n```\n\nIf you get an error of the form `could not connect to server: No such file or\ndirectory`, then you must first start the server manually:\n\n```\n$ pg_ctl -D /usr/local/var/postgres -l /usr/local/var/postgres/server.log start\n```\n\nFinally use the command:\n\n```\n$ psql -c \"create user pio with password 'pio'\"\n```\n\nStarting from 0.11.0, PredictionIO no longer bundles JDBC drivers. Download the\nPostgreSQL JDBC driver from the [official web\nsite](https://jdbc.postgresql.org/), and put the JAR file in the `lib`\nsubdirectory. 
Afterwards, you need to edit `conf/pio-env.sh` and change the\n`POSTGRES_JDBC_DRIVER` variable to point to the correct JAR.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_proceed_template.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can proceed to [Choosing an Engine Template](/start/download), or continue the QuickStart guide of the Engine template if you have already chosen one.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/install/_spark.html.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n[Apache Spark](http://spark.apache.org) is the default processing engine for\nPredictionIO. Download and extract it.\n\n```\n$ wget https://archive.apache.org/dist/spark/spark-<%= data.versions.spark %>/<%= data.versions.spark_download_filename %>.tgz\n$ tar zxvfC <%= data.versions.spark_download_filename %>.tgz PredictionIO-<%= data.versions.pio %>/vendors\n```\n\nINFO: If you decide to install Apache Spark to another location, you must edit\n`PredictionIO-<%= data.versions.pio %>/conf/pio-env.sh` and change the\n`SPARK_HOME` variable to point to your own Apache Spark installation.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_collect_data.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can send these events to PredictionIO Event Server in real-time easily by\nmaking an HTTP request or through the provided SDK. Please see [App Integration Overview](/appintegration/) for more details on how to integrate your app with the SDK.\n\nLet's try sending events to EventServer with the following `curl` commands (the corresponding SDK code is shown in other tabs).\n\nReplace `<ACCESS_KEY>` with the Access Key generated in the above steps. Note that `localhost:7070` is the default URL of the Event Server.\n\nFor convenience, set your access key to a shell variable by running:\n\n`$ ACCESS_KEY=<ACCESS_KEY>`\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_create_app.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou will need to create a new App in PredictionIO to store all the data of your app. The data collected will be used for machine learning modeling.\n\nLet's assume you want to use this engine in an application named \"MyApp1\".  Run the following to create a new app \"MyApp1\":\n\n```\n$ pio app new MyApp1\n```\n\nYou should find the following in the console output:\n\n```\n...\n[INFO] [App$] Initialized Event Store for this app ID: 1.\n[INFO] [App$] Created new app:\n[INFO] [App$]       Name: MyApp1\n[INFO] [App$]         ID: 1\n[INFO] [App$] Access Key: 3mZWDzci2D5YsqAnqNnXH9SB6Rg3dsTBs8iHkK6X2i54IQsIZI1eEeQQyMfs7b3F\n```\n\nNote that an **App ID** and an **Access Key** are created for this App \"MyApp1\". You will need the **Access Key** when you collect data with EventServer for this App.\n\nYou can list all of the apps created, along with their corresponding IDs and Access Keys, by running the following command:\n\n```\n$ pio app list\n```\n\nYou should see a list of apps created. 
For example:\n\n```\n[INFO] [App$]                 Name |   ID |                                                       Access Key | Allowed Event(s)\n[INFO] [App$]               MyApp1 |    1 | 3mZWDzci2D5YsqAnqNnXH9SB6Rg3dsTBs8iHkK6X2i54IQsIZI1eEeQQyMfs7b3F | (all)\n[INFO] [App$]               MyApp2 |    2 | io5lz6Eg4m3Xe4JZTBFE13GMAf1dhFl6ZteuJfrO84XpdOz9wRCrDU44EUaYuXq5 | (all)\n[INFO] [App$] Finished listing 2 app(s).\n```\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_create_engine.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nNow let's create a new engine called *<%= engine_name %>* by downloading the <%= template_name %>. Go to a directory where you want to put your engine and run the following:\n\n```\n$ git clone https://github.com/<%= template_repo %>.git <%= engine_name %>\n$ cd <%= engine_name %>\n```\n\nA new directory *<%= engine_name %>* is created, where you can find the downloaded engine template.\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_deploy.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n### Building\n\nStart with building your *<%= engine_name %>* engine. Run the following command:\n\n```\n$ pio build --verbose\n```\n\nThis command should take few minutes for the first time; all subsequent builds\nshould be less than a minute. You can also run it without `--verbose` if you don't want to see all the log messages.\n\nUpon successful build, you should see a console message similar to the\nfollowing.\n\n```\n[INFO] [Console$] Your engine is ready for training.\n```\n\n### Training the Predictive Model\n\nTo train your engine, run the following command:\n\n```\n$ pio train\n```\n\nWhen your engine is trained successfully, you should see a console message\nsimilar to the following.\n\n```\n[INFO] [CoreWorkflow$] Training completed successfully.\n```\n\n### Deploying the Engine\n\nNow your engine is ready to deploy. Run:\n\n```\n$ pio deploy\n```\n\nWhen the engine is deployed successfully and running, you should see a console message similar to the following:\n\n```\n[INFO] [HttpListener] Bound to /0.0.0.0:8000\n[INFO] [MasterActor] Bind successful. 
Ready to serve.\n```\n\nDo not kill the deployed engine process.\n\nBy default, the deployed engine binds to http://localhost:8000. You can visit\nthat page in your web browser to check its status.\n\n![Engine Status](/images/engine-server.png)\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_deploy_enginejson.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nNow you can build, train, and deploy the engine. First, make sure you are under the `<%= engine_name %>` directory.\n\n```\n$ cd <%= engine_name %>\n```\n\n### Engine.json\n\nUnder the directory, you should find an `engine.json` file; this is where you specify parameters for the engine.\n\nWARNING: Modify this file to make sure the `appName` parameter matches the **App Name** you created earlier (e.g. \"MyApp1\" if you follow the quickstart).\n\n```\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n```\n\nNOTE: You may see `appId` in engine.json instead, which means you are using an old template. In this case, make sure the `appId` defined in the file matches your **App ID**. Alternatively, you can download the latest version of the template or follow our [upgrade instructions](/resources/upgrade/#upgrade-to-0.9.2) to modify the template to use `appName` as a parameter.\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_import_sample_data.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis engine requires more data in order to train a useful model. Instead of sending more events one by one in real time, for quickstart demonstration purpose, we are going to use a script to import more events in batch.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_install.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nFirst you need to [install PredictionIO <%= data.versions.pio %>](/install) (if\nyou haven't done it).\n\nLet's say you have installed PredictionIO at `/home/yourname/PredictionIO/`.\nFor convenience, add PredictionIO's binary command path to your `PATH`, i.e.\n`/home/yourname/PredictionIO/bin`:\n\n```\n$ PATH=$PATH:/home/yourname/PredictionIO/bin; export PATH\n```\n\nNOTE: If you launched **PredictionIO AWS instance**, the path is located at\n`/opt/PredictionIO/bin`.\n\nOnce you have completed the installation process, please make sure all the\ncomponents (PredictionIO Event Server, Elasticsearch, and HBase) are up and\nrunning.\n\nNOTE: If you launched **PredictionIO AWS instance**, you can skip `pio-start-all`. 
All components should have been started automatically.\n\nIf you are using PostgreSQL or MySQL, run the following to start PredictionIO\nEvent Server:\n\n```\n$ pio eventserver &\n```\n\nIf instead you are running HBase and Elasticsearch, run the following to start\nall PredictionIO Event Server, HBase, and Elasticsearch:\n\n```\n$ pio-start-all\n```\n\nYou can check the status by running:\n\n```\n$ pio status\n```\n\nIf everything is OK, you should see the following outputs:\n\n```\n...\n\n(sleeping 5 seconds for all messages to show up...)\nYour system is all ready to go.\n```\n\nNOTE: To further troubleshoot, please see [FAQ - Using\nPredictionIO](/resources/faq/#using-predictionio).\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_install_python_sdk.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nFirst, you will need to install the Python SDK in order to run the sample data import script. To install the Python SDK, run:\n\n```\n$ pip install predictionio\n```\n\nor\n\n```\n$ easy_install predictionio\n```\n\nNOTE: You may need `sudo` access if you run into permission issues (e.g. `sudo pip install predictionio`).\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_install_sdk.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can send these events to PredictionIO Event Server in real-time easily by\nmaking an HTTP request or through the `EventClient` of an SDK. You can refer to [App Integration Overview](/appintegration/) for more details later.\n\nThe following is sample code of using different SDKs to import the events for this template, which you can refer to when you integrate with your app later.\n\nFor quickstart demonstration purposes, a script is provided to import sample data (please see 4b. Import Sample Data).\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_production.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nINFO: To update the model periodically with new data, simply set up a cron job to call `pio train` and `pio deploy`. The engine will continue to serve prediction results during the re-train process. After the training is completed, `pio deploy` will automatically shut down the existing engine server and bring up a new process on the same port.\n\nINFO: **Note that if you import a *large* data set** and the training seems to be taking forever or getting stuck, it's likely that there is not enough executor memory. It's recommended to set up a Spark standalone cluster; you'll need to specify more driver and executor memory when training with a large data set. Please see [FAQ here](/resources/faq/#engine-training) for instructions.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_query_eventserver.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n### Query Event Server\n\nNow let's query the EventServer and see if these events are imported successfully.\n\nGo to the following URL with your browser:\n\n`http://localhost:7070/events.json?accessKey=<YOUR_ACCESS_KEY>`\n\nor run the following command in a terminal:\n\n```\n$ curl -i -X GET \"http://localhost:7070/events.json?accessKey=$ACCESS_KEY\"\n```\n\nNOTE: You should quote the entire URL by using single or double quotes when you run the `curl` command.\n\nIt should return the imported events in JSON format. You can refer to [Event Server Debugging Recipes](/datacollection/eventapi/#debugging-recipes) for more ways to query the Event Server.\n"
  },
  {
    "path": "docs/manual/source/partials/shared/quickstart/_query_eventserver_short.html.md.erb",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<!-- dummy markdown link needed if code block is last -->\n[]()\n\nWARNING: If you see error **TypeError: __init__() got an unexpected keyword argument 'access_key'**,\nplease update the Python SDK to the latest version.\n\nYou can query the event server again as described previously to check the imported events.\n"
  },
  {
    "path": "docs/manual/source/production/deploy-cloudformation.html.md",
    "content": "---\ntitle: Deploying with AWS CloudFormation\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis document has been moved to [here](/system/deploy-cloudformation/).\n"
  },
  {
    "path": "docs/manual/source/resources/faq.html.md",
    "content": "---\ntitle: Frequently Asked Questions\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nIf you have questions that are not resolved below, you can subscribe and post to\nthe user mailing list. You can follow the instructions [here](/support).\n\n## Using PredictionIO\n\n### Q: How do I check to see if various dependencies, such as Elasticsearch and HBase, are running?\n\nYou can run `$ pio status` from the terminal and it will return the status of various components that PredictionIO depends on.\n\n-  You should see the following message if everything is OK:\n\n```\n$ pio status\nPredictionIO\n  Installed at: /home/vagrant/PredictionIO\n  Version: 0.8.6\n\nApache Spark\n  Installed at: /home/vagrant/PredictionIO/vendors/spark-1.2.0\n  Version: 1.2.0 (meets minimum requirement of 1.2.0)\n\nStorage Backend Connections\n  Verifying Meta Data Backend\n  Verifying Model Data Backend\n  Verifying Event Data Backend\n  Test write Event Store (App Id 0)\n2015-02-03 18:52:38,904 INFO  hbase.HBLEvents - The table predictionio_eventdata:events_0 doesn't exist yet. 
Creating now...\n2015-02-03 18:52:39,868 INFO  hbase.HBLEvents - Removing table predictionio_eventdata:events_0...\n\n(sleeping 5 seconds for all messages to show up...)\nYour system is all ready to go.\n```\n\n- If you see the following error message, it usually means Elasticsearch is not running properly:\n\n```\n  ...\nStorage Backend Connections\n  Verifying Meta Data Backend\n  ...\nCaused by: org.elasticsearch.client.transport.NoNodeAvailableException: None of the configured nodes are available: []\n\tat org.elasticsearch.client.transport.TransportClientNodesService.ensureNodesAreAvailable(TransportClientNodesService.java:298)\n  ...\n\nUnable to connect to all storage backend(s) successfully. Please refer to error message(s) above. Aborting.\n```\n\nYou can check if there is any Elasticsearch process by running 'jps'.\n\nPlease see **How to start Elasticsearch** below.\n\n- If you see the following error message, it usually means HBase is not running properly:\n\n```\nStorage Backend Connections\n  Verifying Meta Data Backend\n  Verifying Model Data Backend\n  Verifying Event Data Backend\n2015-02-03 18:40:04,810 ERROR zookeeper.RecoverableZooKeeper - ZooKeeper exists failed after 1 attempts\n2015-02-03 18:40:04,812 ERROR zookeeper.ZooKeeperWatcher - hconnection-0x1e4075ce, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception\norg.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase/hbaseid\n...\n2015-02-03 18:40:07,021 ERROR hbase.StorageClient - Failed to connect to HBase. Please check if HBase is running properly.\n2015-02-03 18:40:07,026 ERROR storage.Storage$ - Error initializing storage client for source HBASE\n2015-02-03 18:40:07,027 ERROR storage.Storage$ - Can't connect to ZooKeeper\njava.util.NoSuchElementException: None.get\n...\n\nUnable to connect to all storage backend(s) successfully. Please refer to error message(s) above. 
Aborting.\n```\n\nYou can check if there is any HBase-related process by running 'jps'.\n\nPlease see **How to start HBase** below.\n\n### Q: How to start Elasticsearch?\n\nIf you followed the [instructions](/install/install-sourcecode/) to install\nPredictionIO, Elasticsearch would have been installed at\n`PredictionIO/vendors/elasticsearch-x.y.z/` where x.y.z is the version number.\nTo start it, run:\n\n```\n$ ~/PredictionIO/vendors/elasticsearch-x.y.z/bin/elasticsearch\n```\n\nIf you didn't use the install script, please go to where Elasticsearch is installed to start it.\n\nINFO: It may take some time (15 seconds or so) for Elasticsearch to become ready after you start it (wait a bit before you run `pio status` again).\n\n### Q: How to start HBase ?\n\nIf you followed the [instructions](/install/install-sourcecode/) to install\nPredictionIO, the HBase is installed at `~/PredictionIO/vendors/hbase-x.y.z/`\nwhere x.y.z is the version number. To start it, run:\n\n```\n$ ~/PredictionIO/vendors/hbase-x.y.z/bin/start-hbase.sh\n```\n\nIf you didn't use the install script, please go to where HBase is installed to start it.\n\nINFO: It may take some time (15 seconds or so) for HBase to become ready after\nyou start it (wait a bit before you run `pio status` again).\n\n\n## Problem with Event Server\n\n### Q: How do I increase the JVM heap size of the Event Server?\n\nAdd the `JAVA_OPTS` environmental variable to supply JVM options, e.g.\n\n```\n$ JAVA_OPTS=-Xmx16g bin/pio eventserver ...\n```\n\n## Engine Training\n\n### Q: How to increase Spark driver program and worker executor memory size?\nIn general, the PredictionIO `bin/pio` script wraps around Spark's `spark-submit`\nscript. You can specify a lot of Spark configurations (i.e. executor memory, cores, master\nurl, etc.) with it. 
You can supply these as pass-through arguments at the end of the\n`bin/pio` command.\n\nIf the engine training seems stuck, it's possible that the executor doesn't have enough memory.\n\nFirst, follow the [instructions here](http://spark.apache.org/docs/latest/spark-standalone.html) to start a standalone Spark cluster and get the master URL. If you use the provided quick install script to install PredictionIO, Spark is installed at `PredictionIO/vendors/spark-1.2.0/` where you could run the Spark commands in `sbin/` as described in the Spark documentation. Then use the following train command to specify executor memory (default is only 512 MB) and driver memory.\n\nFor example, the following command sets the Spark master to `spark://localhost:7077`\n(the default URL of a standalone cluster), sets the driver memory to 16G and sets the executor memory to 24G for `pio train`.\n\n```\n$ pio train -- --master spark://localhost:7077 --driver-memory 16G --executor-memory 24G\n```\n\n### Q: How to resolve \"Exception in thread \"main\" org.apache.spark.SparkException: Job aborted due to stage failure: Serialized task 165:35 was 110539813 bytes, which exceeds max allowed: spark.akka.frameSize (10485760 bytes) - reserved (204800 bytes). 
Consider increasing spark.akka.frameSize or using broadcast variables for large values.\"?\n\nA likely reason is that the local algorithm model is larger than the default frame size.\nYou can specify a larger value as a pass-through argument to spark-submit when you `pio train`.\nThe following command increases the frameSize to 1024MB.\n\n```\n$ pio train -- --conf spark.akka.frameSize=1024\n```\n\n## Deploy Engine\n\n### Q: How to increase heap space memory for \"pio deploy\"?\n\nIf you see the following error during `pio deploy`, it means there is not enough\nheap space memory.\n\n```\n...\n[ERROR] [LocalFSModels] Java heap space\n[ERROR] [OneForOneStrategy] None.get\n...\n```\n\nTo increase the heap space, specify the \"-- --driver-memory \" parameter in the\ncommand. For example, set the driver memory to 8G when deploying the engine:\n\n```\n$ pio deploy -- --driver-memory 8G\n```\n\n\n## Building PredictionIO\n\n### Q: How to resolve \"Error: Could not find or load main class org.apache.predictionio.tools.Console\" after ./make_distribution.sh?\n\n```\n$ bin/pio app\nError: Could not find or load main class org.apache.predictionio.tools.Console\n```\n\nWhen PredictionIO bumps a version, it creates another JAR file with the new\nversion number.\n\nDelete everything but the latest `pio-assembly-<VERSION>.jar` in the\n`$PIO_HOME/assembly` directory. 
For example:\n\n```\nPredictionIO$ cd assembly/\nPredictionIO/assembly$ ls -al\ntotal 197776\ndrwxr-xr-x  2 yipjustin yipjustin      4096 Nov 12 00:08 .\ndrwxr-xr-x 17 yipjustin yipjustin      4096 Nov 12 00:09 ..\n-rw-r--r--  1 yipjustin yipjustin 101184982 Nov  5 06:05 pio-assembly-0.8.1-SNAPSHOT.jar\n-rw-r--r--  1 yipjustin yipjustin 101324859 Nov 12 00:09 pio-assembly-0.8.2.jar\n\nPredictionIO/assembly$ rm pio-assembly-0.8.1-SNAPSHOT.jar\n```\n\n### Q: How to resolve \".......\\[error\\] (data/compile:compile) java.lang.AssertionError: assertion failed: java.lang.AutoCloseable\" when ./make_distribution.sh?\n\nPredictionIO only support Java 8 or later. Please make sure you have the\ncorrect Java version with the command:\n\n```\n$ javac -version\n```\n\n## Engine Development\n\n### Q: What's the difference between P- and L- prefixed classes and functions?\n\nPredictionIO v0.8 is built on the top of Spark, a massively scalable programming\nframework. A spark algorithm is different from conventional single machine\nalgorithm in a way that spark algorithms use the\n[RDD](http://spark.apache.org/docs/1.0.1/programming-guide.html#resilient-distributed-datasets-rdds)\nabstraction as its primary data type.\n\nPredictionIO framework natively support both RDD-based algorithms and\ntraditional single-machine algorithms. For controllers prefixed by \"P\" (i.e.\nPJavaDataSource, PJavaAlgorithm), their data include RDD abstraction; For \"L\"\ncontrollers, they are traditional single machine algorithms.\n\n## Running HBase\n\n### Q: How to resolve 'Exception in thread \"main\" java.lang.NullPointerException at org.apache.hadoop.net.DNS.reverseDns(DNS.java:92)'?\n\nHBase relies on reverse DNS be set up properly to function. If your network\nconfiguration changes (such as working on a laptop with public WiFi hotspots),\nthere could be a chance that reverse DNS does not function properly. You can\ninstall a DNS server on your own computer. 
Some users have reported that using\n[Google Public DNS](https://developers.google.com/speed/public-dns/) would also\nsolve the problem.\n\n### Q: How to fix HBase issues after cleaning up a disk that was full?\n\nYou may receive error messages like `write error: No space left on device`\nwhen the disk is full, and also receive errors from `pio status` even after\nrestarting PredictionIO services (due to\n[an issue](https://issues.apache.org/jira/browse/ZOOKEEPER-1621) in ZooKeeper).\n\nOne quick fix is to delete the newest `snapshot.xxxxx` and `log.xxxxx` in the\nZooKeeper data directory (e.g. `$HBASE_HOME/zookeeper/zookeeper_0/version-2`).\nRestart all services with `pio-start-all`, and use `pio status` to check whether\nyour setup is good to go again.\n\nIf you still have problems connecting to the event server, take a look at the\nHBase dashboard to see if there are `regions under transition`. If so, try the\nfollowing:\n\n1.  Try `hbase hbck -repair` and `hbase hbck -repairHoles`. If it solves the\n    problem, you are all set. Otherwise, continue on.\n\n2.  Find out failing regions by `hbase hbck`.\n\n    ```\n    ...\n    Summary:\n    Table pio_event:events_1 is inconsistent.\n      Number of regions: 2\n      Deployed on:  prediction.io,54829,1489213832255\n    ...\n    2 inconsistencies detected.\n    ```\n\n3.  Shut down the HBase process and delete `recovered.edits` folders in the HBase\n    data directory (e.g. `$HBASE_HOME/hbase/data/pio_event/events_1` in this\n    example) for failing regions.\n\n4.  Run `hbase hbck -repairHoles` and restart all PredictionIO services.\n"
  },
  {
    "path": "docs/manual/source/resources/glossary.html.md",
    "content": "---\ntitle: Glossary\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n**Data Preparator**\n- Part of Engine. It reads data from source and transforms it to the desired\nformat.\n\n**Data Source**\n- Part of Engine. It preprocesses the data and forward it to the algorithm for\nmodel training.\n\n**Engine**\n- An Engine represents a type of prediction, e.g. product recommendation. It is\ncomprised of four components: [D] Data Source and Data Preparator, [A]\nAlgorithm, [S] Serving, [E] Evaluation Metrics.\n\n**EngineClient**\n- Part of PredictionSDK. It sends queries to a deployed engine instance through\nthe Engine API and retrieves prediction results.\n\n**Event API**\n- Please see Event Server.\n\n**Event Server**\n- Event Server is designed to collect data into PredictionIO in an event-based\nstyle. Once the Event Server is launched, your application can send data to it\nthrough its Event API with HTTP requests or with the EventClient of\nPredictionIO's SDKs.\n\n**EventClient**\n- Please see Event Server.\n\n**Live Evaluation**\n- Evaluation of prediction results in a production environment. Prediction\nresults are shown to real users. 
Users do not rate the results explicitly but\nthe system observes user behaviors such as click through rate.\n\n**Offline Evaluation**\n- The prediction results are compared with pre-compiled offline datasets.\nTypically, offline evaluations are meant to identify the most promising\napproaches.\n\n**Test Data**\n- Also commonly referred to as Test Set. A set of data used to assess the strength\nand utility of a predictive relationship.\n\n**Training Data**\n- Also commonly referred to as Training Set. A set of data used to discover\npotentially predictive relationships. In PredictionIO Engine, training data is\nprocessed through the Data layer and passed on to the algorithm.\n"
  },
  {
    "path": "docs/manual/source/resources/intellij.html.md.erb",
    "content": "---\ntitle: Developing Engines with IntelliJ IDEA\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Prerequisites\n\nThis documentation assumes that you have a fully functional PredictionIO setup.\nIf you have not installed PredictionIO yet, please follow [these\ninstructions](/install/).\n\nThe following instructions have been tested with IntelliJ IDEA 2018.2.2\nCommunity Edition.\n\n\n## Preparing IntelliJ for Engine Development\n\n\n### Installing IntelliJ Scala Plugin\n\nFirst of all, you will need to install the [Scala\nplugin](https://plugins.jetbrains.com/plugin/?id=1347) if you have not already\ndone so.\n\nGo to the *Preferences* menu item, and look for *Plugins*. You should see\nthe following screen.\n\n![IntelliJ Plugins](/images/intellij/intelliJ-scala-plugin.png)\n\nClick *Install JetBrains plugin...*, the search for *Scala*. You should arrive\nat something similar to the following.\n\n![Scala Plugin](/images/intellij/intellij-scala-plugin-2.png)\n\nClick the green *Install plugin* button to install the plugin. Restart IntelliJ\nIDEA if asked to do so.\n\n\n### Setting Up the Engine Directory\n\nCreate an engine directory from a template. 
This requires that you download a\ntemplate that you wish to start from or modify.\n\nFollow template [install](/start/download) and [deploy](/start/deploy)\ninstructions or go through the [Quick\nStart](/templates/recommendation/quickstart/) if you are planning to modify a\nrecommender. Make sure to build, train, and deploy the engine to make sure all\nis configured properly.\n\nFrom IntelliJ IDEA, choose *File* > *New* > *Project from Existing Sources...*.\nWhen asked to select a directory to import, browse to the engine directory that\nyou downloaded to and proceed. Make sure you pick *Import project from external\nmodel* > *SBT*, then proceed to finish.\n\nYou should be able to build the project at this point. To run and debug your\ntemplate, continue on to the rest of the steps.\n\n\n### Optional: Issues with Snappy on macOS\n\nIf you are running on macOS and run into the following [known\nissue](http://bit.ly/12Abtvn), follow steps in this section.\n\nEdit `build.sbt` and add the following under `libraryDependencies`\n\n```\n\"org.xerial.snappy\" % \"snappy-java\" % \"1.1.1.7\"\n```\n\n![Updating build.sbt](/images/intellij/intellij-buildsbt.png)\n\nWhen you are done editing, IntelliJ should prompt you to import new changes,\nunless you have already enabled auto import. Import this change to make it\neffective.\n\n\n### Module Settings\n\nINFO: IntelliJ will recreate module settings whenever it imports changes of your\nproject. You will need to repeat this section whenever that happens.\n\nDue to the way how `pio` command sources required classes during runtime, it is\nnecessary to add them manually in module settings for *Run/Debug Configurations*\nto work properly.\n\nRight click on the project and click *Open Module Settings*. Hit the **+**\nbutton right below the list of dependencies, and select *JARs or\ndirectories...*.\n\nThe first JAR that you need to add is the `pio-assembly-<%= data.versions.pio\n%>.jar` that contains all necessary classes. 
It can be found inside the\n`dist/lib` directory of your PredictionIO source installation directory (if you\nhave built from sources) or the `lib` directory of your PredictionIO binary\ninstallation directory.\n\nNext, you will need to make sure some configuration files from your PredictionIO\ninstallation can be found during runtime. Add the `conf` directory of your\nPredictionIO installation directory. When asked about categories of the\ndirectory, pick *Classes*.\n\nFinally, you will need to add storage classes. The exact list of JARs that you\nwill need to add depends on your storage configuration. These JARs can be found\ninside the `dist/lib/spark` directory of your PredictionIO source installation\ndirectory (if you have built from sources) or the `lib/spark` directory of your\nPredictionIO binary installation directory.\n\n*   `pio-data-elasticsearch-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses Elasticsearch.\n\n*   `pio-data-hbase-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses Apache HBase.\n\n*   `pio-data-hdfs-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses HDFS.\n\n*   `pio-data-jdbc-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses JDBC. Notice that you must also add any\n    additional JDBC driver JARs.\n\n*   `pio-data-localfs-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses local filesystem.\n\n*   `pio-data-s3-assembly-<%= data.versions.pio %>.jar`\n\n    Add this JAR if your configuration uses Amazon Web Services S3.\n\nMake sure to change all these additions to *Runtime* scope. 
The following shows\nan example that uses the JDBC storage backend with PostgreSQL driver.\n\n![Example module settings for a JDBC and PostgreSQL\nconfiguration](/images/intellij/intellij-module-settings.png)\n\n\n## Running and Debugging in IntelliJ IDEA\n\n\n### Simulating `pio train`\n\nCreate a new *Run/Debug Configuration* by going to *Run* > *Edit\nConfigurations...*. Click on the **+** button and select *Application*. Name it\n`pio train` and put in the following:\n\n*   Main class:\n\n    ```\n    org.apache.predictionio.workflow.CreateWorkflow\n    ```\n\n*   VM options:\n\n    ```\n    -Dspark.master=local -Dlog4j.configuration=file:/<your_pio_path>/conf/log4j.properties -Dpio.log.dir=<path_of_log_file>\n    ```\n\n*   Program arguments:\n\n    ```\n    --engine-id dummy --engine-version dummy --engine-variant engine.json --env dummy=dummy\n    ```\n\nMake sure *Working directory* is set to the base directory of the template that\nyou are working on.\n\nClick the folder button to the right of *Environment variables*, and paste the\nrelevant values from `conf/pio-env.sh` in your PredictionIO installation\ndirectory. The following shows an example using JDBC and PostgreSQL.\n\n![Example environment variables\nsettings](/images/intellij/pio-train-env-vars.png)\n\nMake sure *Include dependencies with \"Provided\" scope* is checked.\n\nThe end result should look something similar to this.\n\n![Run Configuration](/images/intellij/pio-train.png)\n\nSave and you can run or debug `pio train` with the new configuration!\n\n\n### Simulating `pio deploy`\n\nFor `pio deploy`, simply duplicate the previous configuration and replace with\nthe following.\n\n*   Main class:\n\n    ```\n    org.apache.predictionio.workflow.CreateServer\n    ```\n\n*   Program Arguments:\n\n    ```\n    --engineInstanceId <id_from_pio_train> --engine-variant engine.json\n    ```\n\n\n### Executing a Query\n\nYou can execute a query with the correct SDK. 
For a recommender that has been\ntrained with the sample MovieLens dataset perhaps the easiest query is a `curl`\none. Start by running or debugging your `pio deploy` config so the service is\nwaiting for the query. Then go to the \"Terminal\" tab at the very bottom of the\nIntelliJ IDEA window and enter the `curl` request:\n\n```\ncurl -H \"Content-Type: application/json\" -d '{ \"user\": \"1\", \"num\": 4 }' http://localhost:8000/queries.json\n```\n\nThis should return something like:\n\n```\n{\"itemScores\":[\n  {\"item\":\"52\",\"score\":9.582509402541834},\n  {\"item\":\"95\",\"score\":8.017236650368387},\n  {\"item\":\"89\",\"score\":6.975951244053634},\n  {\"item\":\"34\",\"score\":6.857457277981334}\n]}\n```\n\nINFO: If you hit a breakpoint you are likely to get a connection timeout. To see\nthe data that would have been returned, just place a breakpoint where the\nresponse is created or run the query with no breakpoints.\n"
  },
  {
    "path": "docs/manual/source/resources/release.html.md",
    "content": "---\ntitle: Release Cadence\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Versions\n\nEach PIO release will be versioned: `<MAJOR>`.`<FEATURE>`.`<MAINTENANCE>`\n\n- **MAJOR**: Major releases are not on any particular cadence. Major releases are likely to be\n  stable over a long period (one year or more)\n- **FEATURE**: We are targeting new feature releases every two months and will include new\n  features, improvements and bug fixes\n- **MAINTENANCE**: This is reserved for urgent bug fixes on the current release\n\n## Release Cadence\n\nFeature releases will occur every two months. Maintenance releases will be ad-hoc, as needed.\n\n- At the beginning of each release, committers nominate features for the upcoming release\n- At end of week, share JIRA link with dev user group to invite comments on the list\n- Committers modify the target list as necessary after comments are integrated and assign them to devs\n"
  },
  {
    "path": "docs/manual/source/resources/upgrade.html.md",
    "content": "---\ntitle: Upgrade Instructions\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis page highlights major changes in each version and upgrade tools.\n\n# How to Upgrade\n\n## Upgrade to 0.14.0\n\nThis release adds Elasticsearch 6 support. See [pull request](https://github.com/apache/predictionio/pull/466) for details.\nConsequently, you must reindex your data.\n\n1. 
Access your old cluster to check existing indices\n\n```\n$ curl -XGET 'http://localhost:9200/_cat/indices?v'\nhealth status index     uuid                   pri rep docs.count docs.deleted store.size pri.store.size\nyellow open   pio_event 6BAPz-DfQ2e9bICdVRr03g   5   1       1501            0    321.3kb        321.3kb\nyellow open   pio_meta  oxDMU1mGRn-vnXtAjmifSw   5   1          4            0     32.4kb         32.4kb\n\n$ curl -XGET \"http://localhost:9200/pio_meta/_search\" -d'\n{\n  \"aggs\": {\n    \"typesAgg\": {\n      \"terms\": {\n        \"field\": \"_type\",\n        \"size\": 200\n      }\n    }\n  },\n  \"size\": 0\n}'\n{\"took\":3,\"timed_out\":false,\"_shards\":{\"total\":5,\"successful\":5,\"skipped\":0,\"failed\":0},\"hits\":{\"total\":4,\"max_score\":0.0,\"hits\":[]},\"aggregations\":{\"typesAgg\":{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"accesskeys\",\"doc_count\":1},{\"key\":\"apps\",\"doc_count\":1},{\"key\":\"engine_instances\",\"doc_count\":1},{\"key\":\"sequences\",\"doc_count\":1}]}}}\n\n$ curl -XGET \"http://localhost:9200/pio_event/_search\" -d'\n{\n  \"aggs\": {\n    \"typesAgg\": {\n      \"terms\": {\n        \"field\": \"_type\",\n        \"size\": 200\n      }\n    }\n  },\n  \"size\": 0\n}'\n{\"took\":2,\"timed_out\":false,\"_shards\":{\"total\":5,\"successful\":5,\"skipped\":0,\"failed\":0},\"hits\":{\"total\":1501,\"max_score\":0.0,\"hits\":[]},\"aggregations\":{\"typesAgg\":{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"1\",\"doc_count\":1501}]}}}\n```\n\n2. 
(Optional) Settings for new indices\n\nIf you want to add specific settings associated with each index, we would recommend defining [Index Templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html).\n\nFor example,\n\n```\n$ curl -H \"Content-Type: application/json\" -XPUT \"http://localhost:9600/_template/pio_meta\" -d'\n{\n  \"index_patterns\": [\"pio_meta_*\"],\n  \"settings\": {\n    \"number_of_shards\": 1,\n    \"number_of_replicas\": 1\n  }\n}'\n$ curl -H \"Content-Type: application/json\" -XPUT \"http://localhost:9600/_template/pio_event\" -d'\n{\n  \"index_patterns\": [\"pio_event_*\"],\n  \"settings\": {\n    \"number_of_shards\": 1,\n    \"number_of_replicas\": 1\n  }\n}'\n```\n\n3. [Reindex](https://www.elastic.co/guide/en/elasticsearch/reference/6.0/reindex-upgrade-remote.html)\n\nAccording to the following conversion table, run the reindex for every index that you need to migrate to your new cluster.\n\n| Old Cluster | New Cluster |\n| --------------- | ---------------- |\n| index: `pio_meta` type: `accesskeys` | index: `pio_meta_accesskeys` |\n| index: `pio_meta` type: `apps` | index: `pio_meta_apps` |\n| index: `pio_meta` type: `channels` | index: `pio_meta_channels` |\n| index: `pio_meta` type: `engine_instances` | index: `pio_meta_engine_instances` |\n| index: `pio_meta` type: `evaluation_instances` | index: `pio_meta_evaluation_instances` |\n| index: `pio_meta` type: `sequences` | index: `pio_meta_sequences` |\n| index: `pio_event` type: It depends on your use case. (e.g. `1`) | index: `pio_event_<old_type>` (e.g. 
`pio_event_1`) |\n\nFor example,\n\n```\n$ curl -H \"Content-Type: application/json\" -XPOST \"http://localhost:9600/_reindex\" -d'\n{\n  \"source\": {\n    \"remote\": {\n      \"host\": \"http://localhost:9200\"\n    },\n    \"index\": \"pio_meta\",\n    \"type\": \"accesskeys\"\n  },\n  \"dest\": {\n    \"index\": \"pio_meta_accesskeys\"\n  }\n}'\n```\n\n## Upgrade to 0.12.0\n\nIn 0.12.0, Elasticsearch 5.x client has been reimplemented as a singleton.\nEngine templates directly using Elasticsearch 5.x StorageClient require\nupdate for compatibility. See [pull request](https://github.com/apache/predictionio/pull/421)\nfor details.\n\n## Upgrade to 0.11.0\n\nStarting from 0.11.0, PredictionIO no longer bundles any JDBC drivers in the\nbinary assembly. If your setup is using a JDBC backend and you run into storage\nconnection errors after an upgrade, please manually install the JDBC driver. If\nyou use PostgreSQL, you can find instructions\n[here](/install/install-sourcecode#pgsql).\n\n## Upgrade to 0.9.2\n\nThe Spark dependency has been upgraded to version 1.3.0. All engines must be\nrebuilt against it in order to work.\n\nOpen and edit `build.sbt` of your engine, and look for these two lines:\n\n```scala\n\"org.apache.spark\" %% \"spark-core\"    % \"1.2.0\" % \"provided\"\n\n\"org.apache.spark\" %% \"spark-mllib\"   % \"1.2.0\" % \"provided\"\n```\n\nChange `1.2.0` to `1.3.0`, and do a clean rebuild by `pio build --clean`. Your\nengine should now work with the latest Apache Spark.\n\n\n### New PEventStore and LEventStore API\n\nIn addition, new PEventStore and LEventStore API are introduced so that appName can be used as parameters in engine.json to access Event Store.\n\nNOTE: The following changes are not required for using 0.9.2 but it's recommended to upgrade your engine code as described below because the old API will be deprecated.\n\n#### 1. 
In **DataSource.scala**:\n\n- remove this line of code:\n\n    ```scala\n    import org.apache.predictionio.data.storage.Storage\n    ```\n\n    and replace it by\n\n    ```scala\n    import org.apache.predictionio.data.store.PEventStore\n    ```\n\n- Change `appId: Int` to `appName: String` in DataSourceParams\n\n    For example,\n\n    ```scala\n    case class DataSourceParams(appName: String) extends Params\n    ```\n\n- remove this line of code: `val eventsDb = Storage.getPEvents()`\n\n- locate where `eventsDb.aggregateProperties()` is used, change it to `PEventStore.aggregateProperties()`:\n\n    For example,\n\n    ```scala\n\n      val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties( // CHANGED\n        appName = dsp.appName, // CHANGED: use appName\n        entityType = \"user\"\n      )(sc).map { ... }\n\n    ```\n\n- locate where `eventsDb.find() `is used, change it to `PEventStore.find()`\n\n    For example,\n\n    ```scala\n\n      val viewEventsRDD: RDD[ViewEvent] = PEventStore.find( // CHANGED\n        appName = dsp.appName, // CHANGED: use appName\n        entityType = Some(\"user\"),\n        ...\n\n    ```\n\n#### 2. 
In **XXXAlgorithm.scala**:\n\nIf Storage.getLEvents() is also used in Algorithm (such as ALSAlgorithm of E-Commerce Recommendation template), you also need to do following:\n\nNOTE: If `org.apache.predictionio.data.storage.Storage` is not used at all (such as Recommendation, Similar Product, Classification, Lead Scoring, Product Ranking template), there is no need to change Algorithm and can go to the later **engine.json** section.\n\n- remove `import org.apache.predictionio.data.storage.Storage` and replace it by `import org.apache.predictionio.data.store.LEventStore`\n- change `appId` to `appName` in the XXXAlgorithmParams class.\n- remove this line of code: `@transient lazy val lEventsDb = Storage.getLEvents()`\n- locate where `lEventsDb.findSingleEntity()` is used, change it to `LEventStore.findByEntity()`:\n\n    For example, change following code\n\n    ```scala\n      ...\n      val seenEvents: Iterator[Event] = lEventsDb.findSingleEntity(\n        appId = ap.appId,\n        entityType = \"user\",\n        entityId = query.user,\n        eventNames = Some(ap.seenEvents),\n        targetEntityType = Some(Some(\"item\")),\n        // set time limit to avoid super long DB access\n        timeout = Duration(200, \"millis\")\n      ) match {\n        case Right(x) => x\n        case Left(e) => {\n          logger.error(s\"Error when read seen events: ${e}\")\n          Iterator[Event]()\n        }\n      }\n    ```\n\n    to\n\n    ```scala\n      val seenEvents: Iterator[Event] = try { // CHANGED: try catch block is used\n        LEventStore.findByEntity( // CHANGED: new API\n          appName = ap.appName, // CHANGED: use appName\n          entityType = \"user\",\n          entityId = query.user,\n          eventNames = Some(ap.seenEvents),\n          targetEntityType = Some(Some(\"item\")),\n          // set time limit to avoid super long DB access\n          timeout = Duration(200, \"millis\")\n        )\n      } catch { // CHANGED: try catch block is used\n  
      case e: scala.concurrent.TimeoutException =>\n          logger.error(s\"Timeout when read seen events.\" +\n            s\" Empty list is used. ${e}\")\n          Iterator[Event]()\n        case e: Exception =>\n          logger.error(s\"Error when read seen events: ${e}\")\n          throw e\n      }\n    ```\n\n    If you are using E-Commerce Recommendation template, please refer to the latest version for other updates related to `LEventStore.findByEntity()`\n\n#### 3. In **engine.json**:\n\nlocate where `appId` is used, change it to `appName` and specify the name of the app instead.\n\nFor example:\n\n```json\n  ...\n\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyAppName\"\n    }\n  },\n\n```\n\nNote that other components such as `algorithms` may also have `appId` param (e.g. E-Commerce Recommendation template). Remember to change it to `appName` as well.\n\nThat's it! You can rebuild your engine to try it out!\n\n## Upgrade to 0.9.0\n\n0.9.0 has the following new changes:\n\n- The signature of `P2LAlgorithm` and `PAlgorithm`'s `train()` method is changed from\n\n    ```scala\n      def train(pd: PD): M\n    ```\n\n    to\n\n    ```scala\n      def train(sc: SparkContext, pd: PD): M\n    ```\n\n    which allows you to access SparkContext inside `train()` with this new parameter `sc`.\n\n- A new SBT build plugin (`pio-build`) is added for engine template\n\n\nWARNING: If you have existing engine templates running with previous version of PredictionIO, you need to either download the latest templates which are compatible with 0.9.0, or follow the instructions below to modify them.\n\nFollow instructions below to modify existing engine templates to be compatible with PredictionIO 0.9.0:\n\n1. 
Add a new parameter `sc: SparkContext` in the signature of `train()` method of algorithm in the templates.\n\n    For example, in Recommendation engine template, you will find the following `train()` function in `ALSAlgorithm.scala`\n\n    ```scala\n    class ALSAlgorithm(val ap: ALSAlgorithmParams)\n      extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n      ...\n\n      def train(data: PreparedData): ALSModel = ...\n\n      ...\n    }\n    ```\n\n    Simply add the new parameter `sc: SparkContext,` to `train()` function signature:\n\n    ```scala\n    class ALSAlgorithm(val ap: ALSAlgorithmParams)\n      extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n      ...\n\n      def train(sc: SparkContext, data: PreparedData): ALSModel = ...\n\n      ...\n    }\n    ```\n\n    You need to add the following import for your algorithm as well if it is not there:\n\n    ```scala\n    import org.apache.spark.SparkContext\n    ```\n\n2. Modify the file `build.sbt` in your template directory to use `pioVersion.value` as the version of org.apache.predictionio.core dependency:\n\n    Under your template's root directory, you should see a file `build.sbt` which has the following content:\n\n    ```\n    libraryDependencies ++= Seq(\n      \"org.apache.predictionio\"    %% \"core\"          % \"0.8.6\" % \"provided\",\n      \"org.apache.spark\" %% \"spark-core\"    % \"1.2.0\" % \"provided\",\n      \"org.apache.spark\" %% \"spark-mllib\"   % \"1.2.0\" % \"provided\")\n    ```\n\n    Change the version of `\"org.apache.predictionio\" %% \"core\"` to `pioVersion.value`:\n\n    ```\n    libraryDependencies ++= Seq(\n      \"org.apache.predictionio\"    %% \"core\"          % pioVersion.value % \"provided\",\n      \"org.apache.spark\" %% \"spark-core\"    % \"1.2.0\" % \"provided\",\n      \"org.apache.spark\" %% \"spark-mllib\"   % \"1.2.0\" % \"provided\")\n    ```\n\n3. 
Create a new file `pio-build.sbt` in template's **project/** directory with the following content:\n\n    ```\n    addSbtPlugin(\"org.apache.predictionio\" % \"pio-build\" % \"0.9.0\")\n    ```\n\n    Then, you should see the following two files in the **project/** directory:\n\n    ```\n    your_template_directory$ ls project/\n    assembly.sbt  pio-build.sbt\n    ```\n\n4. Create a new `template.json` file in the engine template's root directory with the following content:\n\n    ```\n    {\"pio\": {\"version\": { \"min\": \"0.9.0\" }}}\n    ```\n\n    This is to specify the minimum PredictionIO version which the engine can run with.\n\n5. Lastly, you can add `/pio.sbt` into your engine template's `.gitignore`. `pio.sbt` is automatically generated by `pio build`.\n\nThat's it! Now you can run `pio build`, `pio train` and `pio deploy` with PredictionIO 0.9.0 in the same way as before!\n\n\n## Upgrade to 0.8.4\n\n**engine.json** has slightly changed its format in 0.8.4 in order to make engine more flexible. If you are upgrading to 0.8.4, engine.json needs to have the ```params``` field for *datasource*, *preparator*, and *serving*. 
Here is the sample engine.json from templates/scala-parallel-recommendation-custom-preparator that demonstrates the change for *datasource* (line 7).\n\n\n```\nIn 0.8.3\n{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.template.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"appId\": 1\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01\n      }\n    }\n  ]\n}\n```\n\n\n\n```\nIn 0.8.4\n{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.template.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appId\": 1\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01\n      }\n    }\n  ]\n}\n```\n\n\n\n## Upgrade from 0.8.2 to 0.8.3\n\n0.8.3 disallows entity types **pio_user** and **pio_item**. These types are used by default for most SDKs. They are deprecated in 0.8.3, and SDKs helper functions have been updated to use **user** and **item** instead.\n\nIf you are upgrading to 0.8.3, you can follow these steps to migrate your data.\n\n##### 1. Create a new app\n\n```\n$ pio app new <my app name>\n```\nPlease take note of the `<new app id>` generated for the new app.\n\n##### 2. Run the upgrade command\n\n```\n$ pio upgrade 0.8.2 0.8.3 <old app id> <new app id>\n```\n\nIt will run a script that creates a new app with the new app id and migrates the data to the new app.\n\n##### 3. Update **engine.json** to use the new app id. **Engine.json** is located under your engine project directory.\n\n```\n  \"datasource\": {\n    \"appId\": <new app id>\n  },\n```\n\n## Schema Changes in 0.8.2\n\n0.8.2 contains HBase and Elasticsearch schema changes from previous versions. 
If you are upgrading from a pre-0.8.2 version, you need to first clear HBase and Elasticsearch. This will clear out all data\nin Elasticsearch and HBase. Please be extra cautious.\n\nDANGER: **ALL EXISTING DATA WILL BE LOST!**\n\n\n### Clearing Elasticsearch\n\nWith Elasticsearch running, do\n\n```\n$ curl -X DELETE http://localhost:9200/_all\n```\n\nFor details see http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-delete-index.html.\n\n### Clearing HBase\n\n```\n$ $HBASE_HOME/bin/hbase shell\n...\n> disable_all 'predictionio.*'\n...\n> drop_all 'predictionio.*'\n...\n```\n\nFor details see http://wiki.apache.org/hadoop/Hbase/Shell.\n\n## Experimental upgrade tool (Upgrade HBase schema from 0.8.0/0.8.1 to 0.8.2)\n\nCreate an app to store the data\n\n```\n$ bin/pio app new <my app>\n```\n\nReplace `<to app ID>` by the returned app ID (`<from app ID>` is the original app ID used in 0.8.0/0.8.1):\n\n```\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"data/run-main org.apache.predictionio.data.storage.hbase.upgrade.Upgrade <from app ID>\" \"<to app ID>\"\n```\n"
  },
  {
    "path": "docs/manual/source/robots.txt",
    "content": "User-agent: *\nDisallow:\n\nSitemap: http://predictionio.apache.org/sitemap.xml\n"
  },
  {
    "path": "docs/manual/source/samples/index.html.md",
    "content": "---\ntitle: Sample Style Page\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Alerts\n\n### Info\n\nMarkdown: `INFO: This is a info message!` will display this:\n\nINFO: This is a info message!\n\n### Success\n\nMarkdown: `SUCCESS: This is a success message!` will display this:\n\nSUCCESS: This is a success message!\n\n### Warning\n\nMarkdown: `WARNING: This is a warning message!` will display this:\n\nWARNING: This is a warning message!\n\n### Danger\n\nMarkdown: `DANGER: This is a danger message!` will display this:\n\nDANGER: This is a danger message!\n\n### Note\n\nMarkdown: `NOTE: This is a note message!` will display this:\n\nNOTE: This is a note message!\n\n### TODO\n\nMarkdown: `TODO: This is a TODO message!` will display this:\n\nTODO: This is a TODO message!\nThis message is longer to demonstrate what a multi line message would look like.\nThis message is longer to demonstrate what a multi line message would look like.\nThis message is longer to demonstrate what a multi line message would look like.\nYes **bold** and other styling still work inside alerts!\n\n\n## Text\n\nThis is the normal paragraph font.\nThis is a [internal link](/samples/tabs).\nThis is an [external link](http://google.com/)\nThis is 
a [secure external link](https://google.com/)\nThis is **bold**.\nThis is *italic*.\nThis is _underlined_.\nThis is ==highlighted==.\nThis is ~~strikethrough~~.\nThis is ^(superscript).\n\nThis is another paragraph.\n\nView [additional sizing](/samples/sizing) samples.\n\n## Lists\n\n* Bullet 1\n* Bullet 2\n  * Bullet 2.1\n  * Bullet 2.2\n* Bullet 3\n\n1. First item\n2. Second item\n3. Third item\n\n## Code\n\n### Block\n\nThis is a Scala code block:\n\n```scala\ncase class Query(\n  user: Int,\n  num: Int\n) extends Serializable\n\n```\n\nSee a full list of [supported languages](/samples/languages).\n\n\n### Inline\n\nThis `code is inline`.\n\n## Image\n\n![Sample Image](/images/tutorials/rails/localhost-8000.png)\n\n\n## Quotes\n\n> This is a blockquote. Don't use these for anything other than actual quotes! Use [alerts](#alerts) instead.\n\n## Tables\n\n| First Header  | Second Header |\n| ------------- | ------------- |\n| Content Cell  | Content Cell  |\n| Content Cell  | Content Cell  |\n\n## Other\n\nThis is a horizontal rule:\n\n---\n\nThis is an en dash &ndash; and an em dash &mdash; using HTML entities.\n\n<div>This is inside a div tag.</div>\n\n# Heading 1\n\nThis is the normal paragraph font.\n\n## Heading 2\n\nThis is the normal paragraph font.\n\n### Heading 3\n\nThis is the normal paragraph font.\n\n#### Heading 4\n\nThis is the normal paragraph font.\n\n##### Heading 5\n\nThis is the normal paragraph font.\n\n###### Heading 6\n\nThis is the normal paragraph font.\n"
  },
  {
    "path": "docs/manual/source/samples/languages.html.md",
    "content": "---\ntitle: Language Samples\nhidden: true\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Plain Text\n\nThis is a sample code block with no language.\n\n```\n$ $PIO_HOME/bin/pio eventserver\n$ cd /path/to/engine\n$ ../bin/pio train\n$ ../bin/pio deploy\n```\n\n## Scala\n\nThis is a sample Scala code block:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n)\n```\n\n## Ruby\n\nThis is a sample Ruby code block:\n\n```ruby\nclass UsersController < ApplicationController\n  def index\n    @users = User.order('reviews_count DESC').limit(20)\n  end\nend\n```\n\n## JSON\n\nThis is a sample JSON code block:\n\n```json\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01\n      }\n    }\n  ]\n  ...\n}\n```\n\n## PHP\n\nThis is a sample PHP code block:\n\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF 
EVENTSERVER>);\n\n// Set the 4 properties for a user\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'user',\n  'entityId' => <USER ID>,\n  'properties' => array(\n    'attr0' => <VALUE OF ATTR0>,\n    'attr1' => <VALUE OF ATTR1>,\n    'attr2' => <VALUE OF ATTR2>,\n    'plan' => <VALUE OF PLAN>\n    )\n  ));\n?>\n```\n\n## Python\n\nThis is a sample Python code block:\n\n```python\nfrom predictionio import EventClient\nfrom datetime import datetime\nimport pytz\nclient = EventClient(app_id=4, url=\"http://localhost:7070\")\n\nfirst_event_properties = {\n    \"prop1\" : 1,\n    \"prop2\" : \"value2\",\n    \"prop3\" : [1, 2, 3],\n    \"prop4\" : True,\n    \"prop5\" : [\"a\", \"b\", \"c\"],\n    \"prop6\" : 4.56 ,\n    }\nfirst_event_time = datetime(\n  2004, 12, 13, 21, 39, 45, 618000, pytz.timezone('US/Mountain'))\nfirst_event_response = client.create_event(\n    event=\"my_event\",\n    entity_type=\"user\",\n    entity_id=\"uid\",\n    properties=first_event_properties,\n    event_time=first_event_time,\n)\n```\n"
  },
  {
    "path": "docs/manual/source/samples/level-1.html.md",
    "content": "---\ntitle: Level 1\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 1\n"
  },
  {
    "path": "docs/manual/source/samples/level-2-1.html.md",
    "content": "---\ntitle: Level 2.1\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 2.1\n"
  },
  {
    "path": "docs/manual/source/samples/level-2-2.html.md",
    "content": "---\ntitle: Level 2.2\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 2.2\n"
  },
  {
    "path": "docs/manual/source/samples/level-2.html.md",
    "content": "---\ntitle: Level 2\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 2\n"
  },
  {
    "path": "docs/manual/source/samples/level-3-1.html.md",
    "content": "---\ntitle: Level 3.1\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 3.1\n"
  },
  {
    "path": "docs/manual/source/samples/level-3-2.html.md",
    "content": "---\ntitle: Level 3.2\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 3.2\n"
  },
  {
    "path": "docs/manual/source/samples/level-3.html.md",
    "content": "---\ntitle: Level 3\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 3\n"
  },
  {
    "path": "docs/manual/source/samples/level-4-1.html.md",
    "content": "---\ntitle: Level 4.1\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 4.1\n"
  },
  {
    "path": "docs/manual/source/samples/level-4-2.html.md",
    "content": "---\ntitle: Level 4.2\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 4.2\n"
  },
  {
    "path": "docs/manual/source/samples/level-4-3.html.md",
    "content": "---\ntitle: Level 4.3\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 4.3\n"
  },
  {
    "path": "docs/manual/source/samples/level-4.html.md",
    "content": "---\ntitle: Level 4\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Level 4\n"
  },
  {
    "path": "docs/manual/source/samples/narrow.html.md",
    "content": "---\ntitle: Narrow Page\nhidden: true\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nKeep it short!\n"
  },
  {
    "path": "docs/manual/source/samples/sizing.html.md",
    "content": "---\ntitle: Sizing Samples with an Extra Long Title to ThisIsAVeryVeryLongWord Show Wrapping\nhidden: true\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Long Text\n\nThis is a paragraph of text that spans multiple lines.\nThis is a paragraph of text that spans multiple lines.\nThis is a paragraph of text that spans multiple lines.\nThis is a paragraph of text that spans multiple lines.\nThis is a paragraph of text that spans multiple lines.\nThis is a paragraph of text that spans multiple lines.\n\n```\nThis is a code block with lines longer than 80 characters. This is a code block with lines longer than 80 characters. This is a code block with lines longer than 80 characters.\n\nThis is a code block with lines longer than 80 characters. This is a code block with lines longer than 80 characters. This is a code block with lines longer than 80 characters.\n```\n\nThis paragraph contains a single word 80 characters long.\n12345678901234567890123456789012345678901234567890123456789012345678901234567890\n"
  },
  {
    "path": "docs/manual/source/samples/tabs.html.md",
    "content": "---\ntitle: Tabs\nhidden: true\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis page is used to test the tabs plugin based on [Tabslet](https://github.com/vdw/Tabslet).\n\n<div class=\"tabs\">\n  <div data-tab=\"Ruby\" data-lang=\"ruby\">\n```ruby\n# This is a ruby file.\nclass MyClass\n  def foo\n    'bar'\n  end\nend\n```\n  </div>\n  <div data-tab=\"Plain\">\nThis is a test of **markdown** inside a tab!\n\n```\n// This tab does not have the data-lang attribute set!\n$ cd path/to/your/file\n```\n  </div>\n  <div data-tab=\"HTML\" data-lang=\"html\">\n```html\n<p>Yes you can still use HTML in code blocks!</p>\n```\n  </div>\n  <div data-tab=\"Test\">\n```php\nTest 0 <>\nTest 1 >\nTest 3 <\nTest 4 ><\nTest 5 =>\nTest 6 <=\nTest 7 <>\n<p><b>Test</b></p>\n```\n  </div>\n</div>\n\n## Test Syncing\n\nHere we show a similar set of tabs to test language syncing:\n\n<div class=\"tabs\">\n  <div data-tab=\"Ruby\" data-lang=\"ruby\">\n```ruby\n# This is a ruby file.\nclass MyClass\n  def foo\n    'bar'\n  end\nend\n```\n  </div>\n  <div data-tab=\"Plain\">\nThis is a test of **markdown** inside a tab!\n\n```\n// This tab does not have the data-lang attribute set!\n$ cd path/to/your/file\n```\n  </div>\n  <div 
data-tab=\"HTML\" data-lang=\"html\">\n<p>This HTML is <b>hard coded</b>.</p>\n  </div>\n  <div data-tab=\"Python\" data-lang=\"python\">\n```python\n# The other group does not have a Python tab.\n```\n  </div>\n</div>\n\n<div class=\"tabs\">\n  <div data-tab=\"Java\" data-lang=\"java\">\n```Java\n// Java code..\n```\n  </div>\n  <div data-tab=\"HTML\" data-lang=\"html\">\n\n  This includes **bold** with Markdown.\n\n  </div>\n</div>\n"
  },
  {
    "path": "docs/manual/source/sdk/index.html.md",
    "content": "---\ntitle: List of PredictionIO SDKs\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Officially Supported SDKs\n\n* [Java & Android SDK](/sdk/java/)\n* [PHP SDK](/sdk/php/)\n* [Python SDK](/sdk/python/)\n* [Ruby SDK](/sdk/ruby/)\n\n## Community Powered SDKs\n\nCheck the community projects! [here](/community/projects.html#sdks).\n"
  },
  {
    "path": "docs/manual/source/sdk/java.html.md.erb",
    "content": "---\ntitle: Java & Android SDK\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## API Docs\n\n[View API documentation](https://javadoc.io/doc/org.apache.predictionio/predictionio-sdk-java-client/0.13.0)\n(compatible with PredictionIO version <%= data.versions.pio %>).\n\n## Central Repository\n\n[Browse](http://search.maven.org/#search%7Cga%7C1%7Corg.apache.predictionio)\n\n## GitHub\n\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-java&type=fork&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-java&type=watch&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n\n[View code](https://github.com/apache/predictionio-sdk-java) on GitHub.\n\n## License\n\n[Apache License 2.0](https://github.com/apache/predictionio-sdk-java/blob/develop/LICENSE).\n"
  },
  {
    "path": "docs/manual/source/sdk/php.html.md.erb",
    "content": "---\ntitle: PHP SDK\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## API Docs\n\n[View API documentation](/sdk/php/api/)\n(compatible with PredictionIO version <%= data.versions.pio %>).\n\n## Packagist\n\n[Browse](https://packagist.org/packages/predictionio/predictionio)\n\n## GitHub\n\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-php&type=fork&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-php&type=watch&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n\n[View code](https://github.com/apache/predictionio-sdk-php) on GitHub.\n\n## License\n\n[Apache License 2.0](https://github.com/apache/predictionio-sdk-php/blob/develop/LICENSE).\n"
  },
  {
    "path": "docs/manual/source/sdk/python.html.md.erb",
    "content": "---\ntitle: Python SDK\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## API Docs\n\n[View API documentation](http://pythonhosted.org/PredictionIO/) (compatible with\nPredictionIO <%= data.versions.pio %>).\n\n## PyPI\n\n[Browse](https://pypi.python.org/pypi/PredictionIO)\n\n## GitHub\n\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-python&type=fork&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-python&type=watch&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n\n[View code](https://github.com/apache/predictionio-sdk-python) on GitHub.\n\n## License\n\n[Apache License 2.0](https://github.com/apache/predictionio-sdk-python/blob/develop/LICENSE.txt).\n"
  },
  {
    "path": "docs/manual/source/sdk/ruby.html.md.erb",
    "content": "---\ntitle: Ruby SDK\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## API Docs\n\n[View API\ndocumentation](https://www.rubydoc.info/github/apache/predictionio-sdk-ruby/master)\n(compatible with PredictionIO <%= data.versions.pio %>).\n\n## RubyGems\n\n[Browse](https://rubygems.org/gems/predictionio)\n\n## GitHub\n\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-ruby&type=fork&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n<iframe src=\"/github/?user=apache&repo=predictionio-sdk-ruby&type=watch&count=true&size=large\" allowtransparency=\"true\" frameborder=\"0\" scrolling=\"0\" width=\"170\" height=\"30\"></iframe>\n\n[View code](https://github.com/apache/predictionio-sdk-ruby) on GitHub.\n\n## License\n\n[Apache License 2.0](https://github.com/apache/predictionio-sdk-ruby/blob/develop/LICENSE).\n"
  },
  {
    "path": "docs/manual/source/search/index.html.md",
    "content": "---\ntitle: Search Results\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<div id=\"st-results-container\"></div>\n"
  },
  {
    "path": "docs/manual/source/start/customize.html.md",
    "content": "---\ntitle: Customizing an Engine\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nWhen you download an engine template, it comes with the source code. All engine templates follow the same DASE architecture and they are designed to be customizable.\n\nYou may want to customize an engine for many reasons, for example:\n\n* Use another algorithm, or multiple of them\n* Read data from a different, or existing, data store\n* Read different types of training data\n* Transform data with another approach\n* Add new evaluation measures\n* Add custom business logics\n\n\nTo learn more about DASE, please read \"[Learning DASE](/customize/)\".\n\nAfter you have finished modifying the code, you can re-build and deploy the engine again with:\n\n```\n$ pio build; pio train; pio deploy\n```\n"
  },
  {
    "path": "docs/manual/source/start/deploy.html.md.erb",
    "content": "---\ntitle: Deploying your first Engine\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nOnce you have created an engine from a template, it is time to deploy it as a web service. One or more of your applications can then send real-time prediction queries to it.\n\nIt takes 6 simple steps to deploy and use an engine:\n\n1. Install and Run PredictionIO\n2. Create an Engine by downloading an Engine Template\n3. Generate an App ID and Access Key, if you are integrating PredictionIO with a new application\n4. Collecting Data\n5. Deploy the Engine as a Service\n6. Use the Engine\n\nThe [QuickStart](/templates/recommendation/quickstart/) of the basic Recommendation template is a great step-by-step guide especially if you are new to PredictionIO.\n\nOf course, you may also browse the QuickStart of other engine templates in the template gallery.\n"
  },
  {
    "path": "docs/manual/source/start/download.html.md",
    "content": "---\ntitle: Downloading an Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThe first step to create a new engine is to browse [PredictionIO template\ngallery](/gallery/template-gallery) where you could find Engine Templates for\nall kinds of machine learning tasks. Choose an engine template that matches your\nuse case the best. You can further customize the engine later if you like.\n\nTo download a template, go to the template gallery, pick an engine template and\nclick its name. You will be redirected to the engine template's GitHub\nrepository. Clone the engine to your local machine to further customize it.\n\nPlease browse the [PredictionIO template gallery](/gallery/template-gallery) to\nchoose an engine template.\n"
  },
  {
    "path": "docs/manual/source/start/index.html.md",
    "content": "---\ntitle: PredictionIO - A Quick Intro\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nPredictionIO consists of the following components:\n\n* **PredictionIO platform** - our open source machine learning stack for building, evaluating and deploying engines with machine learning algorithms.\n* **Event Server** - our open source machine learning analytics layer for unifying events from multiple platforms\n* **Template Gallery** - the place for you to download engine templates for different type of machine learning applications\n\n![PredictionIO Overview](/images/overview-multiengines.png)\n\n## Event Server\n\nIn a common scenario, PredictionIO's **Event Server** continuously collects data from your application.\nA PredictionIO **engine** then builds predictive model(s) with one or more algorithms using the data.\nAfter it is deployed as a web service, it listens to queries from your application and respond with predicted results in real-time.\n\n![PredictionIO Single Engine Overview](/images/overview-singleengine.png)\n\n\n[Event Server](/datacollection/) collects data from your application, in real-time or in batch. 
It can also unify data that are related to your application from multiple platforms.\nAfter data is collected, it mainly serves two purposes:\n\n1. Provide data to Engine(s) for model training and evaluation\n2. Offer a unified view for data analysis\n\nLike a database server, Event Server can host multiple applications. Data are separated for each application by a unique *app_name*.\n\nOnce Event Server is launched, you can send data to a specific *app_name*, identified by an Access Key, through its [Event API](/datacollection/eventapi.html) with HTTP requests or with [one of the SDKs](/sdk/).\n\nIn some special cases, you may want your engine to read data from another datastore instead of Event Server.\nIt can be achieved by [making some modifications](/start/customize/).\n\n## Engine\n\nEngine is responsible for making predictions.\nIt contains one or more machine learning algorithms. An engine reads training data and builds predictive model(s).\nIt is then deployed as a web service. A deployed engine responds to prediction queries from your application through REST API in real-time.\n\nPredictionIO's [template gallery](/gallery/template-gallery) offers Engine Templates for all kinds of machine learning tasks.\nYou can easily create one or more engines from these templates.\n\nThe components of a template, namely **Data Source**, **Data Preparator**, **Algorithm(s)**, and **Serving**, are all [customizable](/start/customize/) for your specific needs.\n"
  },
  {
    "path": "docs/manual/source/stylesheets/application.css.scss",
    "content": "@import 'bootstrap';\n@import 'variables/**/*';\n@import 'mixins/**/*';\n@import 'partials/**/*';"
  },
  {
    "path": "docs/manual/source/stylesheets/mixins/_all.css.scss",
    "content": "@mixin x-small {\n  @media only screen and (min-width: 320px) {\n    @content\n  }\n}\n\n\n@mixin small {\n  @media only screen and (min-width: 480px) {\n    @content\n  }\n}\n\n\n@mixin medium {\n  @media all and (min-width: 768px) {\n    @content\n  }\n}\n\n@mixin large {\n  @media only screen and (min-width: 990px) {\n    @content\n  }\n}\n\n@mixin x-large {\n  @media only screen and (min-width: 1200px) {\n    @content\n  }\n}\n\n@mixin placeholder-color ($color) {\n  :-moz-placeholder {\n    color: $color;\n  }\n  :-ms-input-placeholder {\n    color: $color;\n  }\n  ::-moz-placeholder {\n    color: $color;\n  }\n  ::-webkit-input-placeholder {\n    color: $color;\n  } \n}\n\n@mixin jump-on-hover {\n  -moz-transform: translateX(0px) translateY(-3px);\n  -webkit-transform: translateX(0px) translateY(-3px);\n  -o-transform: translateX(0px) translateY(-3px);\n  -ms-transform: translateX(0px) translateY(-3px);\n  transform: translateX(0px) translateY(-3px);\n  -webkit-box-shadow: 0px 3px 5px 1px rgba(0,0,0,0.12);\n  -moz-box-shadow: 0px 3px 5px 1px rgba(0,0,0,0.12);\n  box-shadow: 0px 3px 5px 1px rgba(0,0,0,0.12);\n}\n\n@mixin alert-border($color) {\n  border: 1px solid $color;\n  border-left: 12px solid $color;\n}\n\n@mixin anchor-no-underline {\n  a{\n    &:hover {\n      text-decoration: none;\n    }\n    &:active {\n      text-decoration: none;\n    }\n  }\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_action_call.css.scss",
    "content": ".action-call {\n  font-family: $standard-font-family;\n  @include anchor-no-underline;\n  padding-top: 78px;\n  padding-bottom: 10px;\n\n  .action-square {\n    height: 190px;\n    background-color: #F7F7F7;\n    padding: 35px;\n    padding-top: 0;\n\n    h1 {\n      padding-top: 35px;\n      font-size: 27px;\n      text-align: center;\n      font-weight: normal;\n    }\n\n    p {\n      font-size: 16px;\n      color: #979797;\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_alerts.css.scss",
    "content": "body { // for higher priority\n  .alert-message {\n    margin: 1em 0 1em 0;\n    display: block;\n    width: auto;\n    overflow:hidden;\n\n    p {\n      padding: 22px;\n      margin: 0;\n    }\n\n    &.info {\n      @include alert-border ($info-color);\n    }\n    &.success {\n      @include alert-border ($success-color);\n    }\n    &.warning {\n      @include alert-border ($warning-color);\n    }\n    &.danger {\n      @include alert-border ($danger-color);\n    }\n    &.note {\n      @include alert-border ($note-color);\n    }\n    &.todo {\n      @include alert-border ($todo-color);\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_breadcrumbs.css.scss",
    "content": "#breadcrumbs {\n  font-family: $standard-font-family;\n  @include anchor-no-underline;\n  ul {\n    display: inline-block;\n    padding: 0;\n  }\n\n  li {\n    display: inline-block;\n  }\n\n  a {\n    color: $breadcrumbs-link-color;\n  }\n\n  span {\n    color: $breadcrumbs-text-color;\n  }\n  span.spacer {\n    padding-left: 3px;\n    padding-right: 3px;\n  }\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_buttons.css.scss",
    "content": ".button {\n  border-radius: 5px;\n}\n\n.button-primary {\n  padding: 10px 30px 10px 30px;\n  text-shadow: none;\n  border: none;\n  border-radius: 100px;\n  font-size: 16px;\n  font-weight: 600;\n  display: inline-block;\n  background-color: $button-primary-background-color;\n  color: $button-primary-font-color;\n  &:hover {\n    background-color: $button-primary-hover-background-color;\n    color: $button-primary-hover-font-color;\n    text-decoration: none;\n  }\n}\n\n\n.button-download {\n  box-sizing: border-box;\n  @extend .button;\n  height: 40px;\n  font-size: 16px;\n  line-height: 24px;\n  font-weight: 600;\n  padding: 7px 25px 0 25px;\n  background-color: $button-download-background-color;\n  color: $button-download-font-color;\n\n  &:hover {\n    text-decoration: none;\n  }\n  i {\n    font-size: 24px;\n    line-height: 24px;\n    vertical-align: bottom;\n    margin-right: 8px;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_classes.css.scss",
    "content": ".hide-text {\n  text-indent: 100%;\n  white-space: nowrap;\n  overflow: hidden;\n}\n\n.group {\n  display: inline-block;\n}\n\n.new {\n  color: $danger-color;\n}\n\n.logo-dark {\n  background-color: $logo-dark-color;\n  display: inline-block;\n  font-size: 0;\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_code.css.scss",
    "content": "body .highlight {\n  // Reset existing table styles!\n  table {\n    width: auto;\n    tr {\n      border-bottom: none;\n    }\n  }\n\n  overflow: auto;\n  border: none;\n  border-radius: 6px;\n  font-family: $code-font-family;\n  font-size: 14px;\n  line-height: 18px;\n  margin: 16px 0;\n  z-index: 9000;\n  position: relative;\n\n  pre {\n    background: none; //counter bootstrap default\n    border:none; //counter bootstrap default\n    margin: 0;\n    overflow: visible;\n  }\n\n  td {\n    &.gutter { //line numbers\n      padding: 16px;\n      background-color: $code-line-numbers-background-color;\n      pre {\n        line-height: 20px;\n        font-size: 14px;\n        color: $black; \n        font-family: $code-font-family;\n      }\n    }\n    &.code {\n      background-color: $code-block-background-color;\n      padding: 16px;\n      width: 100%;\n      pre {\n        line-height: 20px;\n        font-size: 14px;\n        word-wrap: normal;\n        color: $white;\n        font-family: $code-font-family;\n      }\n    }\n  }\n}\n\ncode {\n  // inline code elements \n  background-color: $code-inline-background-color;\n  border: 1px solid $code-inline-border-color;\n  border-radius: 6px;\n  padding: 0 5px;\n  line-height: 22px;\n  color: $code-inline-color;\n  display: inline-block;\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_content.css.scss",
    "content": ".content {\n  font-family: $standard-font-family;\n  @include anchor-no-underline;\n\n  img {\n    width: calc(100% - 282.5px);\n    max-width: 100%;\n    box-shadow:0 3px 5px rgba(0,0,0,0.2);\n    &.static {\n      width: auto;\n    }\n    &.default-width {\n      width: auto;\n    }\n  }\n\n  h1, h2, h3, h4, h5, h6 {\n    line-height: 1.5;\n  }\n\n  h1 {\n    color: $heading-1-font-color;\n    margin: 24px 0;\n  }\n\n  h2 {\n    color: $heading-2-font-color;\n    font-weight: 400;\n    font-size: 20px;\n    margin: 24px 0;\n  }\n\n  h3 {\n    color: $heading-3-font-color;\n    font-size: 18px;\n    font-weight: 700;\n    margin-top: 24px;\n  }\n\n  h4 {\n    color: $heading-4-font-color;\n  }\n\n  h5 {\n    color: $heading-5-font-color;\n  }\n\n  h6 {\n    color: $heading-6-font-color;\n  }\n\n  p {\n    margin: 15px 0 15px 0;\n    font-size: 16px;\n    color: $content-paragraph-font-color;\n    line-height: 28px;\n  }\n\n  a {\n    color: $content-link-color;\n    word-break: break-word;\n  }\n\n  ul, ol {\n    margin-top: 15px;\n    margin-bottom: 15px;\n    li {\n      font-size: 16px;\n      line-height: 28px;\n      padding-left: 10px;\n      color: $content-paragraph-font-color;\n      a {\n        color: #1C92CF;\n      }\n    }\n  }\n\n  ul.control {\n    li {\n      padding: 0;\n    }\n  }\n\n  blockquote p:last-child, \n  blockquote ul:last-child, \n  blockquote ol:last-child { \n    margin:0;\n    font-style: italic; \n  }\n\n  .header-anchors:before {\n    //compensate fixed header height for in page anchor jumps.\n    content: \"\";\n    display: block; \n    height: $search-bar-row-height + $header-height;\n    margin-top: ($search-bar-row-height + $header-height) * -1;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_copyright.css.scss",
    "content": "#copyright {\n  display: inline-block;\n  width: 800px;\n  margin-left: 0;\n  background-color: $footer-background-color;\n  text-align: center;\n  color: $copyright-font-color;\n  padding: 25px 0 25px 0;\n  font-size: 14px;\n  a {\n    color: $copyright-link-color;\n  }\n\n  .logo {\n    margin-top: 10px;\n    display: inline-block;\n  }\n}\n\n\n@include medium {\n  #copyright {\n    margin-left: 300px;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_edit_page.css.scss",
    "content": "#edit-page {\n  background-color: $edit-page-background-color;\n  position: fixed;\n  right: 25px;\n  bottom: 0;\n  border-top-left-radius: 5px;\n  border-top-right-radius: 5px;\n\n  i {\n    font-size: 20px;\n    margin-right: 7px;\n    position: relative;\n    top: 1px;\n  }\n  p {\n    margin: 0;\n  }\n  a {\n    color: $edit-page-font-color;\n    &:hover {\n      text-decoration: none;\n    }\n    display: block;\n    font-size: 16px;\n    font-weight: 600;\n    margin: 7px 15px 7px 15px;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_footer.css.scss",
    "content": "footer {\n  font-family: $standard-font-family;\n  padding:0;\n  background-color: $footer-background-color;\n  @include anchor-no-underline;\n\n  .seperator {\n    border-top: 1px solid $footer-seperator-color;\n    margin: 50px 0;\n  }\n\n  .footer-link-column {\n    a {\n      color: $footer-link-color;\n    }\n\n    ul {\n      padding: 0;\n      margin: 0;\n    }\n\n    li {\n      list-style: none;\n      font-size: 12px;\n      line-height: 24px;\n    }\n    .footer-link-column-row {\n      margin-bottom: 20px;\n\n      h4 {\n        color: $footer-heading-4-color;\n        font-size: 18px;\n        font-weight: normal;\n      }\n    }\n  }\n\n  #footer-bottom {\n    background: $footer-bottom-background-color;\n    padding: 15px 0;\n    margin-top: 50px;\n\n    span {\n      color: $footer-bottom-span-color;\n    }\n\n    #footer-logo-wrapper {\n      float:left;\n    }\n\n    #social-icons-wrapper {\n      zoom:1;\n      float: right;\n\n      iframe {\n        vertical-align: middle;\n        margin: 0 5px;\n      }\n\n      img{\n        padding:0 5px;\n      }\n\n      &:after {\n        clear:both;\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_global.css.scss",
    "content": "#global {\n  min-width: 320px;\n  background-color: $global-background-color;\n}\n\n#main {\n  background-color: $main-background-color;\n  padding: 0 0 25px 0;\n  width: 1100px;\n  min-height: 800px;\n}\n\n.col-main {\n  margin-left: 0;\n}\n\n@include medium {\n  .col-main {\n    margin-left: 300px;\n  }\n}\n\n#page {\n  margin-top: $header-height + $search-box-height + ($search-bar-row-vertical-padding * 2) + 50px; // header height\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_hacks.css.scss",
    "content": "// Firefox sucks sometimes!\n:focus { outline: none; }\n::-moz-focus-inner { border: none; }"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_header.css.scss",
    "content": "header {\n  font-family: $standard-font-family;\n  background-color: $white;\n  line-height: 83px;\n  border-bottom: 3px solid $header-border-bottom-color;\n  position: fixed;\n  width: 100%;\n  z-index: 9999;\n  top:0;\n\n  #header-wrapper {\n    height: 100%;\n  }\n\n  #logo {\n    width: 169px;\n  }\n\n  ul {\n    margin-bottom: 0;\n    padding-left:20px;\n    li {\n      display: inline;\n      a {\n        color: black;\n        padding-left: 25px;\n        padding-right: 25px;\n      }\n      a:hover {\n        text-decoration: none;\n      }\n    }\n  }\n\n  .pill {\n    display: inline-block;\n    height: 38px;\n    line-height: 38px;\n    text-align: center;\n    font-size: 12px;\n    letter-spacing: 3px;\n    padding: 0 15px;\n    transition:All 0.3s ease;\n    -webkit-transition:All 0.3s ease;\n    -moz-transition:All 0.3s ease;\n    -o-transition:All 0.3s ease;\n    -webkit-font-smoothing: subpixel-antialiased;\n\n  a {\n      color: $white;\n    }\n  }\n\n  .pill.left {\n    background-color: $header-left-pill-background-color;\n    border-top-left-radius: 3em;\n    border-bottom-left-radius: 3em;\n    padding-left: 20px;\n  }\n\n  .pill.right {\n    background-color: $header-right-pill-background-color;\n    border-top-right-radius: 3em;\n    border-bottom-right-radius: 3em;\n    padding-right: 20px;\n  }\n\n  .pill.left:hover {\n    background-color: $header-left-pill-hover-backgounr-color;\n    text-decoration: none;\n  }\n\n  .pill.right:hover {\n    background-color: $header-right-pill-hover-backgounr-color;\n    text-decoration: none;\n  }\n\n  .pill.left:hover , .pill.right:hover {\n    @include jump-on-hover;\n  }\n\n  #pill-wrapper {\n  float: right;\n    a {\n      color: white;\n    }\n  }\n\n  #header-pills-wrapper .pill {\n    display: inline-block;\n  }\n\n  #logo-wrapper, #header-nav-options-wrapper {\n    float: left;\n  }\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_hybird_vim_highlight.css.scss",
    "content": ".highlight {\n  .hll { background-color: #222222; }\n  .err { color: #cccccc; background-color: #FF0000; } /* Error */\n  .gr { color: #FF0000; } /* Generic.Error */\n  .gt { color: #0040D0; } /* Generic.Traceback */\n  .gd { color: #de935f; background-color: #222222; } /* Generic.Deleted */\n  .gi { color: #de935f; background-color: #222222; } /* Generic.Inserted */\n  .ni { color: #cccccc; } /* Name.Entity */\n  .w { color: #cccccc; } /* Text.Whitespace */\n  .c { color: #707880; font-style: italic; } /* Comment */\n  .k { color: #de935f; font-weight: bold; } /* Keyword */\n  .o { color: #de935f; font-weight: bold; } /* Operator */\n  .cm { color: #707880; font-style: italic; } /* Comment.Multiline */\n  .cp { color: #f0c674; font-weight: bold; font-style: italic; } /* Comment.Preproc */\n  .c1 { color: #707880; font-style: italic; } /* Comment.Single */\n  .cs { color: #f0c674; font-weight: bold; font-style: italic; } /* Comment.Special */\n  .ge { color: #de935f; font-style: italic; } /* Generic.Emph */\n  .gh { color: #f0c674; } /* Generic.Heading */\n  .go { color: #707880; } /* Generic.Output */\n  .gp { color: #cc6666; } /* Generic.Prompt */\n  .gs { font-weight: bold; } /* Generic.Strong */\n  .gu { color: #f0c674; } /* Generic.Subheading */\n  .kc { color: #de935f; font-weight: bold; } /* Keyword.Constant */\n  .kd { color: #de935f; font-weight: bold; } /* Keyword.Declaration */\n  .kn { color: #de935f; font-weight: bold; } /* Keyword.Namespace */\n  .kp { color: #de935f; font-weight: bold; } /* Keyword.Pseudo */\n  .kr { color: #de935f; font-weight: bold; } /* Keyword.Reserved */\n  .kt { color: #81a2be; font-weight: bold; } /* Keyword.Type */\n  .m { color: #cc6666; } /* Literal.Number */\n  .s { color: #cc6666; } /* Literal.String */\n  .na { color: #de935f; } /* Name.Attribute */\n  .nb { color: #b294bb; } /* Name.Builtin */\n  .nc { color: #81a2be; font-weight: bold; } /* Name.Class */\n  .no { color: #de935f; } /* Name.Constant */\n  
.nd { color: #f0c674; font-weight: bold; } /* Name.Decorator */\n  .ne { color: #81a2be; font-weight: bold; } /* Name.Exception */\n  .nf { color: #81a2be; font-weight: bold; } /* Name.Function */\n  .nl { color: #81a2be; font-weight: bold; } /* Name.Label */\n  .nn { color: #cc6666; } /* Name.Namespace */\n  .nt { color: #b5bd68; } /* Name.Tag */\n  .nv { color: #de935f; } /* Name.Variable */\n  .ow { color: #de935f; font-weight: bold; } /* Operator.Word */\n  .mf { color: #cc6666; } /* Literal.Number.Float */\n  .mh { color: #cc6666; } /* Literal.Number.Hex */\n  .mi { color: #cc6666; } /* Literal.Number.Integer */\n  .mo { color: #cc6666; } /* Literal.Number.Oct */\n  .sb { color: #cc6666; } /* Literal.String.Backtick */\n  .sc { color: #cc6666; } /* Literal.String.Char */\n  .sd { color: #cc6666; } /* Literal.String.Doc */\n  .s2 { color: #cc6666; } /* Literal.String.Double */\n  .se { color: #cc6666; } /* Literal.String.Escape */\n  .sh { color: #cc6666; } /* Literal.String.Heredoc */\n  .si { color: #cc6666; } /* Literal.String.Interpol */\n  .sx { color: #cc6666; } /* Literal.String.Other */\n  .s1 { color: #cc6666; } /* Literal.String.Single */\n  .sr { color: #b5bd68; } /* Literal.String.Regex */\n  .ss { color: #cc6666; } /* Literal.String.Symbol */\n  .bp { color: #f0c674; } /* Name.Builtin.Pseudo */\n  .vc { color: #de935f; } /* Name.Variable.Class */\n  .vg { color: #de935f; } /* Name.Variable.Global */\n  .vi { color: #de935f; } /* Name.Variable.Instance */\n  .il { color: #cc6666; } /* Literal.Number.Integer.Long */\n}\n\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_jcarousel.css.scss",
    "content": "/*\nThis is the visible area of you carousel.\nSet a width here to define how much items are visible.\nThe width can be either fixed in px or flexible in %.\nPosition must be relative!\n*/\n.jcarousel {\n  position: relative;\n  overflow: hidden;\n\n}\n\n.jcarousel-wrapper {\n  width: 400px;\n}\n\n/*\nThis is the container of the carousel items.\nYou must ensure that the position is relative or absolute and\nthat the width is big enough to contain all items.\n*/\n.jcarousel ul {\n  width: 99999px;\n  position: relative;\n\n  /* Optional, required in this case since it's a <ul> element */\n  list-style: none;\n  margin: 0;\n  padding: 0;\n}\n\n/*\nThese are the item elements. jCarousel works best, if the items\nhave a fixed width and height (but it's not required).\n*/\n.jcarousel li {\n  /* Required only for block elements like <li>'s */\n  float: left;\n  width: 400px;\n\n}\n\n.jcarousel-pagination {\n  width: 100%;\n  list-style-type: none;\n  display: inline-block;\n  margin: 0;\n  padding: 20px 0 20px 0;\n  border-bottom: 1px solid $border-color;\n\n  li  {\n    display: inline-block;\n    margin: 0 20px 0 0;\n\n    a {\n      border: 1px solid $border-color;\n      border-radius: 30px;\n      width: 30px;\n      height: 30px;\n      line-height: 30px;\n      text-align: center;\n      display: inline-block;\n      color: $carousel-link-color;\n      &:hover {\n        background-color: $carousel-hover-background-color;\n        border: 1px solid $carousel-hover-background-color;\n        text-decoration: none;\n        color: $carousel-hover-link-color;\n      }\n    }\n    &.active {\n      a {\n        background-color: $carousel-active-background-color;\n        border: 1px solid $carousel-active-background-color;\n        color: $carousel-active-link-color;\n\n      }\n    }\n\n  }\n}\n\n.jcarousel-controls {\n  display: inline-block;\n  width: 100%;\n  padding: 15px 0 15px 0;\n  border-top: 1px solid $border-color;\n\n  a {\n    display: 
inline-block;\n    background-color: $carousel-control-background-color;\n    color: $carousel-control-font-color;\n    border-radius: 3px;\n    padding: 5px 10px 5px 10px;\n    font-weight: 600;\n    &:hover {\n      text-decoration: none;\n      background-color: $carousel-control-hover-background-color;\n    }\n  }\n  .inactive {\n    display: none;\n  }\n}\n\n.jcarousel-control-prev {\n\n}\n\n.jcarousel-control-next {\n  float: right;\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_layout.css.scss",
    "content": "#page.container-fluid {\n  max-width: 1170px;\n}\n\n.content-header {\n  width: auto;\n  display: block;\n}\n\n.container-center {\n  width: 1170px;\n  display: inline-block;\n  text-align: left;\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_modules.css.scss",
    "content": ".pill-button {\n  height: auto;\n  border:1px solid #2499E6;\n  border-radius: 3em;\n  font-size: 12px;\n  color: #2499E6;\n  text-align: center;\n  display: inline-block;\n  letter-spacing: 2px;\n  transition:All 0.3s ease;\n  -webkit-transition:All 0.3s ease;\n  -moz-transition:All 0.3s ease;\n  -o-transition:All 0.3s ease;\n  padding: 14px 15px;\n  -webkit-font-smoothing: subpixel-antialiased;\n}\n\n.underlined-input {\n  border:none;\n  border-bottom: 1px solid #EBF1F9;\n  outline: none;\n  padding: 5px 0;\n  \n}\n\n.underlined-input:focus {\n  border-color: #249DEC;\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_off_canvas.css.scss",
    "content": "#active-navigation {\n  float: left;\n  font-size: 40px;\n  height: 40px;\n  margin: 0 0 0 20px;\n  color: $header-icon-color;\n  display: block;\n  i {\n    vertical-align: top;\n  }\n}\n\n@include medium {\n  #active-navigation {\n    display: none;\n  }\n\n  .active-navigation {\n    #main {\n      margin-right: 0;\n    }\n  }\n}\n\n// Large\n@include x-large {\n  #table-of-contents {\n    display: block;\n    &.empty {\n      display: none;\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_page_title.css.scss",
    "content": "#page-title {\n  h1 {\n    font-family: $standard-font-family;\n    margin: 0;\n    margin-top: 15px;\n    margin-bottom: 32px;\n    font-size: 28px;\n    font-weight: 400;\n    line-height: 1.2;\n    color: $page-title-font-color;\n    word-wrap: break-word;\n    &.missing {\n      color: $danger-color;\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_responsive.css.scss",
    "content": "/*HEADER*/\n@media (max-width: 1200px) {\n  header ul li a {\n  color: $black;\n  font-size: 1em;\n  padding-left: 10px;\n  padding-right: 10px;\n  }\n}\n/* END HEADER*/\n\n@media (max-width: 992px) {\n  html { // serves as \"parent\" for fixed position left menu\n    position: relative;\n  }\n\n  /* Header */\n  header {\n    line-height: 0px;\n    #header-wrapper {\n      width: 100%;\n      padding: 0;\n\n      .mobile-search-bar-toggler {\n        position: absolute;\n        height: 23px;\n        width: 23px;\n        top: 15px;\n        right: 30px;\n        cursor: pointer;\n      }\n\n      #logo-wrapper {\n        width: 100%;\n        float: none;\n        text-align: center;\n        line-height: 50px;\n        border-bottom: 3px solid $header-logo-wrapper-border-bottom-color;\n        position: relative;\n        #drawer-toggle {\n          position: absolute;\n          height: 23px;\n          width: 23px;\n          top: 15px;\n          left:20px;\n          background: image_url(\"icons/drawer-toggle-closed.png\");\n          cursor: pointer;\n          &.active {\n            background: image_url(\"icons/drawer-toggle-active.png\");\n          }\n        }\n      }\n    }\n\n    #menu-wrapper {\n      max-height: 0;\n      overflow: hidden;\n      -webkit-transition: max-height 0.5s;\n      -moz-transition: max-height 0.5s;\n      transition: max-height 0.5s;\n\n      &.active {\n        max-height: 500px;\n      }\n\n      #header-nav-options-wrapper {\n        float: none;\n        display: block;\n        width: 100%;\n        a {\n          display: block;\n          width: 100%;\n        }\n        ul {\n          display: block;\n          width: 100%;\n          padding: 0;\n          margin: 0;\n          li {\n            display: block;\n            padding: 0;\n            margin: 0;\n            line-height: 50px;\n            border-bottom: 1px solid $header-nav-menu-option-border-bottom-color;\n            
padding-left: 20px;\n\n            a {\n              padding: 0;\n              margin: 0;\n            }\n          }\n        }\n      }\n    }\n\n    #pill-wrapper {\n      float: none;\n      display: block;\n      margin-left: auto;\n      margin-right: auto;   \n      margin-top: 20px;\n      margin-bottom: 20px;\n      text-align: center;\n\n      &:after {\n        content: \"\";\n        clear: both;\n      }\n\n      a {\n        display: inline-block;\n      }\n    }\n  }\n  /* END Header */\n\n  body {\n    /* action call */\n    .action-call {\n      display: none;\n    }\n    /* END action call */\n    /* Page content container  */\n    #page {\n      margin-top: $search-bar-row-height + $mobile-header-height;\n    }\n    /* END Page content container  */\n\n    /* Content container */\n    .content {\n      .header-anchors:before {\n        //compensate fixed header height for in page anchor jumps.\n        content: \"\";\n        display: block; \n        height: $search-bar-row-height + $mobile-header-height;\n        margin-top: ($search-bar-row-height + $mobile-header-height) * -1;\n      }\n    }\n    /* END Content container*/\n\n    /* Table of content & Content Header (same row) */\n    .content-header {\n      width: 100%;\n      #page-title {\n        h1 {\n          margin-bottom: 28px;\n        }\n      }\n    }\n\n    #table-of-content-wrapper {\n      margin: 0;\n      clear: both;\n      float: none;\n      max-width: 320px\n    }\n    /* END Table of content */\n\n    /* Search bar */\n    #search-bar-row-wrapper {\n      top: 53px;\n      padding: 0;\n\n      #mobile-page-heading-wrapper {\n        height: 36px;\n        overflow: hidden;\n        margin-top: ($search-bar-row-height - 35px) / 2;\n        p {\n          margin: 0;\n          padding: 0;\n          margin-bottom: 4px; \n          font-size: 16px;\n          color: $mobile-page-header-label-color;\n        }\n\n        h4 {\n          margin: 0;\n          padding: 
0;\n          font-size: 16px;\n          overflow: hidden;\n          white-space: nowrap;\n          text-overflow: ellipsis;\n          color: $black;\n        }\n      }\n\n      .swiftype-wrapper {\n        position: absolute;\n        background-color: white;\n        top: $search-bar-row-height * -1;\n        height: $search-bar-row-height;\n        z-index: 9990;\n        overflow: hidden;\n        transition: top 0.5s ease;\n        &.active {\n          top: 0;\n        }\n        .swiftype-row-hider {\n          position: absolute;\n          top: 15px;\n          right: 14px;\n          cursor: pointer;\n        }\n        .swiftype {\n          .search-form {\n              width: 100%;\n            .search-box {\n              margin-top: ($search-bar-row-height - $search-box-height) /2;\n              width: calc(100% - 50px);\n              .st-search-input {\n                width: 100%;\n              }\n            }\n            .search-box-toggler {\n              float: right;\n              @include clearfix;\n            }\n          }\n        }\n      }\n      \n      .mobile-left-menu-toggler {\n        background-color: rgba(0,0,0,0);\n        height: 56px;\n        position: relative;\n        top: 0;\n        cursor: pointer;\n      }\n    }\n    /* End Search bar */\n\n    /* Left Nav menu indicator */\n    #left-menu-indicator {\n      position: absolute;\n      top: ($search-bar-row-height - 11px) / 2;\n      right: 12px;\n      width: 20px;\n      height: 11px;\n      cursor: pointer;\n    }\n    /* END Left Nav menu indicator */\n\n    /* Left nav menu */\n    #left-menu-wrapper {\n      position: absolute;\n      top: 108px;\n      z-index: 9997;\n      padding: 0;\n      \n      width: 100%;\n      max-height: 0;\n      overflow: hidden;\n      transition: max-height 0.5s ease;\n\n      &.active {\n        max-height: 1000px;\n      }\n\n      #nav-main {\n        height: 100%;\n        background-color: $white;\n        \n\n    
    a.final.active {\n          border-top: 1px solid $nav-main-mobile-active-item-border-color;\n          border-bottom: 1px solid $nav-main-mobile-active-item-border-color;\n        }\n      }\n    }\n    /* END Left nav menu */\n\n    /* Subscription form */\n    .subscription-form-wrapper {\n        margin-bottom: 40px;\n    }\n    /* END Subscription form */\n  }\n\n  /* Footer */\n  footer {\n    .footer-link-column-row {\n      h4 {\n        font-size: 15px;\n      }\n      li {\n        font-size: 13px;\n      }\n    }\n\n    #footer-logo-wrapper {\n      text-align: center;\n      width: 100%;\n      margin-bottom: 20px;\n    }\n\n    #social-icons-wrapper {\n      float: none;\n      text-align: center;\n      width: 100%;\n      margin-bottom: 10px;\n    }\n  }\n  /* END Footer */\n}\n\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_search_bar_row.css.scss",
    "content": "#search-bar-row-wrapper {\n  font-family: $standard-font-family;\n  height: $search-box-height + ($search-bar-row-vertical-padding * 2);\n  width: 100%;\n  position: fixed;\n  top: $header-height;\n  padding-top: $search-bar-row-vertical-padding;\n  padding-bottom: $search-bar-row-vertical-padding;\n  z-index: 9998;\n  background-color: $white;\n  box-sizing: border-box;\n  line-height: 15px;\n  border-bottom: 1px solid $search-box-wrapper-border-color;\n  #search-bar-row {\n    height: 100%;\n    max-width: 1170px;\n    h4 {\n      font-size: 15px;\n    }\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_subscribe_form.css.scss",
    "content": ".subscription-form-wrapper {\n  background-color: $form-wrapper-color;\n  border: 1px solid $form-wrapper-border-color;\n  padding: 25px 20px;\n\n  h4 {\n    font-size: 18px;\n    color: $form-wrapper-heading-4-color;\n    font-weight: normal;\n  }\n\n  p {\n    color: $white;\n  }\n\n  .pill-button {\n    background-color: $subscribe-form-pill-button-background-color;\n    display: inline-block;\n    padding-left: 30px;\n    padding-right: 30px;\n    border-radius: 3em;\n    border:none;\n    letter-spacing: 3px;\n    color: $white;\n    font-size: 12px;\n    font-weight: bold;\n    text-align: center;\n\n    &:hover {\n      @include jump-on-hover;\n    }\n\n    &:disabled {\n      background-color: $disabled-pill-button-background-color !important;\n      border-color: $disabled-pill-button-background-color !important;\n        -webkit-transform: none;\n           -moz-transform: none;\n            -ms-transform: none;\n             -o-transform: none;\n                transform: none;\n       -webkit-box-shadow: none;\n          -moz-box-shadow: none;\n               box-shadow: none;\n    }\n  }\n\n  input{\n    margin-bottom: 17px;\n  }\n\n  input[type=\"email\"] {\n    width: 100%;\n    padding-left: 12px;\n  \tpadding-right: 12px;\n  \tborder-radius: 5px;\n  \tfont-size: 14px;\n  \tline-height: 26px;\n  }\n\n  input:focus:invalid, input:invalid {\n    border-color: $invalid-input-border-color;\n  }\n\n  input:valid{\n    border-color: $valid-input-border-color;\n  }\n\n  input.error, .form-control.error{\n    border-color: $errored-input-border-color;\n  }\n\n  form .error {\n    color: $errored-input-border-color;\n  }\n\n  @include placeholder-color ($subscribe-form-placeholder-color);\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_table_of_contents.css.scss",
    "content": "#table-of-content-wrapper {\n  font-family: $standard-font-family;\n  display: inline-block;\n  width: 262.5px;\n  box-sizing: border-box;\n  float: right;\n  margin-left: 20px;\n  margin-bottom: 20px;\n  padding: 20px 15px;\n  border: 1px solid #dddddd;\n\n  @include anchor-no-underline;\n\n  h5 {\n    font-size: 13px;\n    color: $black;\n  }\n\n  hr {\n    margin-top: 0;\n  }\n\n  ul {\n    padding: 0;\n    margin: 0;\n    font-size: 14px;\n    line-height: 24px;\n    color: $table-of-contents-font-color;\n\n    ul {\n      padding-left: 20px;\n    }\n  }\n\n  li {\n    list-style-type: none;\n    padding: 14px 0;\n  }\n\n  a {\n    color: $table-of-contents-link-color;\n  }\n\n  #edit-page-link {\n    color: $table-of-contents-edit-page-link-color;\n    font-size: 13px;\n\n    &:hover {\n      text-decoration: none;\n    }\n\n    img {\n      height: 19px;\n      margin-right: 5px;\n    }\n\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_tables.css.scss",
    "content": "table {\n  width: 100%;\n  th {\n    text-align: left;\n  }\n\n  tr {\n    border-bottom: 1px solid $border-color;\n  }\n\n  td, th {\n    padding: 2px 20px 2px 0;\n  }\n\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_tabs.css.scss",
    "content": ".tabs {\n  & > div {\n    display: block;\n    width: auto;\n  }\n  ul {\n    list-style-type: none;\n    padding: 0;\n    margin-bottom: 0;\n    display: inline-block;\n  }\n  li {\n    display: block;\n    float: left;\n    margin: 0 10px 0 0;\n    a {\n      display: inline-block;\n      background-color: $tab-background-color;\n      padding: 10px 15px 5px 15px;\n      border-bottom: 5px solid $border-color;\n      color: $tab-link-color;\n      text-decoration: none;\n\n      &:hover {\n        background-color: $tab-hover-background-color;\n        color: $tab-hover-link-color;\n      }\n    }\n    &.active {\n\n      a {\n        background-color: $tab-active-background-color;\n        border-bottom: 5px solid #da1111;\n        color: $tab-active-link-color;\n        &:hover {\n          background-color: $tab-active-background-color;\n        }\n      }\n    }\n  }\n  ul.tab-list {\n    list-style-type: disc; \n    list-style-position: inside; \n    margin-top: 0px; \n    margin-bottom: 15px;\n  }\n  li.tab-list-element {\n    display: list-item; \n    float: none;\n    width: 100%\n  }\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_tags.css.scss",
    "content": "body {\n  font-family: $body-font-family;\n  color: $body-font-color;\n  background-color: $footer-background-color;\n}\n\na {\n  color: $link-color;\n  text-decoration: none;\n  &:hover {\n    text-decoration: underline;\n  }\n}\n\n// Hack for links with anchors with fixed header!\n:target:before {\n  display: block;\n  content: '';\n  margin-top: -100px;\n  height: 100px;\n  visibility: hidden;\n}\n\nblockquote {\n  margin: 0;\n  p {\n    padding: 20px;\n  }\n\n  background-color: $blockquote-background-color;\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/_tryit.css.scss",
    "content": ".codepicnic {\n  display: flex;\n  margin-top: 20px;\n  flex-wrap: wrap;\n\n  i {\n    font-weight: 600;\n    color: $tryit-i-color;\n  }\n}\n\n.tutorial {\n  margin: 0 25px 0 0;\n  min-width: 400px;\n}\n\n@include large {\n  .codepicnic {\n    flex-wrap: nowrap;\n  }\n\n}\n\n.iframe {\n  width: 100%;\n  margin-bottom: 70px;\n\n  iframe {\n    border: 1px solid $border-color;\n    width: 100%;\n    height: 100%;\n    min-height: 500px;\n  }\n}\n\n.tryit {\n\n  #main {\n    width: 100%;\n  }\n\n  #footer {\n    width: 100%;\n    margin-left: 0;\n  }\n\n  #copyright {\n    width: 100%;\n    margin-left: 0;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/nav/_header.css.scss",
    "content": "#nav-header {\n  display: none;\n  margin: 0 25px 0 0;\n  ul {\n    padding: 0;\n    margin: 8px 0 0 0;\n  }\n  li {\n    display: inline-block;\n    margin-right: 25px;\n    &:last-child {\n      margin-right: 0;\n    }\n  }\n  a {\n    font-weight: 400;\n    color: $nav-header-link-color;\n    font-size: 18px;\n  }\n\n\n  @include medium {\n    display :inline-block;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/nav/_main.css.scss",
    "content": "#nav-main {\n  font-family: $standard-font-family;\n  @include anchor-no-underline;\n\n  a.collapsible { // to counter unknown css behavior\n    text-decoration: none;\n  }\n\n  ul {\n    padding: 0;\n    margin: 0;\n    width: 100%;\n    list-style-type: none;\n    background-color: $white;\n  }\n  & > ul {\n    border: 1px solid $nav-main-border-color;\n    ul {\n        display: none; // hide inactive sub-levels\n    }\n    a {\n      min-height: 44px;\n      font-size: 16px;\n      display: flex; // a serves as container, text is inside a span\n      flex-direction: column;\n      justify-content: center;\n      padding:15px;\n    }\n    & > li{\n      // level 1\n      a.collapsible {\n        color: $nav-main-collapsible-anchor-color;\n        background-color: $white;\n        border-bottom: none;\n      }\n\n      & > a {\n        background-color: $nav-main-active-menu-background-color;\n        border-bottom: 1px solid $nav-main-active-menu-bottom-border-color;\n        color: $nav-main-active-menu-link-color;\n      }\n      &>ul {\n        //level 2\n        li {\n          a { // inactive menu item\n            font-size: 14px;\n            padding-left: 25px;\n            color: $nav-main-level-2-link-color;\n            &.active {\n              border-left: 5px solid $nav-main-level-2-active-link-left-border-color;\n              color: $nav-main-level-2-active-link-color;\n            }\n          }\n          &>ul {\n            //level 3\n            li {\n              a {\n                padding-left: 30px;\n              }\n              &>ul {\n                //level 4\n                li {\n                  a {\n                    padding-left: 35px;\n                  }\n                }\n              }\n            }\n          }\n        }\n      }\n    }\n  }\n}"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/nav/_page.css.scss",
    "content": "#nav-page {\n  font-size: 24px;\n  white-space: nowrap;\n  display: inline-block;\n  margin: 15px 25px 0 -100px;\n  a {\n    color: $nav-page-icon-color;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/partials/nav/_swiftype.css.scss",
    "content": ".swiftype {\n  transition: width 1s ease;\n  float: right;\n  width: 100%;\n  box-sizing: border-box;\n\n  .search-form {\n    transition: width 1s ease;\n    float: right;\n    width: 10%;\n\n    .search-box-toggler {\n      float: left;\n      top: 8px;\n      position: relative;\n      cursor: pointer;\n    }\n\n    .search-box {\n      position: relative;\n      width: 0;\n      overflow: hidden;\n      img {\n        position: absolute;\n        top: 8px;\n        left: 13px;\n      }\n\n      .st-search-input {\n        float: right;\n        color: $body-font-color;\n        border-radius: 40px;\n        line-height: $search-box-height;\n        height: $search-box-height;\n        width: 100%;\n        text-indent: 30px;\n        box-sizing: border-box;\n        border: 1px solid $search-box-border-color;\n        box-shadow: none;\n        font-weight: 400;\n        font-size: 16px;\n        background: none;\n        background-color: $swiftype-input-background-color;\n        padding: 5px;\n        padding-left: 10px;\n      }\n    }\n\n    &.active {\n      width: 100%;\n      .search-box-toggler {\n        display: none;\n      }\n\n      .search-box {\n        width: 100%;\n      }\n    }\n  }\n}\n\nbody { \n  // style for swiftype result listings\n  // Using body forces a higher specificity over the Swiftype stylesheet!\n  div.swiftype div.st-result-listing div.st-search-summary {\n    border-bottom: 1px solid $border-color;\n    margin: 15px 0;\n    padding-bottom: 10px;\n  }\n\n  div.swiftype div.st-result-listing div.st-search-summary h2 {\n    color: $body-font-color;\n    font-size: 13px;\n    font-weight: normal;\n    margin: 0;\n  }\n\n  div.swiftype div.st-result-listing div.st-search-summary h2 .st-query {\n    color: $body-font-color;\n    font-style: italic;\n  }\n\n  div.swiftype div.st-result-listing div.st-search-summary div.st-logo-header {\n    display: none;\n  }\n\n  div.swiftype div.st-result-listing div.st-result 
{\n    border-bottom: 1px solid $border-color;\n    margin-bottom: 12px;\n    padding-bottom: 12px;\n  }\n\n  div.swiftype div.st-result-listing div.st-result div.st-result-text h3 {\n    font-size: 16px;\n    margin: 5px 0;\n  }\n\n  div.swiftype div.st-result-listing div.st-result .st-metadata {\n    font-size: 14px;\n    line-height: 20px;\n  }\n\n  div.swiftype div.st-result-listing div.st-result div.st-result-text div.st-metadata .st-snippet {\n    color: $body-font-color;\n  }\n\n  div.swiftype div.st-result-listing div.st-result .st-result-image {\n    float: left;\n    margin-right: 10px;\n    overflow: hidden;\n  }\n\n  div.swiftype div.st-result-listing div.st-result.with_image .st-result-text {\n    margin-left: 100px;\n  }\n\n  div.swiftype div.st-result-listing div.st-result.final {\n    border-bottom: medium none;\n    margin-bottom: 0;\n  }\n\n  div.swiftype div.st-result-listing .st-pagination {\n    border-top: 1px solid $border-color;\n    margin-top: 5px;\n    padding-top: 5px;\n  }\n\n  div.swiftype div.st-result-listing .st-pagination .st-prev {\n    margin-right: 20px;\n  }\n\n  div.swiftype div.st-result-listing div.st-logo-footer {\n    display: none;\n  }\n\n  .swiftype-widget .autocomplete {\n    background-color: $swiftype-background-color;\n    border-radius: 0;\n    box-shadow: none;\n    display: block;\n    font-family: $body-font-family;\n    list-style-type: none;\n    margin: 5px 0 0 0;\n    padding: 0;\n    position: absolute;\n    text-align: left;\n    border: 1px solid $border-color;\n    border-bottom: none;\n  }\n\n  .swiftype-widget .autocomplete ul {\n    background-color: $swiftype-background-color;\n    border-radius: 0;\n    display: block;\n    font-family: $body-font-family;\n    list-style-type: none;\n    margin: 0;\n    padding: 0;\n    text-align: left;\n  }\n\n  .swiftype-widget .autocomplete li {\n    background-image: none;\n    border-bottom: 1px solid $border-color;\n    border-top: 0;\n    cursor: pointer;\n  
  font-size: 13px;\n    list-style-type: none;\n    margin: 0;\n    padding: 10px 8px;\n  }\n\n  .swiftype-widget .autocomplete li:first-child {\n    border-radius: 0;\n    border-top: 0;\n  }\n\n  .swiftype-widget .autocomplete li:last-child {\n    border-radius: 0;\n  }\n\n  .swiftype-widget .autocomplete li.active {\n    background: none;\n    background-color: $swiftype-autocomplete-active-color;\n    border-bottom: 1px solid $border-color;\n    border-top: 0;\n    box-shadow: none;\n  }\n\n  .swiftype-widget .autocomplete li p {\n    font-size: 14px;\n    line-height: 20px;\n    margin: 0;\n    overflow: hidden;\n    padding: 0;\n  }\n\n  .swiftype-widget .autocomplete li p.title {\n    color: $swiftype-link-color;\n    font-weight: bold;\n  }\n\n  .swiftype-widget .autocomplete li p.title em {\n    color: $body-font-color;\n    font-style: normal;\n    font-weight: bold;\n  }\n\n  .swiftype-widget .autocomplete li.active p.title {\n    color: $swiftype-background-color;\n    text-shadow: none;\n  }\n\n  .swiftype-widget .autocomplete li.active p.title em {\n    color: $swiftype-background-color;\n    font-style: normal;\n  }\n\n  .swiftype-widget .autocomplete li .sections {\n    color: $body-font-color;\n    font-size: 11px;\n  }\n\n  .swiftype-widget .autocomplete li .sections em {\n    color: $body-font-color;\n    font-style: normal;\n  }\n\n  .swiftype-widget .autocomplete li .sections .section {\n    display: inline;\n  }\n\n  .swiftype-widget .autocomplete li.active .sections {\n    color: $swiftype-font-active-color;\n    text-shadow: none;\n  }\n\n  .swiftype-widget .autocomplete li.active .sections em {\n    color: $swiftype-font-active-color;\n    font-style: normal;\n  }\n}\n"
  },
  {
    "path": "docs/manual/source/stylesheets/variables/_colors.css.scss",
    "content": "// Named colors;\n$white: #ffffff;\n$black: #000000;\n\n// General\n\n$body-font-color: #373535;\n$link-color: #1ba4d8;\n$search-box-wrapper-border-color: #e5e5e5;\n$search-box-border-color: #cccccc;\n$border-color: #e3e3e3;\n$header-icon-color: #373535;\n$off-canvas-background-color: #ffffff;\n$main-background-color: #ffffff;\n\n// Header\n$header-border-bottom-color: #249DEC;\n$header-left-pill-background-color: #249DEC;\n$header-right-pill-background-color: #72D6FF;\n$header-left-pill-hover-backgounr-color: #0087DF;\n$header-right-pill-hover-backgounr-color: #57CBFA;\n$header-logo-wrapper-border-bottom-color: #BDC6D3;\n$header-nav-menu-option-border-bottom-color: #ccc;\n\n// Alerts\n$info-color: #0f99dd;\n$info-background-color: #c5eafc;\n\n$success-color: #2dd62d;\n$success-background-color: #d1f9d1;\n\n$warning-color: #f3d22d;\n$warning-background-color: #faf1c5;\n\n$danger-color: #bf3124;\n$danger-background-color: #ffe5e3;\n\n$note-color: #828385;\n$note-background-color: #e4e6eb;\n\n$todo-color: #c622c6;\n$todo-background-color: #fbc2fb;\n\n// Nav\n$nav-header-link-color: #373535;\n$nav-page-icon-color: #828385;\n\n// Main navigation menu\n$nav-main-border-color: #e5e5e5;\n$nav-main-mobile-active-item-border-color: #e5e5e5;\n$nav-main-collapsible-anchor-color: #333333;\n$nav-main-active-menu-background-color: #f0f0f0;\n$nav-main-active-menu-bottom-border-color: #E5E5E5;\n$nav-main-active-menu-link-color: #5f5f5f;\n$nav-main-level-2-link-color: #3482ab;\n$nav-main-level-2-active-link-color: #5f5f5f;\n$nav-main-level-2-active-link-left-border-color: #da1111;\n\n// Tabs\n$tab-background-color: #e9e9e9;\n$tab-link-color: #3482ab;\n$tab-hover-background-color: #7f93a8;\n$tab-active-border-color: #da1111;\n$tab-active-background-color: #f7f7f7;\n\n$tab-active-link-color: #5f5f5f;\n$tab-hover-link-color: #ffffff;\n\n// Buttons\n$button-download-font-color: #ffffff;\n$button-download-background-color: #0f99dd;\n\n$button-primary-background-color: 
#0f99dd;\n$button-primary-hover-background-color: #373535;\n$button-primary-font-color: #ffffff;\n$button-primary-hover-font-color: #ffffff;\n// Code Blocks\n$code-block-background-color: #091F38;\n$code-block-border-color: #e5e5e5;\n$code-line-numbers-background-color: #E1E7ED;\n$code-line-numbers-border-color: #2dd62d;\n$code-line-numbers-font-color: #373535;\n$code-inline-background-color: #edf1f6;\n$code-inline-border-color: $border-color;\n$code-inline-color: #222;\n\n// Table of Contents\n$table-of-contents-link-color: #27ABEB;\n$table-of-contents-font-color: #828385;\n$table-of-contents-background-color: #f0f0f0;\n$table-of-contents-edit-page-link-color: #AAAAAA;\n\n// Footer\n$footer-seperator-color: #eeeeee;\n$footer-background-color: #ffffff;\n$footer-link-color: #888888;\n$footer-heading-4-color: #262626;\n$footer-bottom-background-color: #262626;\n$footer-bottom-span-color: #ffffff;\n\n// Subscribe Form\n$form-wrapper-color: #091F38;\n$form-wrapper-border-color: #ccc;\n$form-wrapper-heading-4-color: #E5E5E5;\n$subscribe-form-pill-button-background-color: #2499E6;\n$disabled-pill-button-background-color: #ccc;\n$invalid-input-border-color: #ffd654;\n$valid-input-border-color: #EBF1F9;\n$errored-input-border-color: #a94442;\n$subscribe-form-placeholder-color: #BCBCBC;\n\n// Copyright\n$copyright-font-color: #828385;\n$copyright-link-color: #828385;\n\n// Page Title\n$page-title-font-color: #000000;\n\n// Headings\n$heading-1-font-color: #000000;\n$heading-2-font-color: #000000;\n$heading-3-font-color: #373535;\n$heading-4-font-color: #373535;\n$heading-5-font-color: #373535;\n$heading-6-font-color: #373535;\n\n$content-paragraph-font-color: #666666;\n$content-link-color:#2499E6;\n\n// Tags\n$blockquote-background-color: #f0f0f0;\n\n// Swiftype\n$swiftype-link-color: #3482ab;\n$swiftype-background-color: #fcfcfc;\n$swiftype-input-background-color: #ffffff;\n$swiftype-font-active-color: #ffffff;\n$swiftype-autocomplete-active-color: 
#7f93a8;\n\n$mobile-page-header-label-color: #BDC6D3;\n\n// Edit Page\n$edit-page-background-color: #0f99dd;\n$edit-page-font-color: #ffffff;\n\n\n$carousel-active-background-color: #0f99dd;\n$carousel-hover-background-color: #4f626c;\n\n$carousel-link-color: #373535;\n$carousel-hover-link-color: #ffffff;\n$carousel-active-link-color: #ffffff;\n\n$carousel-control-background-color: #0f99dd;\n$carousel-control-font-color: #ffffff;\n$carousel-control-hover-background-color: #373535;\n\n$tryit-i-color: #3482ab;\n\n$global-background-color: #ffffff;\n\n$breadcrumbs-link-color: #27ABEC;\n$breadcrumbs-text-color: #6F7072;\n\n$logo-dark-color: #000000;\n"
  },
  {
    "path": "docs/manual/source/stylesheets/variables/_fonts.css.scss",
    "content": "$standard-font-family: \"pt-sans\", 'Helvetica Neue', Helvetica, Arial, sans-serif;\n$body-font-family: \"pt-sans\", 'Helvetica Neue', Helvetica, Arial, sans-serif;\n$code-font-family: Menlo, Monaco, Consolas, monospace;\n"
  },
  {
    "path": "docs/manual/source/stylesheets/variables/_sizes.css.scss",
    "content": "$search-bar-row-height: 56px;\n$header-height: 83px;\n$mobile-header-height: 56px;\n$search-box-height: 36px;\n$search-bar-row-vertical-padding: 10px;"
  },
  {
    "path": "docs/manual/source/support/index.html.md.erb",
    "content": "---\ntitle: Getting Help\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Community Support\n\nApache PredictionIO has a welcoming and active community. We are\nhere to support you and make sure that you can use Apache PredictionIO\nsuccessfully.\n\nIf you are a user, please subscribe to our user mailing list.\n\n1.  Click on one of these:\n    * [Subscribe](mailto:user-subscribe@predictionio.apache.org)\n    * [Unsubscribe](mailto:user-unsubscribe@predictionio.apache.org)\n2.  Simply send an empty e-mail after your e-mail client opens. Make sure you\n    send from an e-mail address that you want to receive posts.\n3.  In a moment, you should receive a confirmation e-mail from the Apache\n    mailing list server. Simply click reply to that e-mail, and click send\n    without changing anything.\n4.  In a moment, you should receive another e-mail that confirms your\n    subscription. 
You can now start posting by simply e-mailing\n    user@predictionio.apache.org.\n\nIf you are a contributor, please subscribe to our development mailing list.\n\n* [Subscribe](mailto:dev-subscribe@predictionio.apache.org)\n* [Unsubscribe](mailto:dev-unsubscribe@predictionio.apache.org)\n\n## Enterprise Support\n\nFor enterprise users who require professional support, you may contact <a\nhref=\"mailto:&#x63;&#x6F;&#x6E;&#x74;&#x61;&#x63;&#x74;&#x40;&#x61;&#x63;&#x74;&#x69;&#x6F;&#x6E;&#x6D;&#x6C;&#x2E;&#x63;&#x6F;&#x6D;\">&#x63;&#x6F;&#x6E;&#x74;&#x61;&#x63;&#x74;&#x40;&#x61;&#x63;&#x74;&#x69;&#x6F;&#x6E;&#x6D;&#x6C;&#x2E;&#x63;&#x6F;&#x6D;</a>.\n"
  },
  {
    "path": "docs/manual/source/system/anotherdatastore.html.md",
    "content": "---\ntitle: Using Another Data Store\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nPredictionIO has a thin storage layer to abstract meta data, event data, and\nmodel data access. The layer defines a set of standard interfaces to support\nmultiple data store backends. PredictionIO users can configure the backend of\nchoice through configuration files or environmental variables. Engine developers\nneed not worry about the actual underlying storage architecture. Advanced\ndevelopers can implement their own backend driver as an external library.\n\n\n## Concepts\n\nIn this section, we will visit some storage layer concepts that are common to\nusers, engine developers, and advanced developers:\n\n- **Repository** is the highest level of data access abstraction and is where all\nengines and PredictionIO itself access data with.\n\n- **Source** is the actual data store backend that provide data access. 
A source is an\nimplementation of the set of data access interfaces defined by *repositories*.\n\nEach of them will be explained in detail below:\n\n### Repositories\n\n*Repository* is the highest level of data access abstraction and is where all\nengines and PredictionIO itself access data.\n\nThe storage layer currently defines three mandatory data repositories: *meta\ndata*, *event data*, and *model data*. Each repository has its own set of data\naccess interfaces.\n\n- **Meta data** is used by PredictionIO to store engine training and evaluation\ninformation. Commands like `pio build`, `pio train`, `pio deploy`, and `pio\neval` all access meta data.\n\n- **Event data** is used by the Event Server to collect events, and by engines to\nsource data.\n\n- **Model data** is used by PredictionIO for automatic persistence of trained\nmodels.\n\nThe following configuration variables are used to configure these repositories:\n\n  - *Meta data* is configured by the `PIO_STORAGE_REPOSITORIES_METADATA_XXX` variables.\n  - *Event data* is configured by the `PIO_STORAGE_REPOSITORIES_EVENTDATA_XXX` variables.\n  - *Model data* is configured by the `PIO_STORAGE_REPOSITORIES_MODELDATA_XXX` variables.\n\nConfiguration variables will be explained in more detail in later sections below (see Data Store Configuration).\n\nFor example, you may see the following configuration variables defined in `conf/pio-env.sh`:\n\n```shell\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=predictionio_metadata\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=ELASTICSEARCH\n\nPIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=predictionio_eventdata\nPIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=HBASE\n\nPIO_STORAGE_REPOSITORIES_MODELDATA_NAME=pio_\nPIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=LOCALFS\n```\n\nThe configuration variable with the *NAME* suffix controls the namespace used by\nthe *source*.\n\nThe configuration variable with the *SOURCE* suffix points to the actual\n**source** that will back this repository. 
*Source* will be explained below.\n\n\n### Sources\n\n*Sources* are actual data store backends that provide data access. A source is an\nimplementation of the set of data access interfaces defined by *repositories*.\n\nPredictionIO comes with the following sources:\n\n- **JDBC** (tested on MySQL and PostgreSQL):\n  * Type name is **jdbc**.\n  * Can be used for *Meta Data*, *Event Data* and *Model Data* repositories\n\n- **Elasticsearch**:\n  * Type name is **elasticsearch**\n  * Can be used for *Meta Data* repository\n\n- **Apache HBase**:\n  * Type name is **hbase**\n  * Can be used for *Event Data* repository\n\n- **Local file system**:\n  * Type name is **localfs**\n  * Can be used for *Model Data* repository\n\n- **HDFS**:\n  * Type name is **hdfs**.\n  * Can be used for *Model Data* repository\n\n- **S3**:\n  * Type name is **s3**.\n  * Can be used for *Model Data* repository\n\nEach repository can be configured to use different sources as shown above.\n\nEach source has its own set of configuration parameters. Configuration variables will be explained in more detail in later sections below (see Data Store Configuration).\n\nThe following is an example source configuration named \"PGSQL\" of type `jdbc`:\n\n```shell\nPIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc\nPIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql:predictionio\nPIO_STORAGE_SOURCES_PGSQL_USERNAME=pio\nPIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio\n```\n\nThe following is an example of using this source \"PGSQL\" for the *meta data* repository:\n\n```shell\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=predictionio_metadata\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL\n```\n\n## Data Store Configuration\n\nData store configuration is done by setting environment variables. If you set\nthem inside `conf/pio-env.sh`, they will be automatically available whenever you\nperform a `pio` command, e.g. 
`pio train`.\n\nNotice that all variables are prefixed by `PIO_STORAGE_`.\n\n### Repositories Configuration\n\nVariable Format: `PIO_STORAGE_REPOSITORIES_<REPO>_<KEY>`\n\nConfiguration variables of repositories are prefixed by\n`PIO_STORAGE_REPOSITORIES_`, followed by the repository name (e.g. `METADATA`),\nand then either `NAME` or `SOURCE`.\n\nConsider the following example:\n\n```shell\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=predictionio_metadata\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=PGSQL\n```\n\nThe above configures PredictionIO to look for a source configured with the name\n`PGSQL`, and use `predictionio_metadata` as the namespace within such source. There is no\nrestriction on namespace usage by the source, so behavior may vary. As an\nexample, the official JDBC source uses the namespace as database table prefix.\n\n\n### Sources Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_<NAME>_<KEY>`\n\nConfiguration variables of sources are prefixed by\n`PIO_STORAGE_SOURCES_`, followed by the source name of choice (e.g. `PGSQL`,\n`MYSQL`, `HBASE`, etc), and a configuration `KEY`.\n\nINFO: The `TYPE` configuration key is mandatory. 
It is used by PredictionIO to\ndetermine the actual driver type to load.\n\nDepending on what the source `TYPE` is, different configuration keys are\nrequired.\n\n\n#### JDBC Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=jdbc`\n\nSupported Repositories: **meta**, **event**, **model**\n\nTested on: MySQL 5.1+, PostgreSQL 9.1+\n\nWhen `TYPE` is set to `jdbc`, the following configuration keys are supported.\n\n-   URL (mandatory)\n\n    The value must be a valid JDBC URL that points to a database, e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql:predictionio`\n\n-   USERNAME (mandatory)\n\n    The value must be a valid, non-empty username for the JDBC connection, e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_USERNAME=pio_user`\n\n-   PASSWORD (mandatory)\n\n    The value must be a valid, non-empty password for the JDBC connection, e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio_user_password`\n\n-   PARTITIONS (optional, default to 4)\n\n    This value is used by Apache Spark to determine the number of partitions to\n    use when it reads from the JDBC connection, e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_PARTITIONS=4`\n\n-   CONNECTIONS (optional, default to 8)\n\n    This value is used by scalikejdbc library to determine the max size of connection pool, e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_CONNECTIONS=8`\n\n-   INDEX (optional since v0.9.6, default to disabled)\n\n    This value is used by creating indexes on entityId and entityType columns to\n    improve performance when findByEntity function is called. Note that these columns\n    of entityId and entityType will be created as varchar(255), e.g.\n    `PIO_STORAGE_SOURCES_PGSQL_INDEX=enabled`\n\n\n#### Apache HBase Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=hbase`\n\nSupported Repositories: **event**\n\nTested on: Apache HBase 0.98.5+, 1.0.0+\n\nWhen `TYPE` is set to `hbase`, no other configuration keys are required. 
Other\nclient side HBase configuration must be done through `hbase-site.xml` pointed\nby the `HBASE_CONF_DIR` configuration variable.\n\n\n#### Elasticsearch Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=elasticsearch`\n\nSupported Repositories: **meta**\n\nWhen `TYPE` is set to `elasticsearch`, the following configuration keys are\nsupported.\n\n-   HOSTS (mandatory)\n\n    Comma-separated list of hostnames, e.g.\n    `PIO_STORAGE_SOURCES_ES_HOSTS=es1,es2,es3`\n\n-   PORTS (mandatory)\n\n    Comma-separated list of ports that corresponds to `HOSTS`, e.g.\n    `PIO_STORAGE_SOURCES_ES_PORTS=9200,9200,9222`\n\n-   CLUSTERNAME (optional, default to `elasticsearch`)\n\n    Elasticsearch cluster name, e.g.\n    `PIO_STORAGE_SOURCES_ES_CLUSTERNAME=myescluster`\n\nINFO: Other advanced Elasticsearch parameters can be set by pointing\n`ES_CONF_DIR` configuration variable to the location of `elasticsearch.yml`.\n\n\n#### Local File System Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=localfs`\n\nSupported Repositories: **model**\n\nWhen `TYPE` is set to `localfs`, the following configuration keys are\nsupported.\n\n-   PATH (mandatory)\n\n    File system path at where models are stored, e.g.\n    `PIO_STORAGE_SOURCES_FS_PATH=/mymodels`\n\n\n#### HDFS Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=hdfs`\n\nSupported Repositories: **model**\n\nWhen `TYPE` is set to `hdfs`, the following configuration keys are\nsupported.\n\n-   PATH (mandatory)\n\n    HDFS path at where models are stored, e.g.\n    `PIO_STORAGE_SOURCES_HDFS_PATH=/mymodels`\n\n\n#### S3 Configuration\n\nVariable Format: `PIO_STORAGE_SOURCES_[NAME]_TYPE=s3`\n\nSupported Repositories: **model**\n\nTo provide authentication information, you can set the `AWS_ACCESS_KEY_ID`\nand `AWS_SECRET_ACCESS_KEY` environment variables or use one of the other\nmethods in the [AWS Setup 
Docs](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#config-settings-and-precedence).\n\nWhen `TYPE` is set to `s3`, the following configuration keys are\nsupported.\n\n-   REGION (mandatory)\n\n    AWS Region to use, e.g.\n    `PIO_STORAGE_SOURCES_S3_REGION=us-east-1`\n\n-   BUCKET_NAME (mandatory)\n\n    S3 Bucket where models are stored, e.g.\n    `PIO_STORAGE_SOURCES_S3_BUCKET_NAME=pio_bucket`\n\n-   BASE_PATH (optional)\n\n    S3 base path where models are stored, e.g.\n    `PIO_STORAGE_SOURCES_S3_BASE_PATH=pio_model`\n\n-   DISABLE_CHUNKED_ENCODING (optional)\n\n    Disable the use of Chunked Encoding when transferring files to/from S3, e.g.\n    `PIO_STORAGE_SOURCES_S3_DISABLE_CHUNKED_ENCODING=true`\n\n-   ENDPOINT (optional)\n\n    S3 Endpoint to use, e.g.\n    `PIO_STORAGE_SOURCES_S3_ENDPOINT=http://localstack:4572`\n\n\n## Adding Support of Other Backends\n\nIt is quite straightforward to implement support of other backends. A good\nstarting point is to reference the JDBC implementation inside the\n[org.apache.predictionio.data.storage.jdbc\npackage](https://github.com/apache/predictionio/tree/develop/data/src/main/scala/org/apache/predictionio/data/storage/jdbc).\n\nContributions of different backend implementations are highly encouraged. To\nstart contributing, please refer to [this guide](/community/contribute-code/).\n\n\n### Deploying Your Custom Backend Support as a Plugin\n\nIt is possible to deploy your custom backend implementation as a standalone JAR\napart from the main PredictionIO binary distribution. The following is an\noutline of how this can be achieved.\n\n1.  Create an SBT project with a library dependency on PredictionIO's data\n    access base traits (inside the `data` artifact).\n\n2.  Implement traits that you intend to support, and package everything into a\n    big fat JAR (e.g. sbt-assembly).\n\n3.  Create a directory named `plugins` inside PredictionIO binary installation.\n\n4.  
Copy the JAR from step 2 to `plugins`.\n\n5.  In storage configuration, specify `TYPE` as your complete package name. As\n    an example, if you have implemented all your traits under the package name\n    `org.mystorage.jdbc`, use something like\n\n    ```shell\n    PIO_STORAGE_SOURCES_MYJDBC_TYPE=org.mystorage.jdbc\n    ...\n    PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=MYJDBC\n    ```\n\n    to instruct PredictionIO to pick up `StorageClient` from the appropriate\n    package.\n\n6.  Now you should be able to use your custom source and assign it to different\n    repositories as you wish.\n"
  },
  {
    "path": "docs/manual/source/system/deploy-cloudformation.html.md.erb",
    "content": "---\ntitle: Deploying with AWS CloudFormation\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n##Overview\n\nYou can scale PredictionIO on AWS with\n[CloudFormation](http://aws.amazon.com/cloudformation/). Here we have defined a\nPredictionIO CloudFormation stack that you can deploy a functional, fully\ndistributed PredictionIO cluster in minutes.\n\n###  Instances\n\nThe PredictionIO CloudFormation stack creates two types of instance: **compute\nand storage**. By default, the stack will launch **1 compute Instance and 3\nStorage instances**.\n\nThe compute instance *(ComputeInstance)* acts as Spark master. You can launch\nextra compute instances *(ComputeInstanceExtra)* by updating the stack. The\nstorage instances *(StorageInstance)* form the core of the HDFS, ZooKeeper\nquorum, and HBase storage. Extra storage instances *(StorageInstanceExtra)* can\nbe added to the cluster by updating the stack. 
They cannot be removed once they\nare spun up.\n\nPredictionIO Event Server will be launched on all storage instances.\n\n### Networking\n\nThe stack will automatically create a VPC and a subnet with an Internet gateway.\nAll cluster instances will be launched inside this subnet using a single\nsecurity group that enables all TCP communications among all instances within\nthe same group. All compute instances (including those that are launched after\nstack creation) will receive public IPs. All core storage instances will receive\npublic Elastic IPs.\n\n## Step-by-Step\n\nFirst, you need to have an active Amazon Web Services account with permissions\nto use the following services:\n\n* Auto Scaling\n* CloudFormation\n* EC2\n* VPC\n\n### Subscribe to PredictionIO Cluster\n\nBefore you can start using PredictionIO CloudFormation template, you will need\nto subscribe to PredictionIO Cluster AMI through AWS Marketplace. To subscribe,\ngo to this [link](https://aws.amazon.com/marketplace/pp/B00S74CY0A). Click the\nyellow \"Continue\" button on the next screen.\n\n![AWS Marketplace Subscription Page](/images/cloudformation/awsmp-1.png)\n\nIn the next screen, click the \"Manual Launch\" tab, then select your preferred\npricing model. Pick your desired version, then click \"Accept Terms\".\n\n![AWS Marketplace Term Page](/images/cloudformation/awsmp-2.png)\n\nOnce your subscription is in place, you may proceed to the next section.\n\n### Start Using CloudFormation\n\nFrom your main AWS console, locate CloudFormation and click on it.\n\n![CloudFormation on AWS Console](/images/cloudformation/cf-01.png)\n\nThis will bring you to the CloudFormation console below.\n\n![CloudFormation Console](/images/cloudformation/cf-02.png)\n\n### Select the PredictionIO CloudFormation Stack Template\n\nFrom the CloudFormation console, click on the **Create New Stack** blue button\nas shown above. This will bring up the **Select Template** screen. Name your\nstack as you like. 
Within the *Template* section, choose **Specify an Amazon S3\ntemplate URL**, and put\nhttps://s3.amazonaws.com/cloudformation.prediction.io/<%= data.versions.pio %>/pio.json as the\nvalue.\n\n![CloudFormation Stack Template Selection](/images/cloudformation/cf-03.png)\n\nClick **Next** when you are done.\n\n### Specify Stack Parameters\n\nThe next screen shows the stack parameters. You must enter your AWS SSH key\npair. For the other parameters, you can change them to meet your needs or simply\nuse the default values.\n\n![Stack Parameters](/images/cloudformation/cf-04.png)\n\n| Parameter | Description |\n|-----------|-------------|\n| AWS-KeyPair | The AWS SSH key pair name that can be used to access all instances in the cluster. |\n| AvailabilityZone | Specify the availability zone that the PredictionIO cluster will be launched in. All instances of the cluster will be launched into the same zone for optimal network performance between one another. |\n| ComputeInstanceType | The EC2 instance type of all compute instances. Memory-optimized EC2 instances are recommended. |\n| ComputeInstanceExtra | Number of extra compute instances besides the core compute instance. This can be increased and decreased. |\n| StorageInstanceExtra | Number of extra storage instances besides core storage instances. **Never decrease this value or you will risk data corruption.** |\n| StorageInstanceExtraSize | Size in GB of each extra storage instance. This can be changed when you add an extra storage instance. |\n| StorageInstanceExtraVolumeType | The EBS volume type of each extra storage instance. Valid values are *standard* and *gp2*. This can be changed when you add an extra storage instance. |\n| StorageInstanceType | The EC2 instance type of all storage instances. General purpose EC2 instances are recommended. |\n| StorageInstanceSize | Size in GB of each core storage instance. This cannot be changed once the cluster is launched. 
|\n| StorageInstanceVolumeType | The EBS volume type of each core storage instance. Valid values are *standard* and *gp2*. This cannot be changed once the cluster is launched. |\n\nClick **Next** when you are done. You will arrive at the **Options** screen. You\ncan skip this step if you do not have other options to specify.\n\nAt the **Review** screen, click **Create** to finish.\n\n## Using the Cluster\n\nYou should see the following when the cluster is being created after the\nprevious step.\n\n![Stack Creation](/images/cloudformation/cf-05.png)\n\nOnce the stack creation has finished, you can click on **Events** and select\n**Outputs** to arrive to the following screen.\n\n![Completed Stack](/images/cloudformation/cf-06.png)\n\nTake note of **PIOComputeMasterPublicIp** and **PIOStorageMasterPublicIp**. We\nwill now access the cluster and make sure everything is in place.\n\nWARNING: Sometimes the stack is created successfully but not all\ncluster services would launch due to potential network\nglitches or system issues within a cluster instance. In this case, simply\ndelete and create the stack again.\n\n### Verify Compute Instances\n\nSSH to the master compute instance using the **PIOComputeMasterPublicIp**. In this\nexample, let us assume the IP address be 54.175.145.84, and your private key\nfile be **yourkey.pem**.\n\n```bash\n$ ssh -i yourkey.pem -A -L 8080:localhost:8080 ubuntu@54.175.145.84\n```\n\nOnce you are in, point your web browser to http://localhost:8080. You should see\nsomething similar to the following.\n\n![Example Spark UI](/images/cloudformation/spark.png)\n\nNOTE: In the example above **NumberOfComputeWorkers** is **2**. This is because\nthe example has 1 compute instance and 1 extra compute instance. If you do not\nhave any extra compute instances, you will see only 1 worker on the above page.\n\n### Verify Storage Instances\n\nSSH to the storage instance using the **PIOStorageMasterPublicIp**. 
In\nthis example, let us assume the IP address be 54.175.1.36, and your private key\nfile be **yourkey.pem**.\n\n```bash\n$ ssh -i yourkey.pem -A -L 50070:localhost:50070 -L 16010:localhost:16010 -L 16030:localhost:16030 ubuntu@54.175.1.36\n```\n\nOnce you are in, point your web browser to http://localhost:50070 and click on\n**Datanodes** at the top menu. You should see the following page.\n\n![Example HDFS UI](/images/cloudformation/hdfs.png)\n\nWARNING: All **3 storage instances** must be up for proper operation.\n\nIf all **3 storage instances** are working properly, you can then verify HBase\nby pointing your web browser to http://localhost:16010. You should see something\nsimilar to the following.\n\n![Example HBase UI](/images/cloudformation/hbase.png)\n\nIf you do not specify any extra storage instances, you should see 2 region\nservers. There should also be 1 backup master.\n\n### Running Quick Start\n\nYou can now start with the fully-distributed PredictionIO\ncluster. Let's start with the [recommendation quick\nstart](/templates/recommendation/quickstart/) with a few twists.\n\n1. Skip the installation steps and run `pio status`. You should see\n   everything functional.\n\n2. Run through the section **Create a Sample App** as described. The\n   installation directory of PredictionIO is `/opt/PredictionIO`.\n\n3. Run through the section **Collecting Data** as described, except that you\n   will be connecting to the Event Server at the master core storage instance.\n   Assuming the private IP of the master core storage instance is `10.0.0.123`,\n   add `--url http://10.0.0.123:7070` to the `import_eventserver.py` command.\n\n4. Copy HBase configuration to the engine template directory. The full path of\n   the configuration file is `/opt/hbase-0.98.9-hadoop2/conf/hbase-site.xml`.\n   (This step will not be required in future releases.)\n\n5. Run through the section **Deploy the Engine as a Service** up to the\n   subsection **Training**. 
Assuming the private DNS name of the master compute\n   instance is `ip-10-0-0-234.ec2.internal`, add\n   `-- --master spark://ip-10-0-0-234.ec2.internal:7077` after the `pio train`\n   command. This will send the training to the compute cluster instead of the\n   local machine. The Spark master URL must match exactly the one shown on its\n   web UI. Repeat the same steps for subsection **Deploying**, which will\n   create an Engine Server backed by the compute cluster.\n\n## Scaling the Cluster\n\nAs your data size and/or audience grow, you can scale your cluster\nto handle more workload or decrease turnaround time. In this section, we will\nprovide some general guidelines about when and how to scale your cluster with\nCloudFormation.\n\n### Scaling Compute Instances\n\nYou can increase compute instances to reduce training time *($pio train)* and\nthe time to query an engine server. You can also check the [Spark Master Web UI]\nto see if you need additional compute power.\n\nNotice that for compute instances, you can increase or decrease the number of\nextra compute instances *(ComputeInstanceExtra)* as much as you like. The extra\ncompute instances will join the master and become slave compute instances as\nSpark workers.\n\nLet us begin by adding 2 extra compute instances. At the CloudFormation console,\nright click on the cluster stack and click on **Update Stack**.\n\n![Updating Stack to Add Extra Compute\nInstances](/images/cloudformation/compute-1.png)\n\nAt the **Select Template** screen, make sure **Use existing template** is\npicked, then click **Next**.\n\nAt the **Specify Parameters** screen, increase the value of\n**ComputeInstanceExtra** to **2**, then click **Next**.\n\n![Adding 2 Extra Compute Instances](/images/cloudformation/compute-2.png)\n\nAt the **Options** screen, leave everything unchanged, and click **Next**.\n\nAt the **Review** screen, make sure **ComputeInstanceExtra** is now updated to\n**2**. 
Finish by clicking **Update**.\n\n![Review Changes of Adding Extra Compute\nInstances](/images/cloudformation/compute-3.png)\n\nYou will be brought back to the CloudFormation console. You should see the\nstack status changed to **UPDATE_IN_PROGRESS**.\n\n![Adding Extra Compute Instances In\nProgress](/images/cloudformation/compute-4.png)\n\nOnce the status becomes **UPDATE_COMPLETE**, you will have 2 extra compute\ninstances. Notice that during the update, your cluster is still functional and\nany existing work will not be affected. If you are downscaling, existing work\nmight be affected during the update process.\n\n### Scaling Storage Instances\n\nYou can scale your storage instances when you are about to run out of space. You\ncan check your storage usage at the [Hadoop NameNode web UI].\n\nWARNING: For storage instances, you can only increase the number of extra\nstorage instances *(StorageInstanceExtra)* within the bounds of AWS EC2 limits.\nDecreasing the instances will risk data corruption.\n\nLet us begin by adding 2 extra storage instances. At the CloudFormation console,\nright click on the cluster stack and click on **Update Stack**.\n\n![Updating Stack to Add Extra Storage\nInstances](/images/cloudformation/compute-1.png)\n\nAt the **Select Template** screen, make sure **Use existing template** is\npicked, then click **Next**.\n\nAt the **Specify Parameters** screen, increase the value of\n**StorageInstanceExtra** to **2**, and set the value of\n**StorageInstanceExtraSize** to **100**, then click **Next**. Notice that\nwhenever you add an extra storage instance, you can change its size to a new\nvalue. 
The new size will not affect existing storage instances and your data\nwill be safe.\n\n![Adding 2 Extra Storage Instances](/images/cloudformation/storage-1.png)\n\nAt the **Options** screen, leave everything unchanged, and click **Next**.\n\nAt the **Review** screen, make sure **StorageInstanceExtra** is now updated to\n**2**, and **StorageInstanceExtraSize** is updated to **100**. Finish by\nclicking **Update**.\n\n![Review Changes of Adding Extra Storage\nInstances](/images/cloudformation/storage-2.png)\n\nYou will be brought back to the CloudFormation console. You should see the\nstack status changed to **UPDATE_IN_PROGRESS**.\n\n![Adding Extra Compute Instances In\nProgress](/images/cloudformation/compute-4.png)\n\nOnce the status becomes **UPDATE_COMPLETE**, you will have 2 extra storage\ninstances. Notice that during the up-scaling update, your cluster is still\nfunctional and existing work will not be affected. They may be affected during\ndownscale.\n\n## Support and Pricing\n\nPredictionIO Cluster comes with Enterprise Support. For pricing and support\ndetails, please contact support@prediction.io.\n"
  },
  {
    "path": "docs/manual/source/system/index.html.md",
    "content": "---\ntitle: System Architecture and Dependencies\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis section explains general rules-of-thumb for how they are used in\nPredictionIO. The actual implementation of the Template will define how much of\nthis applies. PredictionIO is flexible about much of this configuration but its\nTemplates generally fit the Lambda model for integrating real-time serving with\nbackground periodic model updates.\n\n![PredictionIO Systems](/images/pio-architecture.svg)\n\n**HBase**: Event Server uses Apache HBase (or JDBC DB for small data) as the\ndata store. It stores imported events. If you are not using the PredictionIO\nEvent Server, you do not need to install HBase.\n\n**Apache Spark**: Spark is a large-scale data processing engine that powers the\ndata preparation and input to the algorithm, training, and sometimes the\nserving processing. PredictionIO allows for different engines to be used in\ntraining but many algorithms come from Spark's MLlib.\n\n**HDFS**: is a distributed filesystem from Hadoop. It allows storage to be\nshared among clustered machines. 
It is used to stage data for batch import into\nPredictionIO, for export of Event Server datasets, and for storage of some\nmodels (see your template for details).\n\n\nThe output of training has two parts: a model and its meta-data. The model is\nthen stored in HDFS, a local file system, or Elasticsearch. See the details of\nyour algorithm.\n\n**Elasticsearch**: stores metadata such as model versions, engine versions,\naccess key and app ID mappings, evaluation results, etc. For some templates it\nmay store the model.\n"
  },
  {
    "path": "docs/manual/source/templates/classification/add-algorithm.html.md",
    "content": "---\ntitle: Using Alternative Algorithm\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThe classification template uses the Naive Bayes algorithm by default. You can easily add and use other MLlib classification algorithms. The following will demonstrate how to add the [MLlib Random Forests algorithm](https://spark.apache.org/docs/latest/mllib-ensembles.html) into the engine.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-classification/add-algorithm).\n\n## Create a new file RandomForestAlgorithm.scala\n\nLocate `src/main/scala/NaiveBayesAlgorithm.scala` under your engine directory, which should be /MyClassification if you are following the [Classification QuickStart](/templates/classification/quickstart/).  Copy `NaiveBayesAlgorithm.scala` and create a new file `RandomForestAlgorithm.scala`. 
You will modify this file and follow the instructions below to define a new RandomForestAlgorithm class.\n\n##  Define the algorithm class and parameters\n\nIn `RandomForestAlgorithm.scala`, import the MLlib Random Forests algorithm by changing the following lines:\n\nOriginal\n\n```scala\nimport org.apache.spark.mllib.classification.NaiveBayes\nimport org.apache.spark.mllib.classification.NaiveBayesModel\n```\n\nChange to:\n\n```scala\nimport org.apache.spark.mllib.tree.RandomForest // CHANGED\nimport org.apache.spark.mllib.tree.model.RandomForestModel // CHANGED\n```\n\nThese are the necessary classes in order to use MLlib's Random Forest algorithm.\n\nModify the `AlgorithmParams` class for the Random Forest algorithm:\n\n```scala\n// CHANGED\ncase class RandomForestAlgorithmParams(\n  numClasses: Int,\n  numTrees: Int,\n  featureSubsetStrategy: String,\n  impurity: String,\n  maxDepth: Int,\n  maxBins: Int\n) extends Params\n```\n\nThis class defines the parameters of the Random Forest algorithm (whose values you can later specify in engine.json). Please refer to [MLlib  documentation](https://spark.apache.org/docs/latest/mllib-ensembles.html) for the description and usage of these parameters.\n\nModify the `NaiveBayesAlgorithm` class to `RandomForestAlgorithm`. The changes are:\n\n* The new `RandomForestAlgorithmParams` class is used as parameter.\n* `RandomForestModel` is used as a type parameter. 
This is the model returned by the Random Forest algorithm.\n* the `train()` function is modified and it returns the `RandomForestModel` instead of `NaiveBayesModel`.\n* the `predict()` function takes the `RandomForestModel` as input.\n\n\n\n```scala\n// extends P2LAlgorithm because the MLlib's RandomForestModel doesn't\n// contain RDD.\nclass RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED\n  extends P2LAlgorithm[PreparedData, RandomForestModel, // CHANGED\n  Query, PredictedResult] {\n\n  // CHANGED\n  def train(sc: SparkContext, data: PreparedData): RandomForestModel = {\n    // CHANGED\n    // Empty categoricalFeaturesInfo indicates all features are continuous.\n    val categoricalFeaturesInfo = Map[Int, Int]()\n    RandomForest.trainClassifier(\n      data.labeledPoints,\n      ap.numClasses,\n      categoricalFeaturesInfo,\n      ap.numTrees,\n      ap.featureSubsetStrategy,\n      ap.impurity,\n      ap.maxDepth,\n      ap.maxBins)\n  }\n\n  def predict(\n    model: RandomForestModel, // CHANGED\n    query: Query): PredictedResult = {\n\n    val label = model.predict(Vectors.dense(\n      Array(query.attr0, query.attr1, query.attr2)\n    ))\n    PredictedResult(label)\n  }\n\n}\n```\n\nNote that the MLlib Random Forest algorithm takes the same training data as the Naive Bayes algorithm (i.e., `RDD[LabeledPoint]`) so you don't need to modify the `DataSource` and `PreparedData` classes. If the new algorithm to be added requires different types of training data, then you need to modify these classes accordingly to accommodate your new algorithm.\n\n##  Update Engine.scala\n\nModify the EngineFactory to add the new algorithm class `RandomForestAlgorithm` you just defined and give it a name `\"randomforest\"`. 
The name will be used in `engine.json` to specify which algorithm to use.\n\n```scala\nobject ClassificationEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"naive\" -> classOf[NaiveBayesAlgorithm],\n        \"randomforest\" -> classOf[RandomForestAlgorithm]), // ADDED\n      classOf[Serving])\n  }\n}\n```\n\nThis engine factory now returns an engine with two algorithms and they are named as `\"naive\"` and `\"randomforest\"` respectively.\n\n##  Update engine.json\n\nIn order to use the new algorithm, you need to modify `engine.json` to specify the name of the algorithm and the parameters.\n\nUpdate the engine.json to use **randomforest**:\n\n```json\n...\n\"algorithms\": [\n  {\n    \"name\": \"randomforest\",\n    \"params\": {\n      \"numClasses\": 4,\n      \"numTrees\": 5,\n      \"featureSubsetStrategy\": \"auto\",\n      \"impurity\": \"gini\",\n      \"maxDepth\": 4,\n      \"maxBins\": 100\n    }\n  }\n]\n...\n```\n\nThe engine now uses **MLlib Random Forests algorithm** instead of the default Naive Bayes algorithm. You are ready to build, train and deploy the engine as described in [quickstart](/templates/classification/quickstart/).\n\n```\n$ pio build\n$ pio train\n$ pio deploy\n```\n\nINFO: To switch back using Naive Bayes algorithm, simply modify engine.json.\n"
  },
  {
    "path": "docs/manual/source/templates/classification/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Classification)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase',\nlocals: {\n  template_name: 'Classification Engine Template',\n  evaluation_link: '/evaluation/paramtuning/'\n} %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyClassification* takes a JSON prediction\nquery, e.g. 
`{ \"attr0\":4, \"attr1\":3, \"attr2\":8 }`, and returns a JSON predicted result.\n\nWARNING: for version < v0.3.1, the query is an array of feature values: `{ \"features\": [4, 3, 8] }`\n\nIn MyClassification/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of **query**, such as `{ \"attr0\":4, \"attr1\":3, \"attr2\":8 }`:\n\n```scala\ncase class Query(\n  attr0 : Double,\n  attr1 : Double,\n  attr2 : Double\n)\n\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as `{\"label\":2.0}`:\n\n```scala\ncase class PredictedResult(\n  val label: Double\n)\n```\n\nFinally, `ClassificationEngine` is the Engine Factory that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject ClassificationEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"naive\" -> classOf[NaiveBayesAlgorithm]),\n      classOf[Serving])\n  }\n}\n```\n\n### Spark MLlib\n\nSpark's MLlib NaiveBayes algorithm takes training data of RDD type, i.e.\n`RDD[LabeledPoint]` and trains a model, which is a `NaiveBayesModel` object.\n\nPredictionIO's MLlib Classification engine template, which *MyClassification*\nis based on, integrates this algorithm under the DASE architecture. We will take a\ncloser look at the DASE code below.\n> [Check this out](https://spark.apache.org/docs/latest/mllib-naive-bayes.html)\nto learn more about MLlib's NaiveBayes algorithm.\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *Data\nSource* and *Data Preparator*. 
*Data Source* and *Data Preparator* takes data\nfrom the data store and prepares `RDD[LabeledPoint]` for the NaiveBayes\nalgorithm.\n\n### Data Source\n\nIn MyClassification/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of the class `DataSource` reads, and selects, data from datastore of\nEventServer and it returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\",\n      // only keep entities with these required properties defined\n      required = Some(List(\"plan\", \"attr0\", \"attr1\", \"attr2\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          LabeledPoint(properties.get[Double](\"plan\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"attr0\"),\n              properties.get[Double](\"attr1\"),\n              properties.get[Double](\"attr2\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. 
Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n\n    new TrainingData(labeledPoints)\n  }\n}\n```\n\n`PEventStore` is an object which provides function to access data that is collected through the *Event Server*, and\n`PEventStore.aggregateProperties` aggregates the event records of the 4 properties\n(attr0, attr1, attr2 and plan) for each user.\n\nPredictionIO automatically loads the parameters of *datasource* specified in\nMyEngine/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn this sample text data file, columns are delimited by comma (,). The first\ncolumn are labels. The second column are features.\n\nThe class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n```\nand PredictionIO passes the returned `TrainingData` object to *Data Preparator*.\n\n\n### Data Preparator\n\nIn MyClassification/src/main/scala/***Preparator.scala***, the `prepare` of\nclass `Preparator` takes `TrainingData`. It then conducts any necessary feature\nselection and data processing tasks. At the end, it returns `PreparedData` which\nshould contain the data *Algorithm* needs. 
For MLlib NaiveBayes, it is\n`RDD[LabeledPoint]`.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to\n`PreparedData`:\n\n```scala\nclass PreparedData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(trainingData.labeledPoints)\n  }\n}\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train`\nfunction.\n\n## Algorithm\n\nIn MyClassification/src/main/scala/***NaiveBayesAlgorithm.scala***, the two\nmethods of the algorithm class are `train` and `predict`. `train` is responsible\nfor training a predictive model. PredictionIO will store this model and\n`predict` is responsible for using this model to make predictions.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib NaiveBayes\nalgorithm, i.e. `NaiveBayes.train`, is used to train a predictive model.\n\n```scala\ndef train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {\n    NaiveBayes.train(data.labeledPoints, ap.lambda)\n}\n```\n\nIn addition to `RDD[LabeledPoint]` (i.e. `data.labeledPoints`),\n`NaiveBayes.train` takes 1 parameter: *lambda*.\n\nThe value of this parameter is specified in *algorithms* of\nMyClassification/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"naive\",\n      \"params\": {\n        \"lambda\": 1.0\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load these values into the constructor `ap`,\nwhich has a corresponding case class `AlgorithmParams`:\n\n```scala\ncase class AlgorithmParams(\n  lambda: Double\n) extends Params\n```\n\n`NaiveBayes.train` then returns a `NaiveBayesModel` model. 
PredictionIO will\nautomatically store the returned model.\n\n### predict(...)\n\nThe `predict` method is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{ \"attr0\":4, \"attr1\":3, \"attr2\":8 }`, to the `Query` class you defined previously.\n\nThe predictive model `NaiveBayesModel` of MLlib NaiveBayes offers a function\ncalled `predict`. `predict` takes a dense vector of features. It predicts the\nlabel of the item represented by this feature vector.\n\n```scala\n  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {\n    val label = model.predict(Vectors.dense(\n    \tquery.attr0, query.attr1, query.attr2\n    ))\n    PredictedResult(label)\n  }\n```\n\n> You have defined the class `PredictedResult` earlier in this page.\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes the predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyClassification/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object ClassificationEngine` inside ***Engine.scala***. 
Since only\none `NaiveBayesAlgorithm` is implemented by default, this `Seq` contains one\nelement.\n\nIn this case, `serve` simply returns the predicted result of the first, and the\nonly, algorithm, i.e. `predictedResults.head`.\n\nCongratulations! You have just learned how to customize and build a\nproduction-ready engine. Have fun!\n"
  },
  {
    "path": "docs/manual/source/templates/classification/how-to.html.md",
    "content": "---\ntitle: How-To  (Classification)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHere are the pages that show you how you can customize the Classification engine template.\n\n- [Use Alternative Algorithm](/templates/classification/add-algorithm/)\n- [Read Custom Properties](/templates/classification/reading-custom-properties/)\n"
  },
  {
    "path": "docs/manual/source/templates/classification/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Classification Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nAn engine template is an almost-complete implementation of an engine.\nPredictionIO's Classification Engine Template\nhas integrated **Apache Spark MLlib**'s Naive Bayes algorithm by default.\n\nThe default use case of Classification Engine Template is to predict the service\nplan (*plan*) a user will subscribe to based on his 3 properties: *attr0*,\n*attr1* and *attr2*.\n\nYou can customize it easily to fit your specific use case and needs.\n\nWe are going to show you how to create your own classification engine for\nproduction use based on this template.\n\n## Usage\n\n### Event Data Requirements\n\nBy default, the template requires the following events to be collected:\n\n- user $set event, which set the attributes of the user\n\nNOTE: You can customize to use other event.\n\n### Input Query\n\n- individual attributes values (for version >= v0.3.1)\n\nWARNING: for version < v0.3.1, it is array of features values\n\n### Output PredictedResult\n\n- the predicted label\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. 
Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyClassification', template_name: 'Classification Engine Template', template_repo: 'apache/predictionio-template-attribute-based-classifier' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect some training data. By default, the Classification Engine Template reads 4 properties of a user record: attr0, attr1, attr2 and plan. This template requires '$set' user events.\n\nINFO: This template can easily be customized to use a different or larger number of attributes.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\nTo set properties \"attr0\", \"attr1\", \"attr2\" and \"plan\" for user \"u0\" at time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for the user. To send this event, run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"properties\" : {\n    \"attr0\" : 0,\n    \"attr1\" : 1,\n    \"attr2\" : 0,\n    \"plan\" : 1\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n    access_key=<ACCESS KEY>,\n    url=<URL OF EVENTSERVER>,\n    threads=5,\n    qsize=500\n)\n\n# Set the 4 properties for a user\nclient.create_event(\n    event=\"$set\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    properties= {\n      \"attr0\" : int(<VALUE OF ATTR0>),\n      \"attr1\" : int(<VALUE OF ATTR1>),\n      \"attr2\" : int(<VALUE OF ATTR2>),\n      \"plan\" : int(<VALUE OF 
PLAN>)\n    }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Set the 4 properties for a user\n$client->createEvent(array(\n   'event' => '$set',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'properties' => array(\n     'attr0' => <VALUE OF ATTR0>,\n     'attr1' => <VALUE OF ATTR1>,\n     'attr2' => <VALUE OF ATTR2>,\n     'plan' => <VALUE OF PLAN>\n   )\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# Set the 4 properties for a user.\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>, {\n    'properties' => {\n      'attr0' => <VALUE OF ATTR0 (integer)>,\n      'attr1' => <VALUE OF ATTR1 (integer)>,\n      'attr2' => <VALUE OF ATTR2 (integer)>,\n      'plan' => <VALUE OF PLAN (integer)>,\n    }\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport com.google.common.collect.ImmutableMap;\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// set the 4 properties for a user\nEvent event = new Event()\n    .event(\"$set\")\n    .entityType(\"user\")\n    .entityId(<USER ID>)\n    .properties(ImmutableMap.<String, Object>of(\n        \"attr0\", <VALUE OF ATTR0>,\n        \"attr1\", <VALUE OF ATTR1>,\n        \"attr2\", <VALUE OF ATTR2>,\n        \"plan\", <VALUE OF PLAN>\n    ));\nclient.createEvent(event);\n```\n  </div>\n</div>\n\n\nNote that you can also set the properties for the user with multiple `$set` events (They will be aggregated during engine training).\n\nTo set properties \"attr0\", \"attr1\" and \"attr2\", and \"plan\" for user \"u1\" at different time, you can send 
the following `$set` events for the user. To send these events, run the following `curl` commands:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u1\",\n  \"properties\" : {\n    \"attr0\" : 0\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u1\",\n  \"properties\" : {\n    \"attr1\" : 1,\n    \"attr2\" : 0\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u1\",\n  \"properties\" : {\n    \"plan\" : 1\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# You may also set the properties one by one\nclient.create_event(\n    event=\"$set\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    properties= {\n      \"attr0\" : int(<VALUE OF ATTR0>)\n    }\n)\nclient.create_event(\n    event=\"$set\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    properties= {\n      \"attr1\" : int(<VALUE OF ATTR1>),\n      \"attr2\" : int(<VALUE OF ATTR2>)\n    }\n)\n\nclient.create_event(\n    event=\"$set\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    properties= {\n      \"plan\" : int(<VALUE OF PLAN>)\n    }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n\n// You may also set the properties one by one\n$client->createEvent(array(\n   'event' => '$set',\n   'entityType' => 
'user',\n   'entityId' => <USER ID>,\n   'properties' => array(\n     'attr0' => <VALUE OF ATTR0>\n   )\n));\n\n$client->createEvent(array(\n   'event' => '$set',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'properties' => array(\n     'attr1' => <VALUE OF ATTR1>,\n     'attr2' => <VALUE OF ATTR2>\n   )\n));\n\n$client->createEvent(array(\n   'event' => '$set',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'properties' => array(\n     'plan' => <VALUE OF PLAN>\n   )\n));\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# You may also set the properties one by one.\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>, {\n    'properties' => {\n      'attr0' => <VALUE OF ATTR0 (integer)>\n    }\n  }\n)\n\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>, {\n    'properties' => {\n      'attr1' => <VALUE OF ATTR1 (integer)>,\n    }\n  }\n)\n\n# Etc...\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// you may also set the properties one by one\nclient.createEvent(new Event()\n    .event(\"$set\")\n    .entityType(\"user\")\n    .entityId(<USER ID>)\n    .property(\"attr0\", <VALUE OF ATTR0>));\nclient.createEvent(new Event()\n    .event(\"$set\")\n    .entityType(\"user\")\n    .entityId(<USER ID>)\n    .property(\"attr1\", <VALUE OF ATTR1>)\n    .property(\"attr2\", <VALUE OF ATTR2>));\nclient.createEvent(new Event()\n    .event(\"$set\")\n    .entityType(\"user\")\n    .entityId(<USER ID>)\n    .property(\"plan\", <VALUE OF PLAN>));\n```\n  </div>\n</div>\n\nThe properties of the `user` can be set, unset, or delete by special events **$set**, **$unset** and **$delete**. 
Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import the data to\nEvent Server using Python SDK. Please upgrade to the latest Python SDK.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyClassification` directory. Execute the following to import the data:\n\n```\n$ cd MyClassification\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\nImporting data...\n6 events are imported.\n```\n\nNow the training data is stored as events inside the Event Store.\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyClassification' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyClassification' } %>\n\n## 6. Use the Engine\n\nNow, You can try to retrieve predicted results. For example, to predict the\nlabel (i.e. *plan* in this case) of a user with attr0=2, attr1=0 and attr2=0,\nyou send this JSON `{ \"attr0\":2, \"attr1\":0, \"attr2\":0 }` to the deployed engine and it will\nreturn a JSON of the predicted plan. 
Simply send a query by making a HTTP\nrequest or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"bash\">\n```bash\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"attr0\":2, \"attr1\":0, \"attr2\":0 }' http://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\"attr0\":2, \"attr1\":0, \"attr2\":0})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('attr0'=> 2, 'attr1' => 0, 'attr2' => 0));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new(<ENGINE DEPLOY URL>)\n\n# Query PredictionIO.\nresponse = client.send_query('attr0' => 2, 'attr1' => 0, 'attr2' => 0)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableList;\nimport com.google.common.collect.ImmutableMap;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\nEngineClient engineClient = new EngineClient(<ENGINE DEPLOY URL>);\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"attr0\", 2,\n        \"attr1\", 0,\n        \"attr2\", 0\n    ));\n```\n  </div>\n</div>\n\nWARNING: The Query format is changed since version v0.3.1. 
If you are using the old Classification template version v0.3.0 or earlier, the query format is an array of feature values instead: `{ \"features\": [2, 0, 0] }`.\n\nThe following is a sample JSON response:\n\n```\n{\"label\":0.0}\n```\n\nSimilarly, to predict the label (i.e. *plan* in this case) of a user with\nattr0=4, attr1=3 and attr2=8, you send this JSON `{ \"attr0\": 4, \"attr1\": 3, \"attr2\": 8 }` to\nthe deployed engine and it will return a JSON of the predicted plan.\n\nWARNING: For classification template version v0.3.0 or earlier, the query JSON would be `{ \"features\": [4, 3, 8] }`.\n\n*MyClassification* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\nNext, we are going to take a look at the engine\narchitecture and explain how you can customize it completely.\n\n#### [Next: DASE Components Explained](/templates/classification/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/classification/reading-custom-properties.html.md",
    "content": "---\ntitle: Reading Custom Properties (Classification)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nBy default, the classification template reads 4 properties of a user entity: \"attr0\", \"attr1\", \"attr2\" and \"plan\". You can modify the [default DataSource](dase.html#data) to read your custom properties or different Entity Type.\n\nIn this example, we modify DataSource to read properties \"featureA\", \"featureB\", \"featureC\", \"featureD\" and \"label\" for entity type \"item\". 
You can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-classification/reading-custom-properties).\n\n>> Note: you also need to import events with these properties accordingly.\n\nModify `readTraining()` and `readEval()` in DataSource.scala:\n\n- modify the `entityType` parameter\n- modify the list of property names in the `required` parameter\n- modify how to create the `LabeledPoint` object using the entity properties\n\n```scala\n  def readTraining(sc: SparkContext): TrainingData = {\n    ...\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\", // MODIFIED\n      // only keep entities with these required properties defined\n      required = Some(List( // MODIFIED\n        \"featureA\", \"featureB\", \"featureC\", \"featureD\", \"label\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          // MODIFIED\n          LabeledPoint(properties.get[Double](\"label\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"featureA\"),\n              properties.get[Double](\"featureB\"),\n              properties.get[Double](\"featureC\"),\n              properties.get[Double](\"featureD\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n    ...\n  }\n```\n\nLastly, redefine the Query class parameters to take in four double values: featureA, featureB, featureC, and featureD. 
Now, to send a query, the field names must be changed accordingly:\n\n```\n$ curl -H \"Content-Type: application/json\" -d '{ \"featureA\":2, \"featureB\":0, \"featureC\":0, \"featureD\":0 }' http://localhost:8000/queries.json\n```\n\nThat's it! Now your classification engine is using different properties as training data.\n"
  },
  {
    "path": "docs/manual/source/templates/complementarypurchase/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Complementary Purchase)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'Complementary Purchase Engine Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyComplementaryPurchase* takes a JSON prediction\nquery, e.g. 
`{ \"items\" : [\"s2i1\"], \"num\" : 3 }`, and return a JSON predicted result.\nIn MyComplementaryPurchase/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(items: Set[String], num: Int)\n  extends Serializable\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\n  \"rules\":[\n    {\n      \"cond\":[\"s2i1\"],\n      \"itemScores\":[\n        {\n          \"item\":\"s2i2\",\n          \"support\":0.2,\n          \"confidence\":0.9090909090909091,\n          \"lift\":3.787878787878788\n        },\n        {\n          \"item\":\"s2i3\",\n          \"support\":0.14,\n          \"confidence\":0.6363636363636364,\n          \"lift\":3.535353535353535\n        }\n      ]\n    }\n  ]\n}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(rules: Array[Rule])\n  extends Serializable\n\ncase class Rule(cond: Set[String], itemScores: Array[ItemScore])\n  extends Serializable\n\ncase class ItemScore(\n  item: String, support: Double, confidence: Double, lift: Double\n) extends Serializable\n```\n\nFinally, `ComplementaryPurchaseEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject ComplementaryPurchaseEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"algo\" -> classOf[Algorithm]),\n      classOf[Serving])\n  }\n}\n```\n\nEach DASE component of the `ComplementaryPurchaseEngine` will be explained below.\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. 
They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn MyComplementaryPurchase/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads and selects data from the *Event Store*\n(data store of the *Event Server*). It returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // get all \"user\" \"buy\" \"item\" events\n    val buyEvents: RDD[BuyEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"buy\")),\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .map { event =>\n        try {\n          new BuyEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to BuyEvent. ${e}\")\n            throw e\n          }\n        }\n      }.cache()\n\n    new TrainingData(buyEvents)\n  }\n}\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MyComplementaryPurchase/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PEventStore` is an object which provides function to access data that is collected by PredictionIO Event Server.\n\nThis Complementary Purchase Engine Template requires \"buy\" events.\n\n`PEventStore.find(...)` specifies the events that you want to read. 
In this case, \"user buy item\" events are read and then each is mapped to a `BuyEvent` object.\n\n`BuyEvent` case class is defined as:\n\n```scala\ncase class BuyEvent(user: String, item: String, t: Long)\n```\n\n`TrainingData` contains an RDD of `BuyEvent` objects. The class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable { ... }\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\nNOTE: You could modify the DataSource to read other event other than the default **buy**.\n\n### Data Preparator\n\nIn MyComplementaryPurchase/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to `PreparedData`:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def prepare(sc: SparkContext, td: TrainingData): PreparedData = {\n    new PreparedData(buyEvents = td.buyEvents)\n  }\n}\n\nclass PreparedData(\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn MyComplementaryPurchase/src/main/scala/***ALSAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. 
`train` is responsible for\ntraining the predictive model; `predict` is\nresponsible for using this model to make predictions.\n\n\nThe default algorithm is based on the concept of [Association Rule Learning](http://en.wikipedia.org/wiki/Association_rule_learning) to find interesting association rules (A implies B) that indicate an additional item (B) may be bought together with a given list of items (A). A is the *condition* and B is the *consequence*.\n\n### Algorithm parameters\n\nThe Algorithm takes the following parameters, as defined by the `AlgorithmParams` case class:\n\n```scala\ncase class AlgorithmParams(\n  basketWindow: Int, // in seconds\n  maxRuleLength: Int,\n  minSupport: Double,\n  minConfidence: Double,\n  minLift: Double,\n  minBasketSize: Int,\n  maxNumRulesPerCond: Int // max number of rules per condition\n  ) extends Params\n\n```\n\nParameter description:\n\n- **basketWindow**: The buy event is considered as the same basket as the previous one if the time difference is within this window (in unit of seconds). For example, if it's set to 120, it means that if the user buys item B within 2 minutes of the previous purchase (item A), then the item set [A, B] is considered as the same basket. The purchase of this *basket* is referred to as one *transaction*.\n- **maxRuleLength**: The maximum length of an association rule. Must be at least 2. For example, rule of \"A implies B\" has length of 2 while rule \"A, B implies C\" has a length of 3. Increasing this number will increase the training time significantly because more combinations are considered.\n- **minSupport**: The minimum required *support* for the item set to be considered as rule (valid range is 0 to 1). It's the percentage of the item set appearing among all transactions. This is used to filter out infrequent item sets. 
For example, setting it to 0.1 means that the item set must appear in at least 10% of all transactions.\n- **minConfidence**: The minimum *confidence* required for the rules (valid range is 0 to 1). The confidence indicates the probability that the consequence appears in a transaction, given that the condition appears in the same transaction. For example, if A appears in 30 transactions and the item set [A, B] appears in 20 transactions, then the rule \"A implies B\" has confidence of 0.67.\n- **minLift**: The minimum *lift* required for the rule. It should be set to 1 to find high-quality rules. It's the confidence of the rule divided by the support of the consequence. It is used to filter out rules whose consequence is very frequent anyway regardless of the condition.\n- **minBasketSize**: The minimum number of items in a basket to be considered by the algorithm. This value must be at least 2.\n- **maxNumRulesPerCond**: Maximum number of rules generated per condition and stored in the model. By default, the top rules are sorted by *lift* score.\n\nINFO: If you import your own data and the engine doesn't return any results, it could be caused by the following reasons: (1) the algorithm parameter constraint is too high and the algorithm couldn't find rules that satisfy the condition. You could try setting the following params to 0: **minSupport**, **minConfidence**, **minLift** and then see if anything is returned (regardless of recommendation quality), and then adjust the parameters accordingly. (2) the complementary purchase engine requires buy events with correct eventTime. 
If you import data without specifying eventTime, the SDK will use the current time because it assumes the event happens in real time (which is not the case if you import in batch offline), so all buy events are treated as one big transaction when they should be treated as multiple transactions.\n\n\nThe values of these parameters can be specified in *algorithms* of\nMyComplementaryPurchase/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"algo\",\n      \"params\": {\n        \"basketWindow\" : 120,\n        \"maxRuleLength\" : 2,\n        \"minSupport\": 0.1,\n        \"minConfidence\": 0.6,\n        \"minLift\" : 1.0,\n        \"minBasketSize\" : 2,\n        \"maxNumRulesPerCond\": 5\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load these values into the constructor of the `Algorithm` class.\n\n```scala\nclass Algorithm(val ap: AlgorithmParams)\n  extends P2LAlgorithm[PreparedData, Model, Query, PredictedResult] {\n    ...\n}\n```\n\n### train(...)\n\n`train` is called when you run **pio train** to train a predictive model. 
The algorithm first finds all basket transactions, then generates and filters the association rules based on the algorithm parameters:\n\n```scala\n\n  def train(sc: SparkContext, pd: PreparedData): Model = {\n    val windowMillis = ap.basketWindow * 1000\n\n    ...\n\n    val transactions: RDD[Set[String]] = ...\n\n    val totalTransaction = transactions.count()\n    val minSupportCount = ap.minSupport * totalTransaction\n\n    ...\n\n    // generate item sets\n    val itemSets: RDD[Set[String]] = transactions\n      .flatMap { tran =>\n        (1 to ap.maxRuleLength).flatMap(n => tran.subsets(n))\n      }\n\n    ...\n\n    val itemSetCount: RDD[(Set[String], Int)] = ...\n\n    ...\n\n    val rules: RDD[(Set[String], RuleScore)] = ...\n\n    val sortedRules = rules.groupByKey\n      .mapValues(iter =>\n        iter.toVector\n          .sortBy(_.lift)(Ordering.Double.reverse)\n          .take(ap.maxNumRulesPerCond)\n        )\n      .collectAsMap.toMap\n\n    new Model(sortedRules)\n  }\n\n```\n\nPredictionIO will automatically store the returned model after training, i.e. the `Model` object.\n\nThe `Model` stores the top rules for each condition:\n\n```scala\nclass Model(\n  val rules: Map[Set[String], Vector[RuleScore]]\n) extends Serializable {\n  ...\n}\n```\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{ \"items\" : [\"s2i1\"], \"num\" : 3 }` to the `Query` class you defined previously in `Engine.scala`.\n\nThe `predict()` function does the following:\n\n1. find all possible subsets of the items in the query\n2. 
use the subsets as condition to look up the model and return the rules for each condition.\n\n```scala\n\n  ...\n\n  def predict(model: Model, query: Query): PredictedResult = {\n    val conds = (1 to maxCondLength).flatMap(n => query.items.subsets(n))\n\n    val rules = conds.map { cond =>\n      model.rules.get(cond).map{ vec =>\n        val itemScores = vec.take(query.num).map { rs =>\n          new ItemScore(\n            item = rs.conseq,\n            support = rs.support,\n            confidence = rs.confidence,\n            lift = rs.lift\n          )\n        }.toArray\n        Rule(cond = cond, itemScores = itemScores)\n      }\n    }.flatten.toArray\n\n    new PredictedResult(rules)\n  }\n\n  ...\n\n```\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyComplementaryPurchase/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\nNOTE: An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object ComplementaryPurchaseEngine` inside ***Engine.scala*** and  the corresponding parameters in ***engine.json***. Since only one `Algorithm` is implemented by default, this `Seq` contains one element.\n"
  },
  {
    "path": "docs/manual/source/templates/complementarypurchase/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Complementary Purchase Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template recommends the complementary items which most user frequently buy at the same time together with one or more items in the query.\n\n## Usage\n\n### Event Data Requirements\n\nBy default, the template requires the following events to be collected:\n\n- user 'buy' item events\n\nINFO: A correct eventTime should be used in order for engine to determine if the items being bought are in the same 'basket'.\n\nNOTE: You can customize to use other event.\n\n### Input Query\n\n- set of items\n- num of recommends items per condition\n\n### Output PredictedResult\n\n- array of condition and top n recommended items given the condition. The engine will use each combination of the query items as condition.\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyComplementaryPurchase', template_name: 'Complementary Purchase Engine Template', template_repo: 'PredictionIO/template-scala-parallel-complementarypurchase' } %>\n\n## 3. 
Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect training data for this Engine. By default,\n Complementary Purchase Engine Template supports the following entities: **user**, **item**. A user buys an item. This template requires user-buy-item events.\n\nNote that the engine requires the correct buy event time to be used in order to determine if the items being bought are in the same 'basket', which is configured by the 'basketWindow' parameter. Using an inaccurate event time for the buy events will result in an incorrect model. If you use an SDK, the current time is used as event time by default.\n\nWARNING: In particular, make sure the correct event time is specified if you import data in batch (i.e. not in real time). If the event time is omitted, the SDK will use the **current time** as event time, which is not the actual time of the buy event in this case!\n\n<%= partial 'shared/quickstart/collect_data' %>\n\n\nWhen a user u0 buys item i0 at time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a buy event. 
Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"buy\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# A user buys an item (use current time as event time)\nclient.create_event(\n  event=\"buy\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n\n# A user buys an item (explicitly specify event time)\nclient.create_event(\n  event=\"buy\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>,\n  event_time=<EVENT_TIME>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user buys an item (use current time as event time)\n$client->createEvent(array(\n  'event' => 'buy',\n  'entityType' => 'user',\n  'entityId' => <USER ID>,\n  'targetEntityType' => 'item',\n  'targetEntityId' => <ITEM ID>\n));\n\n// A user buys an item (explicitly specify event time)\n$client->createEvent(array(\n  'event' => 'buy',\n  'entityType' => 'user',\n  'entityId' => <USER ID>,\n  'targetEntityType' => 'item',\n  'targetEntityId' => <ITEM ID>,\n  'eventTime' => <EVENT_TIME>\n));\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS 
KEY>, <URL OF EVENTSERVER>)\n\n# A user buys an item (use current time as event time)\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n\n# A user buys an item (explicitly specify event time)\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>,\n    'eventTime' => <EVENT_TIME>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user buys an item (use current time as event time)\nEvent buyEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>)\nclient.createEvent(buyEvent);\n\n// A user buys an item (explicitly specify event time)\nEvent buyEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>)\n    .eventTime(<EVENT_TIME>)\nclient.createEvent(buyEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. The script generates some frequent item sets (prefix with \"s\"), some other random items (prefix with \"i\") and a few popular items (prefix with \"p\"). Then each user (with user ID \"u1\" to \"u10\") performs 5 buy transactions (buy events are within 10 seconds in each transaction). 
In each transaction, the user may or may not buy some random items, always buys one of the popular items, and buys 2 or more items from one of the frequent item sets.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyComplementaryPurchase` directory. Execute the following to import the data:\n\n```\n$ cd MyComplementaryPurchase\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u10 buys item i20 at 2014-10-19 15:42:35.618000-07:53\nUser u10 buys item i5 at 2014-10-19 15:42:45.618000-07:53\nUser u10 buys item p3 at 2014-10-19 15:42:55.618000-07:53\nUser u10 buys item s2i3 at 2014-10-19 15:43:05.618000-07:53\nUser u10 buys item s2i1 at 2014-10-19 15:43:15.618000-07:53\n225 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyComplementaryPurchase' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyComplementaryPurchase' } %>\n\n## 6. Use the Engine\n\nNow you can query the engine. For example, to return the top 3 items which are frequently bought with item \"s2i1\", you can send this JSON '{ \"items\" : [\"s2i1\"], \"num\" : 3 }' to the deployed engine. The engine will return a JSON with the recommended items.\n\nIf you include one or more items in the query, the engine will use each combination of the query items as a condition, and return recommended items if there are any for this condition. 
For example, if your query items are [\"A\", \"B\"], then the engine will use [\"A\"], [\"B\"], and [\"A\", \"B\"] as conditions and try to find the top n recommended items for each combination.\n\nYou can simply send a query by making an HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use an SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"items\" : [\"s2i1\"],\n  \"num\" : 3\n}' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\n  \"items\" : [\"s2i1\"],\n  \"num\" : 3\n})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array(\n  'items' => array('s2i1'),\n  'num' => 3\n));\n\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query(\n  'items' => ['s2i1'],\n  'num' => 3\n)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n  \"items\", ImmutableList.of(\"s2i1\"),\n  \"num\", 
3\n));\n```\n  </div>\n</div>\n\nThe following is a sample JSON response. The `cond` field is one of the combinations of query items used as the condition to determine other items frequently bought with it, followed by the top items. If there are multiple conditions with recommended items found, the `rules` array will contain multiple elements, each corresponding to one condition.\n\n```\n{\n  \"rules\":[\n    {\n      \"cond\":[\"s2i1\"],\n      \"itemScores\":[\n        {\n          \"item\":\"s2i2\",\n          \"support\":0.2,\n          \"confidence\":0.9090909090909091,\n          \"lift\":3.787878787878788\n        },\n        {\n          \"item\":\"s2i3\",\n          \"support\":0.14,\n          \"confidence\":0.6363636363636364,\n          \"lift\":3.535353535353535\n        }\n      ]\n    }\n  ]\n}\n```\n\n*MyComplementaryPurchase* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n#### [Next: DASE Components Explained](/templates/complementarypurchase/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/ecommercerecommendation/adjust-score.html.md.erb",
    "content": "---\ntitle: Adjust Score (E-Commerce Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis examples demonstrates how to modify E-Commerce Recommendation template to further adjust score.\n\nBy default, items have a weight of 1.0. Giving an item a weight greater than 1.0 will make them appear more often and can be useful for i.e. promoted products. An item can also be given a weight smaller than 1.0 (but bigger than 0), in which case it will be recommended less often than originally. 
Weight values smaller than 0.0 are invalid.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-ecommercerecommendation/adjust-score).\n\n\n## Modification\n\n### ECommAlgorithm.scala\n\nAdd a case class to represent each group items which are given the same weight.\n\n```scala\n// ADDED\ncase class WeightGroup(\n  items: Set[String],\n  weight: Double\n)\n```\n\nIn ECommAlgorithm, add `weightedItems` function to extract the sequence of `WeightGroup`.\n\n```scala\n  // ADDED\n  /** Get the latest constraint weightedItems */\n  def weightedItems: Seq[WeightGroup] = {\n    try {\n      val constr = LEventStore.findByEntity(\n        appName = ap.appName,\n        entityType = \"constraint\",\n        entityId = \"weightedItems\",\n        eventNames = Some(Seq(\"$set\")),\n        limit = Some(1),\n        latest = true,\n        timeout = Duration(200, \"millis\")\n      )\n      if (constr.hasNext) {\n        constr.next.properties.get[Seq[WeightGroup]](\"weights\")\n      } else {\n        Nil\n      }\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read set weightedItems event.\" +\n          s\" Empty list is used. 
${e}\")\n        Nil\n      case e: Exception =>\n        logger.error(s\"Error when read set weightedItems event: ${e}\")\n        throw e\n    }\n  }\n```\n\nModify the `predictKnownUser()`, `predictDefault()` and `predictSimilar()`:\n\n- add the `weights: Map[Int, Double]` parameter\n- adjust score according to item weights\n\n```scala\n  def predictKnownUser(\n    userFeature: Array[Double],\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int],\n    weights: Map[Int, Double] // ADDED\n  ): Array[(Int, Double)] = {\n\n      ...\n      .map { case (i, pm) =>\n        // NOTE: features must be defined, so can call .get\n        val s = dotProduct(userFeature, pm.features.get)\n        // may customize here to further adjust score\n        // ADDED\n        val adjustedScore = s * weights(i)\n        (i, adjustedScore)\n      }\n      ...\n\n  }\n```\n\nLastly, modify the `predict()` method. The sequence of `WeightGroup` transforms into a `Map[Int, Double]` that we can easily query to extract the weight given to an item, using its `Int` index.\n\n```scala\n  def predict(model: ECommModel, query: Query): PredictedResult = {\n\n    ...\n\n    // ADDED\n    val weights: Map[Int, Double] = (for {\n      group <- weightedItems\n      item <- group.items\n      index <- model.itemStringIntMap.get(item)\n    } yield (index, group.weight))\n      .toMap\n      .withDefaultValue(1.0)\n\n    ...\n\n    val topScores: Array[(Int, Double)] = if (userFeature.isDefined) {\n      // the user has feature vector\n      predictKnownUser(\n        userFeature = userFeature.get,\n        productModels = productModels,\n        query = query,\n        whiteList = whiteList,\n        blackList = finalBlackList,\n        weights = weights // ADDED\n      )\n    } else {\n      ...\n\n      if (recentFeatures.isEmpty) {\n        logger.info(s\"No features vector for recent items ${recentItems}.\")\n        
predictDefault(\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList,\n          weights = weights // ADDED\n        )\n      } else {\n        predictSimilar(\n          recentFeatures = recentFeatures,\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList,\n          weights = weights // ADDED\n        )\n      }\n    }\n\n    ...\n  }\n```\n\nNow, to send an event to Event Server:\n\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"constraint\",\n  \"entityId\" : \"weightedItems\",\n  \"properties\" : {\n    \"weights\": [\n      {\n        \"items\": [\"i4\", \"i14\"],\n        \"weight\": 1.2\n      },\n      {\n        \"items\": [\"i11\"],\n        \"weight\": 1.5\n      }\n    ]\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n\nThat's it! Now your engine can predict with adjusted scores.\n"
  },
  {
    "path": "docs/manual/source/templates/ecommercerecommendation/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (E-Commerce Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'E-Commerce Recommendation Engine Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyECommerceRecommendation* takes a JSON prediction\nquery, e.g. 
`{ \"user\": \"u1\", \"num\": 4 }`, and return a JSON predicted result.\nIn MyECommerceRecommendation/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(\n  user: String,\n  num: Int,\n  categories: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n) extends Serializable\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\"itemScores\":[\n  {\"item\":22,\"score\":4.07},\n  {\"item\":62,\"score\":4.05},\n  {\"item\":75,\"score\":4.04},\n  {\"item\":68,\"score\":3.81}\n]}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n) extends Serializable\n\ncase class ItemScore(\n  item: String,\n  score: Double\n) extends Serializable\n```\n\nFinally, `ECommerceRecommendationEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject ECommerceRecommendationEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"ecomm\" -> classOf[ECommAlgorithm]),\n      classOf[Serving])\n  }\n}\n```\n\n### Spark MLlib\n\nThe PredictionIO E-Commerce Recommendation Engine Template integrates Spark's MLlib ALS algorithm under the DASE\narchitecture. We will take a closer look at the DASE code below.\n\nThe MLlib ALS algorithm takes training data of RDD type, i.e. `RDD[Rating]` and train a model, which is a `MatrixFactorizationModel` object.\n\nYou can visit [here](https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html) to learn more about MLlib's ALS collaborative filtering algorithm.\n\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. 
They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn MyECommerceRecommendation/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads and selects data from the *Event Store*\n(data store of the *Event Server*). It returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(...) ...\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(...) ...\n\n    // get all \"user\" \"view\" or \"buy\" \"item\" events from event store\n    val eventsRDD: RDD[Event] = PEventStore.find(...) ...\n\n    // filter all view events\n    val viewEventsRDD: RDD[ViewEvent] = eventsRDD.filter { ... 
} ...\n\n    // filter all buy events\n    val buyEventsRDD: RDD[BuyEvent] = eventsRDD.filter { ...} ...\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD,\n      buyEvents = buyEventsRDD\n    )\n  }\n}\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MyECommerceRecommendation/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PEventStore` is an object which provides function to access data that is collected by PredictionIO Event Server.\n\nThis E-Commerce Recommendation Engine Template requires \"user\" and \"item\" entities that are set by events.\n\n`PEventStore.aggregateProperties(...)` aggregates properties of the `user` and `item` that are set, unset, or delete by special events **$set**, **$unset** and **$delete**. Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nThe following code aggregates the properties of `user` and then map each result to a `User()` object.\n\n```scala\n\n  // create a RDD of (entityID, User)\n  val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n    appName = dsp.appName,\n    entityType = \"user\"\n  )(sc).map { case (entityId, properties) =>\n    val user = try {\n      User()\n    } catch {\n      case e: Exception => {\n        logger.error(s\"Failed to get properties ${properties} of\" +\n          s\" user ${entityId}. Exception: ${e}.\")\n        throw e\n      }\n    }\n    (entityId, user)\n  }.cache()\n\n```\nIn the template, `User()` object is a simple dummy as a placeholder for you to customize and expand.\n\n\nSimilarly, the following code aggregates `item` properties and then map each result to an `Item()` object. 
By default, this template assumes each item has an optional property `categories`, which is a list of String.\n\n```scala\n  // create a RDD of (entityID, Item)\n  val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n    appName = dsp.appName,\n    entityType = \"item\"\n  )(sc).map { case (entityId, properties) =>\n    val item = try {\n      // Assume categories is optional property of item.\n      Item(categories = properties.getOpt[List[String]](\"categories\"))\n    } catch {\n      case e: Exception => {\n        logger.error(s\"Failed to get properties ${properties} of\" +\n          s\" item ${entityId}. Exception: ${e}.\")\n        throw e\n      }\n    }\n    (entityId, item)\n  }.cache()\n```\n\nThe `Item` case class is defined as\n\n```scala\ncase class Item(categories: Option[List[String]])\n```\n\n`PEventStore.find(...)` specifies the events that you want to read. In this case, \"user view item\" and \"user buy item\" events are read:\n\n```scala\n\n  // get all \"user\" \"view\" \"item\" events\n  val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\", \"buy\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .cache()\n\n```\n\nNote that `.cache()` is used to cache the RDD data into memory since eventsRDD will be used multiple times later.\n\nThen we filter the events we are interested in and map the event to a `ViewEvent` object.\n\n```scala\n\n  val viewEventsRDD: RDD[ViewEvent] = eventsRDD\n      .filter { event => event.event == \"view\" }\n      .map { event =>\n        try {\n          ViewEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              
s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n```\n\n`ViewEvent` case class is defined as:\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nWe filter buy event in similar way and map to `BuyEvent` object for later use.\n\n\n```scala\n\n  val buyEventsRDD: RDD[BuyEvent] = eventsRDD\n      .filter { event => event.event == \"buy\" }\n      .map { event =>\n        try {\n          BuyEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to BuyEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n```\n\n`BuyEvent` case class is defined as:\n\n```scala\ncase class BuyEvent(user: String, item: String, t: Long)\n```\n\nINFO: For flexibility, this template is designed to support user ID and item ID in String.\n\n`TrainingData` contains an RDD of `User`, `Item` and `ViewEvent` objects. The class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable { ... }\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\nNOTE: You could modify the DataSource to read other event other than the default **view** or **buy**.\n\n### Data Preparator\n\nIn MyECommerceRecommendation/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. 
At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to `PreparedData`:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents,\n      buyEvents = trainingData.buyEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn MyECommerceRecommendation/src/main/scala/***ECommAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. `train` is responsible for\ntraining the predictive model;`predict` is\nresponsible for using this model to make prediction.\n\n\n### Algorithm parameters\n\nThe ECommAlgorithm takes the following parameters, as defined by the `ECommAlgorithmParams` case class:\n\n```scala\ncase class ECommAlgorithmParams(\n  appName: String,\n  unseenOnly: Boolean,\n  seenEvents: List[String],\n  similarEvents: List[String],\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]\n) extends Params\n```\n\nParameter description:\n\n- **appName**: Your App name. Events defined by \"seenEvents\" and \"similarEvents\" will be read from this app during `predict`.\n- **unseenOnly**: true or false. Set to true if you want to recommend unseen items only. Seen items are defined by *seenEvents* which mean if the user has these events on the items, then it's treated as *seen*.\n- **seenEvents**: A list of user-to-item events which will be treated as *seen* events. 
Used when *unseenOnly* is set to true.\n- **similarEvents**: A list of user-item-item events which will be used to find similar items to the items which the user has performed these events on.\n- **rank**: Parameter of the MLlib ALS algorithm. Number of latent features.\n- **numIterations**: Parameter of the MLlib ALS algorithm. Number of iterations.\n- **lambda**: Regularization parameter of the MLlib ALS algorithm.\n- **seed**: Optional. A random seed of the MLlib ALS algorithm. Specify a fixed value if want to have deterministic result.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib ALS algorithm,\ni.e. `ALS.trainImplicit()`, is used to train a predictive model. In addition, we also count the number of items being bought for each item as default model which will be used when there is no ALS model available or other useful information about the user is available during `predict`.\n\n```scala\n\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    ...\n\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // generate MLlibRating data for ALS algorithm\n    val mllibRatings: RDD[MLlibRating] = genMLlibRating(\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    ...\n\n    // count the number of items being bought for recommendation popular items as default case\n    val popularCount = trainDefault(\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n    
...\n\n  }\n\n```\n\n#### Working with Spark MLlib's ALS.trainImplicit(....)\n\nMLlib ALS does not support `String` user ID and item ID. `ALS.trainImplicit` thus also assumes int-only `Rating` object. First, you can rename MLlib's Integer-only `Rating` to `MLlibRating` for clarity:\n\n```\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n```\n\nIn order to use MLlib's ALS algorithm, we need to convert the `viewEvents` into `MLlibRating`. There are two things we need to handle:\n\n1. Map user and item String ID of the ViewEvent into Integer ID, as required by `MLlibRating`.\n2. `ViewEvent` object is an implicit event that does not have an explicit rating value. `ALS.trainImplicit()` supports implicit preference. If the `MLlibRating` has higher rating value, it means higher confidence that the user prefers the item. Hence we can aggregate how many times the user has viewed the item to indicate the confidence level that the user may prefer the item.\n\nYou create a bi-directional map with `BiMap.stringInt` which maps each String record to an Integer index.\n\n```scala\nval userStringIntMap = BiMap.stringInt(data.users.keys)\nval itemStringIntMap = BiMap.stringInt(data.items.keys)\n```\n\nThen convert the user and item String ID in each ViewEvent to Int with these BiMaps. We use default -1 if the user or item String ID couldn't be found in the BiMap and filter out these events with invalid user and item ID later. After filtering, we use `reduceByKey()` to add up all values for the same key (uindex, iindex) and then finally map to `MLlibRating` object. 
You can find the code inside the function `genMLlibRating()`:\n\n```scala\n\n  def genMLlibRating(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: BiMap[String, Int],\n    data: PreparedData): RDD[MLlibRating] = {\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }\n      .filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    mllibRatings\n  }\n\n```\n\nNOTE: You can customize this function if you want to convert other events to MLlibRating or need different ways to aggregate the events into MLlibRating.\n\nIn addition to `RDD[MLlibRating]`, `ALS.trainImplicit` takes the following parameters: *rank*, *iterations*, *lambda* and *seed*.\n\nThe values of these parameters are specified in *algorithms* of\nMyECommerceRecommendation/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        \"unseenOnly\": true,\n        \"seenEvents\": [\"buy\", \"view\"],\n        \"similarEvents\" : [\"view\"],\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n  ...\n}\n```\n\nThe 
parameters `appName`, `unseenOnly`, `seenEvents` and `similarEvents` are used during `predict()`, which will be explained later.\n\nPredictionIO will automatically load these values into the constructor `ap`,\nwhich has a corresponding case class `ECommAlgorithmParams`.\n\nThe `seed` parameter is an optional parameter, which is used by MLlib ALS algorithm internally to generate random values. If the `seed` is not specified, current system time would be used and hence each train may produce different results. Specify a fixed value for the `seed` if you want to have deterministic result (For example, when you are testing).\n\n`ALS.trainImplicit()` returns a `MatrixFactorizationModel` model which contains two RDDs: userFeatures and productFeatures. They correspond to the user X latent features matrix and item X latent features matrix, respectively.\n\nIn addition to the latent feature vector, the item properties (e.g. categories) and popular count are also used during `predict()`. Hence, we also save these data along with the feature vector by joining them and then collect the data as local Map. 
Each item is represented by a `ProductModel` class, which consists of the `item` information, `features` calculated by ALS, and `count` returned by `trainDefault()`.\n\n```scala\n\ncase class ProductModel(\n  item: Item,\n  features: Option[Array[Double]], // features by ALS\n  count: Int // popular count for default score\n)\n\n```\n\n```scala\n    // join item with the trained productFeatures\n    val productFeatures: Map[Int, (Item, Option[Array[Double]])] =\n      items.leftOuterJoin(m.productFeatures).collectAsMap.toMap\n\n    ...\n\n    val productModels: Map[Int, ProductModel] = productFeatures\n      .map { case (index, (item, features)) =>\n        val pm = ProductModel(\n          item = item,\n          features = features,\n          // NOTE: use getOrElse because popularCount may not contain all items.\n          count = popularCount.getOrElse(index, 0)\n        )\n        (index, pm)\n      }\n\n    new ECommModel(\n      rank = m.rank,\n      userFeatures = userFeatures,\n      productModels = productModels,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap\n    )\n\n```\n\nNote that `leftOuterJoin` is used because the productFeatures returned by ALS may not contain all items.\n\nThe `ECommModel` is defined as the following:\n\n```scala\nclass ECommModel(\n  val rank: Int,\n  val userFeatures: Map[Int, Array[Double]],\n  val productModels: Map[Int, ProductModel],\n  val userStringIntMap: BiMap[String, Int],\n  val itemStringIntMap: BiMap[String, Int]\n) extends Serializable  { ... }\n```\n\nPredictionIO will automatically store the returned model after training, i.e. `ECommModel` in this example.\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. 
PredictionIO converts the query, such as `{ \"user\": \"u1\", \"num\": 4 }` to the `Query` class you defined previously.\n\nWe can use the userFeatures and productFeatures stored in ECommModel to calculate the scores of items for the user.\n\nThis template also supports additional business logic features, such as filtering items by categories, recommending items in the white list, excluding items in the black list, recommending unseen items only, and excluding unavailable items defined in constraint event.\n\nThe `predict()` function does the following:\n\n1. Convert the item in query's whiteList from string ID to integer index\n2. Get a list of items seen by the user (defined by parameter `seenEvents`)\n3. Get the latest unavailableItems which is used to exclude unavailable items for all users\n4. Combine query's blackList, seenItems, and unavailableItems into a final black list of items to be excluded from recommendation.\n5. Get the user feature vector from the ECommModel.\n6. If there is feature vector for the user, recommend top N items based on the user feature and product features.\n7. If there is no feature vector for the user, use the recent items acted by the user (defined by `similarEvents` parameter) to recommend similar items.\n8. If there is no recent `similarEvents` available for the user, popular items are then recommended (added in template version 0.4.0).\n\nOnly items which satisfy the `isCandidate()` condition will be recommended. 
By default, the item can be recommended if:\n\n- it belongs to one of the categories defined in query.\n- it is one of the white list items if white list is defined.\n- it is not in the black list.\n\nINFO: You can easily modify `isCandidate()` checking or related logic if you have different requirements or condition to determine if an item is a candidate item to be recommended.\n\n```scala\n\n  def predict(model: ECommModel, query: Query): PredictedResult = {\n\n    val userFeatures = model.userFeatures\n    val productModels = model.productModels\n\n    // convert whiteList's string ID to integer index\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    // generate final blackList based on additional constraints\n    val finalBlackList: Set[Int] = genBlackList(query = query)\n      // convert seen Items list from String ID to integer Index\n      .flatMap(x => model.itemStringIntMap.get(x))\n\n    // look up user feature from model\n    val userFeature =\n      model.userStringIntMap.get(query.user).map { userIndex =>\n        userFeatures.get(userIndex)\n      }\n      // flatten Option[Option[Array[Double]]] to Option[Array[Double]]\n      .flatten\n\n    val topScores: Array[(Int, Double)] = if (userFeature.isDefined) {\n      // the user has feature vector\n      predictKnownUser(\n        userFeature = userFeature.get,\n        productModels = productModels,\n        query = query,\n        whiteList = whiteList,\n        blackList = finalBlackList\n      )\n    } else {\n      // the user doesn't have feature vector.\n      // For example, new user is created after model is trained.\n      logger.info(s\"No userFeature found for user ${query.user}.\")\n\n      // check if the user has recent events on some items\n      val recentItems: Set[String] = getRecentItems(query)\n      val recentList: Set[Int] = recentItems.flatMap (x =>\n        
model.itemStringIntMap.get(x))\n\n      val recentFeatures: Vector[Array[Double]] = recentList.toVector\n        // productModels may not contain the requested item\n        .map { i =>\n          productModels.get(i).flatMap { pm => pm.features }\n        }.flatten\n\n      if (recentFeatures.isEmpty) {\n        logger.info(s\"No features vector for recent items ${recentItems}.\")\n        predictDefault(\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList\n        )\n      } else {\n        predictSimilar(\n          recentFeatures = recentFeatures,\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList\n        )\n      }\n    }\n\n    ...\n  }\n```\n\nNote that the item IDs in top N results are the `Int` indices. You map them back to `String` with `itemIntStringMap` before they are returned.\n\n```scala\n  val itemScores = topScores.map { case (i, s) =>\n    new ItemScore(\n      // convert item int index back to string ID\n      item = model.itemIntStringMap(i),\n      score = s\n    )\n  }\n\n  new PredictedResult(itemScores)\n```\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. 
*Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyECommerceRecommendation/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object RecommendationEngine` inside ***Engine.scala***. Since only\none `ECommAlgorithm` is implemented by default, this `Seq` contains one element.\n"
  },
  {
    "path": "docs/manual/source/templates/ecommercerecommendation/how-to.html.md",
    "content": "---\ntitle: How-To (E-Commerce Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHere are the pages that show you how you can customize the E-Commerce Recommendation engine template.\n\n- [Train with Rate Event](/templates/ecommercerecommendation/train-with-rate-event/)\n- [Adjust Score](/templates/ecommercerecommendation/adjust-score/)\n"
  },
  {
    "path": "docs/manual/source/templates/ecommercerecommendation/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - E-Commerce Recommendation Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template provides personalized recommendation for e-commerce applications with the following features by default:\n\n- Exclude out-of-stock items\n- Provide recommendation to new users who sign up after the model is trained\n- Recommend unseen items only (configurable)\n- Recommend popular items if no information about the user is available (added in template version v0.4.0)\n\nWARNING: This template requires PredictionIO version >= 0.9.0\n\n\n## Usage\n\n### Event Data Requirements\n\nBy default, this template takes the following data from Event Server:\n\n- Users' *view* events\n- Users' *buy* events\n- Items' with *categories* properties\n- Constraint *unavailableItems* set events\n\nINFO: This template can easily be customized to consider more user events such as *rate* and *like*.\n\nThe *view* events are used as Training Data to train the model. The algorithm has a parameter *unseenOnly*; when this parameter is set to true, the engine would recommend unseen items only. 
You can specify a list of events which are considered as *seen* events with the algorithm parameter *seenEvents*. The default values are *view* and *buy* events, which means that the engine by default recommends un-viewed and un-bought items only. You can also define your own events which are considered as *seen*.\n\nThe constraint *unavailableItems* set events are used to exclude a list of unavailable items (such as out of stock) for all users in real time.\n\n### Input Query\n\n- UserID\n- Num of items to be recommended\n- List of white-listed item categories (optional)\n- List of white-listed ItemIds (optional)\n- List of black-listed ItemIds (optional)\n\nThe template also supports black-list and whitelist. If a whitelist is provided, the engine will include only those products in the recommendation.\nLikewise, if a blacklist is provided, the engine will exclude those products in the recommendation.\n\n### Output PredictedResult\n\n- A ranked list of recommended itemIDs\n\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyECommerceRecommendation', template_name: 'E-Commerce Recommendation Engine Template', template_repo: 'apache/predictionio-template-ecom-recommender' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect training data for this Engine. By default,\nthe E-Commerce Recommendation Engine Template supports 2 types of entities and 2 events: **user** and\n**item**; events **view** and **buy**. An item has the **categories** property, which is a list of category names (String). A user can view and buy an item. 
The special **constraint** entity with entityId **unavailableItems** defines a list of unavailable items and is taken into account in realtime during serving.\n\nIn summary, this template requires '$set' user event, '$set' item event, user-view-item events, user-buy-item event and '$set' constraint event.\n\nINFO: This template can easily be customized to consider other user-to-item events.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\nFor example, when a new user with id \"u0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for this user. To send this event, run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# Create a new user\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"user\",\n  entity_id=<USER_ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'user',\n  'entityId' => <USER ID>\n));\n\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>,\n  'properties' => array('categories' => array('<CATEGORY_1>', 
'<CATEGORY_2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# Create a new user\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\nEvent userEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"user\")\n  .entityId(<USER_ID>);\nclient.createEvent(userEvent);\n```\n  </div>\n</div>\n\nWhen a new item \"i0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for the item. Note that the item is set with categories properties: `\"c1\"` and `\"c2\"`. 
Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"item\",\n  \"entityId\" : \"i0\",\n  \"properties\" : {\n    \"categories\" : [\"c1\", \"c2\"]\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Create a new item or set existing item's categories\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"item\",\n  entity_id=item_id,\n  properties={\n    \"categories\" : [\"<CATEGORY_1>\", \"<CATEGORY_2>\"]\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>,\n  'properties' => array('categories' => array('<CATEGORY_1>', '<CATEGORY_2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a new item or set existing item's categories\nclient.create_event(\n  '$set',\n  'item',\n  <ITEM ID>, {\n    'properties' => { 'categories' => ['<CATEGORY_1>', '<CATEGORY_2>'] }\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Create a new item or set existing item's categories\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"item\")\n  .entityId(<ITEM_ID>)\n  .property(\"categories\", ImmutableList.of(\"<CATEGORY_1>\", \"<CATEGORY_2>\"));\nclient.createEvent(itemEvent);\n```\n  </div>\n</div>\n\nThe properties of the `user` and `item` can be set, unset, or deleted by special events **$set**, **$unset** and **$delete**. 
Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nWhen the user \"u0\" view item \"i0\" on time `2014-11-10T12:34:56.123-08:00` (current time will be used if eventTime is not specified), you can send a view event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user views an item\n\nclient.create_event(\n  event=\"view\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user views an item\n$client->createEvent(array(\n   'event' => 'view',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user views an item.\nclient.create_event(\n  'view',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user views an item\nEvent viewEvent = new Event()\n    .event(\"view\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\nWhen the user \"u0\" buy item \"i0\" on time `2014-11-10T13:00:00.123-08:00` (current time will be used if eventTime is not specified), you can send 
a buy event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"buy\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T13:00:00.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user buys an item\n\nclient.create_event(\n  event=\"buy\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user buys an item\n$client->createEvent(array(\n   'event' => 'buy',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user buys an item.\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user buys an item\nEvent viewEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. It imports 10 users (with user ID \"u1\" to \"u10\") and 50 items (with item ID \"i1\" to \"i50\") with some randomly assigned categories (with categories \"c1\" to \"c6\"). 
Each user then randomly view 10 items.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyECommerceRecommendation` directory. Execute the following to import the data:\n\n```\n$ cd MyECommerceRecommendation\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u10 buys item i14\nUser u10 views item i46\nUser u10 buys item i46\nUser u10 views item i30\nUser u10 buys item i30\nUser u10 views item i40\nUser u10 buys item i40\n204 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyECommerceRecommendation' } %>\n\nWARNING: Note that the \"algorithms\" also has `appName` parameter which you need to modify to match your **App Name** as well:\n\n```\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        ...\n      }\n    }\n  ]\n  ...\n```\n\nNOTE: You may see `appId` in engine.json instead, which means you are using old template. In this case, make sure the `appId` defined in the file match your **App ID**. Alternatively, you can download the latest version of the template or follow our [upgrade instructions](/resources/upgrade/#upgrade-to-0.9.2) to modify the template to use `appName` as parameter.\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyECommerceRecommendation' } %>\n\n## 6. Use the Engine\n\nNow, You can retrieve predicted results. To recommend 4 items to user ID \"u1\". You send this JSON `{ \"user\": \"u1\", \"num\": 4 }` to the deployed engine and it will return a JSON of the recommended items. 
Simply send a query by making a HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"u1\", \"num\": 4 }' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\"user\": \"u1\", \"num\": 4})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('user'=> 'u1', 'num'=> 4));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query('user' => 'u1', 'num' => 4)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"user\", \"u1\",\n        \"num\",  4\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\n  \"itemScores\":[\n    {\"item\":\"i4\",\"score\":0.006009267718658978},\n    {\"item\":\"i33\",\"score\":0.005999267822052033},\n    {\"item\":\"i14\",\"score\":0.005261309429391667},\n 
   {\"item\":\"i3\",\"score\":0.003007015026561692}\n  ]\n}\n```\n\n*MyECommerceRecommendation* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n## Setting constraint \"unavailableItems\"\n\nNow let's send an item constraint \"unavailableItems\" (replace accessKey with your Access Key):\n\nNOTE: You can also use SDK to send this event as described in the SDK sample above.\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"constraint\",\n  \"entityId\" : \"unavailableItems\",\n  \"properties\" : {\n    \"items\": [\"i4\", \"i14\", \"i11\"]\n  },\n  \"eventTime\" : \"2015-02-17T02:11:21.934Z\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Set a list of unavailable items\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"constraint\",\n  entity_id=\"unavailableItems\",\n  properties={\n    \"items\" : [\"<ITEM ID1>\", \"<ITEM ID2>\"]\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// Set a list of unavailable items\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'constraint',\n  'entityId' => 'unavailableItems',\n  'properties' => array('items' => array('<ITEM ID1>', '<ITEM ID2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Set a list of unavailable items\nclient.create_event(\n  '$set',\n  'constraint',\n  'unavailableItems', {\n    'properties' => { 'items' => ['<ITEM ID1>', '<ITEM ID2>'] }\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Set a list of unavailable items\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"constraint\")\n  .entityId(\"unavailableItems\")\n  .property(\"items\", ImmutableList.of(\"<ITEM ID1>\", \"<ITEM 
ID2>\"));\nclient.createEvent(itemEvent);\n```\n  </div>\n</div>\n\n\nTry to get recommendations for user *u1* again; the unavailable items (e.g. i4, i14, i11) won't be recommended anymore:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"user\": \"u1\",\n  \"num\": 4,\n  \"blackList\": [\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i33\",\"score\":0.005999267822052019},{\"item\":\"i3\",\"score\":0.0030070150265619003},{\"item\":\"i2\",\"score\":0.0028489173099429527},{\"item\":\"i5\",\"score\":0.0028489173099429527}]}\n```\n\nINFO: You should send a full list of unavailable items whenever there are any updates to the list. The latest event is used.\n\nWhen there are no more unavailable items, simply set an empty list, i.e.:\n\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"constraint\",\n  \"entityId\" : \"unavailableItems\",\n  \"properties\" : {\n    \"items\": []\n  },\n  \"eventTime\" : \"2015-02-18T02:11:21.934Z\"\n}'\n```\n\n## Advanced Query\n\nIn addition, the Query supports the following optional parameters: `categories`, `whiteList` and `blackList`.\n\n### Recommend items in selected categories:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"user\": \"u1\",\n  \"num\": 4,\n  \"categories\" : [\"c4\", \"c3\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i4\",\"score\":0.006009267718658978},{\"item\":\"i33\",\"score\":0.005999267822052033},{\"item\":\"i14\",\"score\":0.005261309429391667},{\"item\":\"i2\",\"score\":0.002848917309942939}]}\n```\n\n### Recommend items in the whiteList:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"user\": \"u1\",\n  \"num\": 4,\n  \"whiteList\": [\"i1\", \"i2\", \"i3\", \"i21\", \"i22\", \"i23\", \"i24\", 
\"i25\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i3\",\"score\":0.003007015026561692},{\"item\":\"i2\",\"score\":0.002848917309942939},{\"item\":\"i23\",\"score\":0.0016857619403278443},{\"item\":\"i25\",\"score\":1.3707548965227745E-4}]}\n```\n\n### Recommend items not in the blackList:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"user\": \"u1\",\n  \"num\": 4,\n  \"categories\" : [\"c4\", \"c3\"],\n  \"blackList\": [\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i4\",\"score\":0.006009267718658978},{\"item\":\"i33\",\"score\":0.005999267822052033},{\"item\":\"i14\",\"score\":0.005261309429391667},{\"item\":\"i2\",\"score\":0.002848917309942939}]}\n```\n\n#### [Next: DASE Components Explained](/templates/ecommercerecommendation/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/ecommercerecommendation/train-with-rate-event.html.md.erb",
    "content": "---\ntitle: Train with Rate Event (E-Commerce Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis example demonstrates how to modify the E-Commerce Recommendation template to use \"rate\" events as Training Data.\n\nHowever, recent \"view\" events are still used to make recommendations for new users (to recommend items similar to what the new user recently viewed), and the scores returned for new users are ranking scores rather than predicted ratings.\n\nThis template also supports the case where a user rates the same item multiple times; the latest rating value is used for training. The modification can be further simplified if support for this case is not needed.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-ecommercerecommendation/train-with-rate-event).\n\n\n## Modification\n\n### DataSource.scala\n\nIn DataSource, change the `ViewEvent` case class to `RateEvent`. 
Add a `rating: Double` field to `RateEvent`.\n\nChange\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nto\n\n```scala\n// MODIFIED\ncase class RateEvent(user: String, item: String, rating: Double, t: Long)\n```\n\nModify the `TrainingData` class to use `rateEvents`:\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent], // MODIFIED\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    // MODIFIED\n    s\"rateEvents: [${rateEvents.count()}] (${rateEvents.take(2).toList}...)\" +\n    s\"buyEvents: [${buyEvents.count()}] (${buyEvents.take(2).toList}...)\"\n  }\n}\n```\n\nModify `readTraining()` function of `DataSource` to read \"rate\" events (commented with \"// MODIFIED\"). Replace all `ViewEvent` with `RateEvent`. Replace all `viewEventsRDD` with `rateEventsRDD`. 
Retrieve the rating value from the event properties:\n\n```scala\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    ...\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .cache()\n\n    val rateEventsRDD: RDD[RateEvent] = eventsRDD // MODIFIED\n      .filter { event => event.event == \"rate\" } // MODIFIED\n      .map { event =>\n        try {\n          RateEvent( // MODIFIED\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            rating = event.properties.get[Double](\"rating\"), // ADDED\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to RateEvent.\" + // MODIFIED\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n    ...\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      rateEvents = rateEventsRDD, // MODIFIED\n      buyEvents = buyEventsRDD\n    )\n  }\n```\n\n### Preparator.scala\n\nModify Preparator to pass rateEvents to algorithm as PreparedData (Replace all `ViewEvent` with `RateEvent`. 
Replace all `viewEvents` with `rateEvents`)\n\nModify Preparator's `prepare()` method:\n\n```scala\n\n  ...\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      rateEvents = trainingData.rateEvents, // MODIFIED\n      buyEvents = trainingData.buyEvents)\n  }\n```\n\nModify `PreparedData` class:\n\n```scala\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent], // MODIFIED\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable\n\n```\n\n### ECommAlgorithm.scala\n\nModify `train()` method to train with rate event.\n\n```scala\n\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    require(!data.rateEvents.take(1).isEmpty, // MODIFIED\n      s\"rateEvents in PreparedData cannot be empty.\" + // MODIFIED\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n\n    ...\n  }\n\n  def genMLlibRating(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: BiMap[String, Int],\n    data: PreparedData): RDD[MLlibRating] = {\n\n    val mllibRatings = data.rateEvents // MODIFIED\n      .map { r =>\n        ...\n\n        ((uindex, iindex), (r.rating, r.t)) // MODIFIED\n      }\n      .filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey { case (v1, v2) => // MODIFIED\n        // if a user may rate same item with different value at different times,\n        // use the latest value for this case.\n        // Can remove this reduceByKey() if no need to support this case.\n        val (rating1, t1) = v1\n        val (rating2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }\n      .map { case ((u, i), (rating, t)) => // MODIFIED\n        // MLlibRating requires 
integer index for user and item\n        MLlibRating(u, i, rating) // MODIFIED\n      }\n      .cache()\n\n    mllibRatings\n  }\n```\n\nModify `train()` method to use `ALS.train()` instead of `ALS.trainImplicit()`:\n\nChange the following from:\n\n```scala\n    ...\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n    ...\n\n```\n\nto:\n\n```scala\n    ...\n\n    val m = ALS.train( // MODIFIED\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n    ...\n\n```\n\nThat's it! Now your engine can train model with rate events.\n"
  },
  {
    "path": "docs/manual/source/templates/index.html.md",
    "content": "---\ntitle: PredictionIO Official Templates\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nPredictionIO's [template gallery](/gallery/template-gallery) offers both\nofficial and community contributed Engine Templates for all kinds of machine\nlearning tasks. You can easily create one or more engines from these templates.\n\nThe section contains more detailed documentation of the official templates for\nbeginner users to get familiar with PredictionIO and examples of how the\ntemplates can be modified for other usages.\n"
  },
  {
    "path": "docs/manual/source/templates/javaecommercerecommendation/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (E-Commerce Recommendation (Java))\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'E-Commerce Recommendation Engine Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyECommerceRecommendation* takes a JSON prediction\nquery, e.g. 
`{ \"userEntityId\": \"u1\", \"number\": 4 }`, and return a JSON predicted result.\nThe `Query` class defines the format of such **query**:\n\n```java\npublic class Query implements Serializable {\n\n  private final String userEntityId;\n  private final int number;\n  private final Set<String> categories;\n  private final Set<String> whitelist;\n  private final Set<String> blacklist;\n\n  ...\n\n}\n\n```\n\nThe `PredictedResult` and `ItemScore` classes define the format of **predicted result**,\nsuch as\n\n```json\n{\"itemScores\":[\n  {\"itemEntityId\":22,\"score\":4.07},\n  {\"itemEntityId\":62,\"score\":4.05},\n  {\"itemEntityId\":75,\"score\":4.04},\n  {\"itemEntityId\":68,\"score\":3.81}\n]}\n```\n\nwith:\n\n```java\npublic class PredictedResult implements Serializable {\n  private final List<ItemScore> itemScores;\n\n  ...\n}\n\npublic class ItemScore implements Serializable, Comparable<ItemScore> {\n  private final String itemEntityId;\n  private final double score;\n\n  ...\n}\n```\n\nFinally, `RecommendationEngine` is the *Engine Factory* class that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```java\npublic class RecommendationEngine extends EngineFactory {\n\n  @Override\n  public BaseEngine<EmptyParams, Query, PredictedResult, Object> apply() {\n    return new Engine<>(\n      DataSource.class,\n      Preparator.class,\n      Collections.<String, Class<? extends BaseAlgorithm<PreparedData, ?, Query, PredictedResult>>>singletonMap(\"algo\", Algorithm.class),\n      Serving.class\n    );\n  }\n}\n```\n\n### Spark MLlib\n\nThe PredictionIO E-Commerce Recommendation Engine Template integrates Spark's MLlib ALS algorithm under the DASE\narchitecture. We will take a closer look at the DASE code below.\n\nThe MLlib ALS algorithm takes training data of RDD type, i.e. 
`RDD[Rating]` and train a model, which is a `MatrixFactorizationModel` object.\n\nYou can visit [here](https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html) to learn more about MLlib's ALS collaborative filtering algorithm.\n\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn ***DataSource*** class, the `readTraining`\nmethod reads and selects data from the *Event Store*. It returns `TrainingData`.\n\n```java\n  public TrainingData readTraining(SparkContext sc) {\n\n    // create a JavaPairRDD of (entityID, User)\n    JavaPairRDD<String,User> usersRDD = PJavaEventStore.aggregateProperties(...)\n\n    // create a JavaPairRDD of (entityID, Item)\n    JavaPairRDD<String, Item> itemsRDD = PJavaEventStore.aggregateProperties(...)\n\n    // find all view events\n    JavaRDD<UserItemEvent> viewEventsRDD = PJavaEventStore.find(...)\n\n    // find all buy events\n    JavaRDD<UserItemEvent> buyEventsRDD = PJavaEventStore.find(...)\n\n    return new TrainingData(usersRDD, itemsRDD, viewEventsRDD, buyEventsRDD);\n  }\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MyECommerceRecommendation/***engine.json***, including *appName*.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PJavaEventStore` is an object which provides function to access data that is collected by PredictionIO Event Server.\n\nThis E-Commerce Recommendation Engine Template requires \"user\" and \"item\" entities that are set by events.\n\n`PJavaEventStore.aggregateProperties(...)` aggregates properties of the `user` and `item` that are set, unset, or deleted by special events **$set**, **$unset** and **$delete**. 
Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nThe following code aggregates the properties of `user` and then map each result to a `User` object.\n\n```java\n\n JavaPairRDD<String,User> usersRDD = PJavaEventStore.aggregateProperties(\n   dsp.getAppName(),\n   \"user\",\n   OptionHelper.<String>none(),\n   OptionHelper.<DateTime>none(),\n   OptionHelper.<DateTime>none(),\n   OptionHelper.<List<String>>none(),\n   sc)\n   .mapToPair(new PairFunction<Tuple2<String, PropertyMap>, String, User>() {\n     @Override\n     public Tuple2<String, User> call(Tuple2<String, PropertyMap> entityIdProperty) throws Exception {\n       Set<String> keys = JavaConversions$.MODULE$.setAsJavaSet(entityIdProperty._2().keySet());\n       Map<String, String> properties = new HashMap<>();\n       for (String key : keys) {\n         properties.put(key, entityIdProperty._2().get(key, String.class));\n       }\n\n       User user = new User(entityIdProperty._1(), ImmutableMap.copyOf(properties));\n\n       return new Tuple2<>(user.getEntityId(), user);\n     }\n   });\n\n\n```\nIn the template, `User` object is a placeholder for you to customize and expand.\n\n\nSimilarly, the following code aggregates `item` properties and then map each result to an `Item` object. 
By default, this template assumes each item has an optional property `categories`, which is a list of String.\n\n```java\n  JavaPairRDD<String, Item> itemsRDD = PJavaEventStore.aggregateProperties(\n    dsp.getAppName(),\n    \"item\",\n    OptionHelper.<String>none(),\n    OptionHelper.<DateTime>none(),\n    OptionHelper.<DateTime>none(),\n    OptionHelper.<List<String>>none(),\n    sc)\n    .mapToPair(new PairFunction<Tuple2<String, PropertyMap>, String, Item>() {\n       @Override\n       public Tuple2<String, Item> call(Tuple2<String, PropertyMap> entityIdProperty) throws Exception {\n         List<String> categories = entityIdProperty._2().getStringList(\"categories\");\n         Item item = new Item(entityIdProperty._1(), ImmutableSet.copyOf(categories));\n\n         return new Tuple2<>(item.getEntityId(), item);\n       }\n    });\n\n```\n\n`PJavaEventStore.find(...)` specifies the events that you want to read. In this case, \"user view item\" and \"user buy item\" events are read:\n\n```java\n\n  JavaRDD<UserItemEvent> viewEventsRDD = PJavaEventStore.find(\n    dsp.getAppName(),\n    OptionHelper.<String>none(),\n    OptionHelper.<DateTime>none(),\n    OptionHelper.<DateTime>none(),\n    OptionHelper.some(\"user\"),\n    OptionHelper.<String>none(),\n    OptionHelper.some(Collections.singletonList(\"view\")),\n    OptionHelper.<Option<String>>none(),\n    OptionHelper.<Option<String>>none(),\n    sc)\n    .map(new Function<Event, UserItemEvent>() {\n       @Override\n       public UserItemEvent call(Event event) throws Exception {\n         return new UserItemEvent(event.entityId(), event.targetEntityId().get(), event.eventTime().getMillis(), UserItemEventType.VIEW);\n       }\n    });\n\n```\n\nSimilarly, we read buy events from Event Server.\n\n\n```java\n\n  JavaRDD<UserItemEvent> buyEventsRDD = PJavaEventStore.find(\n    dsp.getAppName(),\n    OptionHelper.<String>none(),\n    OptionHelper.<DateTime>none(),\n    OptionHelper.<DateTime>none(),\n    
OptionHelper.some(\"user\"),\n    OptionHelper.<String>none(),\n    OptionHelper.some(Collections.singletonList(\"buy\")),\n    OptionHelper.<Option<String>>none(),\n    OptionHelper.<Option<String>>none(),\n    sc)\n    .map(new Function<Event, UserItemEvent>() {\n       @Override\n       public UserItemEvent call(Event event) throws Exception {\n         return new UserItemEvent(event.entityId(), event.targetEntityId().get(), event.eventTime().getMillis(), UserItemEventType.BUY);\n       }\n    });\n\n```\n\nINFO: For flexibility, this template is designed to support user ID and item ID in String.\n\n`TrainingData` contains Java RDD of `User`, `Item`, `View Event`, and `Buy Event`. The class definition of `TrainingData` is:\n\n```java\npublic class TrainingData implements Serializable, SanityCheck {\n    private final JavaPairRDD<String, User> users;\n    private final JavaPairRDD<String, Item> items;\n    private final JavaRDD<UserItemEvent> viewEvents;\n    private final JavaRDD<UserItemEvent> buyEvents;\n\n    ...\n}\n\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\nNOTE: You could modify the DataSource to read events other than the default **view** or **buy**.\n\n### Data Preparator\n\nIn ***Preparator***, the `prepare` method\ntakes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. 
At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nBy default, `prepare` simply includes the unprocessed `TrainingData` in `PreparedData`:\n\n```java\npublic class Preparator extends PJavaPreparator<TrainingData, PreparedData> {\n\n  @Override\n  public PreparedData prepare(SparkContext sc, TrainingData trainingData) {\n    return new PreparedData(trainingData);\n  }\n}\n\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn the ***Algorithm*** class, the two methods of\ninterest are `train` and `predict`. `train` is responsible for\ntraining the predictive model; `predict` is\nresponsible for using this model to make a prediction.\n\n\n### Algorithm parameters\n\nThe algorithm takes the following parameters, as defined by the `AlgorithmParams` class:\n\n```java\npublic class AlgorithmParams implements Params{\n    private final long seed;\n    private final int rank;\n    private final int iteration;\n    private final double lambda;\n    private final String appName;\n    private final List<String> similarItemEvents;\n    private final boolean unseenOnly;\n    private final List<String> seenItemEvents;\n\n    ...\n}\n```\n\nParameter description:\n\n- **appName**: Your App name. Events defined by \"seenItemEvents\" and \"similarItemEvents\" will be read from this app during `predict`.\n- **unseenOnly**: true or false. Set to true if you want to recommend unseen items only. Seen items are defined by *seenItemEvents* which mean if the user has these events on the items, then it's treated as *seen*.\n- **seenItemEvents**: A list of user-to-item events which will be treated as *seen* events. Used when *unseenOnly* is set to true.\n- **similarItemEvents**: A list of user-item-item events which will be used to find similar items to the items which the user has performed these events on.\n- **rank**: Parameter of the MLlib ALS algorithm. 
Number of latent features.\n- **iteration**: Parameter of the MLlib ALS algorithm. Number of iterations.\n- **lambda**: Regularization parameter of the MLlib ALS algorithm.\n- **seed**: A random seed of the MLlib ALS algorithm.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib ALS algorithm,\ni.e. `ALS.trainImplicit()`, is used to train a predictive model. In addition, we also count the number of items being bought for each item as default model which will be used when there is no ALS model available or other useful information about the user is available during `predict`.\n\n```java\n\n  public Model train(SparkContext sc, PreparedData preparedData) {\n\n    ...\n\n  MatrixFactorizationModel matrixFactorizationModel = ALS.trainImplicit(JavaRDD.toRDD(ratings), ap.getRank(), ap.getIteration(), ap.getLambda(), -1, 1.0, ap.getSeed());\n\n    ...\n\n  }\n\n```\n\n#### Working with Spark MLlib's ALS.trainImplicit(....)\n\nMLlib ALS algorithm does not support `String` user ID and item ID. `ALS.trainImplicit` thus also assumes int-only `Rating` object. A view event is an implicit event that does not have an explicit rating value. `ALS.trainImplicit()` supports implicit preference. If the `Rating` has higher rating value, it means higher confidence that the user prefers the item. Hence we can aggregate how many times the user has viewed the item to indicate the confidence level that the user may prefer the item.\n\nHere are the steps to use MLlib ALS algorithm.\n\n1. Map user and item string ID of the view event into integer ID, as required by `Rating`.\n2. Filter out the events with invalid user or item ID.\n3. Use `reduceByKey()` to add up all values for events with the same user-item combination.\n4. 
Create a `Rating` object using user index, item index, and summed up score.\n\n```java\n\nJavaRDD<Rating> ratings = data.getViewEvents().mapToPair(new PairFunction<UserItemEvent, Tuple2<Integer, Integer>, Integer>() {\n  @Override\n  public Tuple2<Tuple2<Integer, Integer>, Integer> call(UserItemEvent viewEvent) throws Exception {\n    Integer userIndex = userIndexMap.get(viewEvent.getUser());\n    Integer itemIndex = itemIndexMap.get(viewEvent.getItem());\n\n    return (userIndex == null || itemIndex == null) ? null : new Tuple2<>(new Tuple2<>(userIndex, itemIndex), 1);\n  }\n}).filter(new Function<Tuple2<Tuple2<Integer, Integer>, Integer>, Boolean>() {\n  @Override\n  public Boolean call(Tuple2<Tuple2<Integer, Integer>, Integer> element) throws Exception {\n    return (element != null);\n  }\n}).reduceByKey(new Function2<Integer, Integer, Integer>() {\n  @Override\n  public Integer call(Integer integer, Integer integer2) throws Exception {\n    return integer + integer2;\n  }\n}).map(new Function<Tuple2<Tuple2<Integer, Integer>, Integer>, Rating>() {\n  @Override\n  public Rating call(Tuple2<Tuple2<Integer, Integer>, Integer> userItemCount) throws Exception {\n    return new Rating(userItemCount._1()._1(), userItemCount._1()._2(), userItemCount._2().doubleValue());\n  }\n});\n\n\n```\n\nIn addition to `RDD[Rating]`, `ALS.trainImplicit` takes the following parameters: *rank*, *iterations*, *lambda* and *seed*.\n\nThe values of these parameters are specified in *algorithms* section of ***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        \"unseenOnly\": true,\n        \"seenItemEvents\": [\"buy\", \"view\"],\n        \"similarItemEvents\" : [\"view\"],\n        \"rank\": 10,\n        \"iteration\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n  ...\n}\n```\n\nThe parameters `appName`, `unseenOnly`, `seenItemEvents` and `similarItemEvents` 
are used during `predict()`, which will be explained later.\n\nPredictionIO will automatically loads these values into the `AlgorithmParams` field of the `Algorithm`.\n\nThe `seed` parameter is used by MLlib ALS algorithm internally to generate random values. Specify a fixed value for the `seed` if you want to have deterministic result (For example, when you are testing).\n\n`ALS.trainImplicit()` returns a `MatrixFactorizationModel` model which contains two RDDs: userFeatures and productFeatures. They correspond to the user X latent features matrix and item X latent features matrix, respectively.\n\nIn addition to the latent feature vector, the item properties (e.g. categories) and popular count are also used during `predict()`. Hence, we also save these data along with the feature vector.\n\nPredictionIO will store the returned model after training.\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{ \"userEntityId\": \"u1\", \"number\": 4 }` to the `Query` class we defined previously.\n\nWe can use the user features and item features stored in the model to calculate the scores of items for the user.\n\nThis template also supports additional business logic features, such as filtering items by categories, recommending items in the whitelist, excluding items in the blacklist, recommend unseen items only, and exclude unavailable items defined in constraint event.\n\nThe `predict()` function does the following:\n\n1. Get the user feature vector from the model.\n2. If there is feature vector for the user, recommend top N items based on the user feature and item features.\n3. If there is no feature vector for the user, use the recent items acted on by the user (defined by `similarItemEvents` parameter) to recommend similar items.\n4. 
If there is no recent `similarItemEvents` available for the user, popular items are recommended.\n\nOnly items which satisfy the following conditions will be recommended. By default, an item will be recommended if:\n\n- it belongs to one of the categories defined in query.\n- it is one of the whitelisted items if a whitelist is defined.\n- it is not on the blacklist.\n- it is available.\n\nINFO: You can easily modify `validScores()` if you have different requirements or conditions to determine if an item should be recommended.\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\n```java\npublic class Serving extends LJavaServing<Query, PredictedResult> {\n\n  @Override\n  public PredictedResult serve(Query query, Seq<PredictedResult> predictions) {\n    return predictions.head();\n  }\n}\n\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq<PredictedResult>`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `RecommendationEngine`. Since we only have one algorithm, this `Seq` contains one element.\n"
  },
  {
    "path": "docs/manual/source/templates/javaecommercerecommendation/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - E-Commerce Recommendation Engine Template (Java)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template provides personalized recommendation for e-commerce applications with the following features by default:\n\n- Exclude out-of-stock items\n- Provide recommendation to new users who sign up after the model is trained\n- Recommend unseen items only (configurable)\n- Recommend popular items if no information about the user is available\n\nWARNING: This template requires PredictionIO version >= 0.9.3\n\n\n## Usage\n\n### Event Data Requirements\n\nBy default, this template takes the following data from Event Server:\n\n- Users' *view* events\n- Users' *buy* events\n- Items with *categories* properties\n- Constraint *unavailableItems* set events\n\nINFO: This template can easily be customized to consider more user events such as *rate* and *like*.\n\nThe *view* events are used as Training Data to train the model. The algorithm has a parameter *unseenOnly*; when this parameter is set to true, the engine would recommend unseen items only. You can specify a list of events which are considered as *seen* events with the algorithm parameter *seenItemEvents*. 
The default values are *view* and *buy* events, which means that the engine by default recommends un-viewed and un-bought items only. You can also define your own events which are considered as *seen*.\n\nThe constraint *unavailableItems* set events are used to exclude a list of unavailable items (such as out of stock) for all users in real time.\n\n### Input Query\n\n- User entity ID\n- Number of items to be recommended\n- List of whitelisted item categories (optional)\n- List of whitelisted item entity IDs (optional)\n- List of blacklisted item entity IDs (optional)\n\nThis template also supports blacklist and whitelist. If a whitelist is provided, the engine will include only those products in the recommendation.\nLikewise, if a blacklist is provided, the engine will exclude those products in the recommendation.\n\n### Output PredictedResult\n\n- A ranked list of recommended item entity IDs\n\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyECommerceRecommendation', template_name: 'E-Commerce Recommendation Engine Template', template_repo: 'apache/predictionio-template-java-ecom-recommender' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect training data for this Engine. By default,\nthe E-Commerce Recommendation Engine Template supports 2 types of entities and 2 events: **user** and\n**item**; events **view** and **buy**. An item has the **categories** property, which is a list of category names (String). A user can view and buy an item. 
The special **constraint** entity with entityId **unavailableItems** defines a list of unavailable items and is taken into account in real time during serving.\n\nIn summary, this template requires '$set' user events, '$set' item events, user-view-item events, user-buy-item events and '$set' constraint events.\n\nINFO: This template can easily be customized to consider other user-to-item events.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\n\nFor example, when a new user with id \"u0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for this user. To send this event, run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# Create a new user\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"user\",\n  entity_id=<USER_ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'user',\n  'entityId' => <USER ID>\n));\n\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>,\n  'properties' => array('categories' => array('<CATEGORY_1>', 
'<CATEGORY_2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# Create a new user\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\nEvent userEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"user\")\n  .entityId(<USER_ID>);\nclient.createEvent(userEvent);\n```\n  </div>\n</div>\n\nWhen a new item \"i0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for the item. Note that the item is set with categories properties: `\"c1\"` and `\"c2\"`. 
Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"item\",\n  \"entityId\" : \"i0\",\n  \"properties\" : {\n    \"categories\" : [\"c1\", \"c2\"]\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Create a new item or set existing item's categories\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"item\",\n  entity_id=item_id,\n  properties={\n    \"categories\" : [\"<CATEGORY_1>\", \"<CATEGORY_2>\"]\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>,\n  'properties' => array('categories' => array('<CATEGORY_1>', '<CATEGORY_2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a new item or set existing item's categories\nclient.create_event(\n  '$set',\n  'item',\n  <ITEM ID>, {\n    'properties' => { 'categories' => ['<CATEGORY_1>', '<CATEGORY_2>'] }\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Create a new item or set existing item's categories\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"item\")\n  .entityId(<ITEM_ID>)\n  .property(\"categories\", ImmutableList.of(\"<CATEGORY_1>\", \"<CATEGORY_2>\"));\nclient.createEvent(itemEvent);\n```\n  </div>\n</div>\n\nThe properties of the `user` and `item` can be set, unset, or deleted by special events **$set**, **$unset** and **$delete**. 
Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nWhen the user \"u0\" view item \"i0\" on time `2014-11-10T12:34:56.123-08:00` (current time will be used if eventTime is not specified), you can send a view event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user views an item\n\nclient.create_event(\n  event=\"view\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user views an item\n$client->createEvent(array(\n   'event' => 'view',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user views an item.\nclient.create_event(\n  'view',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user views an item\nEvent viewEvent = new Event()\n    .event(\"view\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\nWhen the user \"u0\" buy item \"i0\" on time `2014-11-10T13:00:00.123-08:00` (current time will be used if eventTime is not specified), you can send 
a buy event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"buy\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T13:00:00.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user buys an item\n\nclient.create_event(\n  event=\"buy\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user buys an item\n$client->createEvent(array(\n   'event' => 'buy',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user buys an item.\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user buys an item\nEvent viewEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. It imports 10 users (with user ID \"u1\" to \"u10\") and 50 items (with item ID \"i1\" to \"i50\") with some randomly assigned categories (with categories \"c1\" to \"c6\"). 
Each user then randomly views 10 items.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyECommerceRecommendation` directory. Execute the following to import the data:\n\n```\n$ cd MyECommerceRecommendation\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u10 buys item i14\nUser u10 views item i46\nUser u10 buys item i46\nUser u10 views item i30\nUser u10 buys item i30\nUser u10 views item i40\nUser u10 buys item i40\n204 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyECommerceRecommendation' } %>\n\nWARNING: Note that the \"algorithms\" also has `appName` parameter which you need to modify to match your **App Name** as well:\n\n```\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        ...\n      }\n    }\n  ]\n  ...\n```\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyECommerceRecommendation' } %>\n\n## 6. Use the Engine\n\nNow you can retrieve predicted results. To recommend 4 items to user ID \"u1\", send this JSON `{ \"userEntityId\": \"u1\", \"number\": 4 }` to the deployed engine and it will return a JSON of the recommended items. 
Simply send a query by making an HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use an SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"userEntityId\": \"u1\", \"number\": 4 }' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\"userEntityId\": \"u1\", \"number\": 4})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('userEntityId'=> 'u1', 'number'=> 4));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query('userEntityId' => 'u1', 'number' => 4)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"userEntityId\", \"u1\",\n        \"number\",  4\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\n  \"itemScores\":[\n    {\"itemEntityId\":\"i4\",\"score\":0.006009267718658978},\n    
{\"itemEntityId\":\"i33\",\"score\":0.005999267822052033},\n    {\"itemEntityId\":\"i14\",\"score\":0.005261309429391667},\n    {\"itemEntityId\":\"i3\",\"score\":0.003007015026561692}\n  ]\n}\n```\n\n*MyECommerceRecommendation* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n## Setting constraint \"unavailableItems\"\n\nNow let's send an item constraint \"unavailableItems\" (replace accessKey with your Access Key):\n\nNOTE: You can also use an SDK to send this event as described in the SDK sample above.\n\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"constraint\",\n  \"entityId\" : \"unavailableItems\",\n  \"properties\" : {\n    \"items\": [\"i4\", \"i14\", \"i11\"]\n  },\n  \"eventTime\" : \"2015-02-17T02:11:21.934Z\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Set a list of unavailable items\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"constraint\",\n  entity_id=\"unavailableItems\",\n  properties={\n    \"items\" : [\"<ITEM ID1>\", \"<ITEM ID2>\"]\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// Set a list of unavailable items\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'constraint',\n  'entityId' => 'unavailableItems',\n  'properties' => array('items' => array('<ITEM ID1>', '<ITEM ID2>'))\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Set a list of unavailable items\nclient.create_event(\n  '$set',\n  'constraint',\n  'unavailableItems', {\n    'properties' => { 'items' => ['<ITEM ID1>', '<ITEM ID2>'] }\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Set a list of unavailable items\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  
.entityType(\"constraint\")\n  .entityId(\"unavailableItems\")\n  .property(\"items\", ImmutableList.of(\"<ITEM ID1>\", \"<ITEM ID2>\"));\nclient.createEvent(itemEvent);\n```\n  </div>\n</div>\n\n\nTry to get recommendation for user *u1* again, the unavailable items (e.g. i4, i14, i11) won't be recommended anymore:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"userEntityId\": \"u1\",\n  \"number\": 4,\n  \"blackList\": [\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"itemEntityId\":\"i33\",\"score\":0.005999267822052019},{\"itemEntityId\":\"i3\",\"score\":0.0030070150265619003},{\"itemEntityId\":\"i2\",\"score\":0.0028489173099429527},{\"itemEntityId\":\"i5\",\"score\":0.0028489173099429527}]}\n```\n\nINFO: You should send a full list of unavailable items whenever there are any updates in the list. The latest event is used.\n\nWhen there are no more unavailable items, simply set an empty list, i.e.:\n\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"constraint\",\n  \"entityId\" : \"unavailableItems\",\n  \"properties\" : {\n    \"items\": []\n  },\n  \"eventTime\" : \"2015-02-18T02:11:21.934Z\"\n}'\n```\n\n## Advanced Query\n\nIn addition, the Query supports the following optional parameters: `categories`, `whitelist` and `blacklist`.\n\n### Recommend items in selected categories:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"userEntityId\": \"u1\",\n  \"number\": 4,\n  \"categories\" : [\"c4\", \"c3\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"itemEntityId\":\"i4\",\"score\":0.006009267718658978},{\"itemEntityId\":\"i33\",\"score\":0.005999267822052033},{\"itemEntityId\":\"i14\",\"score\":0.005261309429391667},{\"itemEntityId\":\"i2\",\"score\":0.002848917309942939}]}\n```\n\n### 
Recommend items in the whitelist:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"userEntityId\": \"u1\",\n  \"number\": 4,\n  \"whitelist\": [\"i1\", \"i2\", \"i3\", \"i21\", \"i22\", \"i23\", \"i24\", \"i25\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"itemEntityId\":\"i3\",\"score\":0.003007015026561692},{\"itemEntityId\":\"i2\",\"score\":0.002848917309942939},{\"itemEntityId\":\"i23\",\"score\":0.0016857619403278443},{\"itemEntityId\":\"i25\",\"score\":1.3707548965227745E-4}]}\n```\n\n### Recommend items not in the blackList:\n\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"userEntityId\": \"u1\",\n  \"number\": 4,\n  \"categories\" : [\"c4\", \"c3\"],\n  \"blacklist\": [\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"itemEntityId\":\"i4\",\"score\":0.006009267718658978},{\"itemEntityId\":\"i33\",\"score\":0.005999267822052033},{\"itemEntityId\":\"i14\",\"score\":0.005261309429391667},{\"itemEntityId\":\"i2\",\"score\":0.002848917309942939}]}\n```\n\n#### [Next: DASE Components Explained](../dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/leadscoring/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Lead Scoring)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'Lead Scoring Engine Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyLeadScoring* takes a JSON prediction\nquery, e.g. 
'{ \"landingPageId\" : \"example.com/page9\", \"referrerId\" : \"referrer10.com\", \"browser\": \"Firefox\" }' , and return a JSON predicted result.\nIn MyLeadScoring/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(\n  landingPageId: String,\n  referrerId: String,\n  browser: String\n) extends Serializable\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\"score\":0.7466666666666667}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(\n  score: Double\n) extends Serializable\n```\n\nFinally, `LeadScoringEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject LeadScoringEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"randomforest\" -> classOf[RFAlgorithm]),\n      classOf[Serving])\n  }\n}\n```\n\nEach DASE component of the `LeadScoringEngine` will be explained below.\n\nBy default, Spark's MLlib [RandomForest algorithm](https://spark.apache.org/docs/latest/mllib-ensembles.html#random-forests) is used.\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn MyLeadScoring/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads and selects data from the *Event Store*\n(data store of the *Event Server*). 
It returns `TrainingData`.\n\n```scala\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val viewPage: RDD[(String, Event)] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(Seq(\"view\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"page\")))(sc)\n      // PEventStore.find() returns RDD[Event]\n      .map { event =>\n        val sessionId = try {\n          event.properties.get[String](\"sessionId\")\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot get sessionId from event ${event}. ${e}.\")\n            throw e\n          }\n        }\n        (sessionId, event)\n      }\n\n    val buyItem: RDD[(String, Event)] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(Seq(\"buy\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // PEventStore.find() returns RDD[Event]\n      .map { event =>\n        val sessionId = try {\n          event.properties.get[String](\"sessionId\")\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot get sessionId from event ${event}. 
${e}.\")\n            throw e\n          }\n        }\n        (sessionId, event)\n      }\n\n    val session: RDD[Session] = viewPage.cogroup(buyItem)\n      .map { case (sessionId, (viewIter, buyIter)) =>\n        // the first view event of the session is the landing event\n        val landing = viewIter.reduce{ (a, b) =>\n          if (a.eventTime.isBefore(b.eventTime)) a else b\n        }\n        // any buy after landing\n        val buy = buyIter.filter( b => b.eventTime.isAfter(landing.eventTime))\n          .nonEmpty\n\n        try {\n          new Session(\n            landingPageId = landing.targetEntityId.get,\n            referrerId = landing.properties.getOrElse[String](\"referrerId\", \"\"),\n            browser = landing.properties.getOrElse[String](\"browser\", \"\"),\n            buy = buy\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot create session data from ${landing}. ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n\n    new TrainingData(session)\n  }\n}\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MyLeadScoring/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PEventStore` is an object which provides function to access data that is collected by PredictionIO Event Server.\n\nThis Lead Scoring Engine Template requires \"view\" and \"buy\" events with `sessionId` in event property.\n\n`PEventStore.find(...)` specifies the events that you want to read. In this case, \"user view page\" and \"user buy item\" events are read and then each is mapped to tuple of (sessionId, event). 
The events are then \"cogrouped\" by sessionId to find out the information in the session, such as first page view (landing page view), and whether the user converts (buy event), to create an RDD of Session as TrainingData:\n\n```scala\ncase class Session(\n  landingPageId: String,\n  referrerId: String,\n  browser: String,\n  buy: Boolean // buy or not\n) extends Serializable\n\nclass TrainingData(\n  val session: RDD[Session]\n) extends Serializable\n\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\nNOTE: You could modify the DataSource to read events other than the default **buy** if the definition of conversion is not \"buy item\" event.\n\n### Data Preparator\n\nIn MyLeadScoring/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nIn this template, `prepare` will select the features from the Session object and convert them to the data required by the MLlib's RandomForest algorithm.\n\nThe `PreparedData` is defined as:\n\n```scala\nclass PreparedData(\n  val labeledPoints: RDD[LabeledPoint],\n  val featureIndex: Map[String, Int],\n  val featureCategoricalIntMap: Map[String, Map[String, Int]]\n) extends Serializable\n```\n\nThe `LabeledPoint` class is defined in Spark MLlib and it's required for the RandomForest Algorithm. The `featureIndex` is a Map of feature name to the position index in the feature vector. `featureCategoricalIntMap` is a Map of categorical feature name to the Map of categorical value map for this feature.\n\nBy default, the features used for classification are \"landingPage\", \"referrer\" and \"browser\". 
Since these features contain categorical values, we need to create a map of categorical values to the integer values for the algorithm to use.\n\nNOTE: You can customize the template to use other features.\n\nFor example, if the feature \"landingPage\" can be any of the following values: \"page1\", \"page2\", \"page3\", \"page4\", we can create a categorical Int value Map, such as:\n\n```scala\nMap(\n  \"page1\" -> 0,\n  \"page2\" -> 1,\n  \"page3\" -> 2,\n  \"page4\" -> 3\n)\n```\n\nInstead of manually creating such a Map, a helper method `createCategoricalIntMap()` is defined in **Preparator.scala** for this purpose.\n\nEach `labeledPoint` is a label and a feature vector. The element index of the vector for the corresponding feature is defined by `featureIndex` Map. By default, it's defined as\n\n```scala\nval featureIndex = Map(\n  \"landingPage\" -> 0,\n  \"referrer\" -> 1,\n  \"browser\" -> 2\n)\n```\n\nwhich means that index 0 of the feature vector is the \"landingPage\" feature, index 1 is \"referrer\" feature, and so on.\n\nThe `prepare()` of the `Preparator` class first finds out all possible categorical values for the features and creates a categorical Int map. Then it converts the `Session` object to the `LabeledPoint` by creating the feature vector and the label. 
In this case, the label is 1 if there is any conversion and 0 if there is no conversion:\n\n```scala\nclass Preparator extends PPreparator[TrainingData, PreparedData] {\n\n  ...\n\n  def prepare(sc: SparkContext, td: TrainingData): PreparedData = {\n\n    // find out all values of the each feature\n    val landingValues = td.session.map(_.landingPageId).distinct.collect\n    val referrerValues = td.session.map(_.referrerId).distinct.collect\n    val browserValues = td.session.map(_.browser).distinct.collect\n\n    // map feature value to integer for each categorical feature\n    val featureCategoricalIntMap = Map(\n      \"landingPage\" -> createCategoricalIntMap(landingValues, \"\"),\n      \"referrer\" -> createCategoricalIntMap(referrerValues, \"\"),\n      \"browser\" -> createCategoricalIntMap(browserValues, \"\")\n    )\n    // index position of each feature in the vector\n    val featureIndex = Map(\n      \"landingPage\" -> 0,\n      \"referrer\" -> 1,\n      \"browser\" -> 2\n    )\n\n    // inject some default to cover default cases\n    val defaults = Seq(\n      new Session(\n        landingPageId = \"\",\n        referrerId = \"\",\n        browser = \"\",\n        buy = false\n      ),\n      new Session(\n        landingPageId = \"\",\n        referrerId = \"\",\n        browser = \"\",\n        buy = true\n      ))\n\n    val defaultRDD = sc.parallelize(defaults)\n    val sessionRDD = td.session.union(defaultRDD)\n\n    val labeledPoints: RDD[LabeledPoint] = sessionRDD.map { session =>\n      logger.debug(s\"${session}\")\n      val label = if (session.buy) 1.0 else 0.0\n\n      val feature = new Array[Double](featureIndex.size)\n      feature(featureIndex(\"landingPage\")) =\n        featureCategoricalIntMap(\"landingPage\")(session.landingPageId).toDouble\n      feature(featureIndex(\"referrer\")) =\n        featureCategoricalIntMap(\"referrer\")(session.referrerId).toDouble\n      feature(featureIndex(\"browser\")) =\n        
featureCategoricalIntMap(\"browser\")(session.browser).toDouble\n\n      LabeledPoint(label, Vectors.dense(feature))\n    }.cache()\n\n    logger.debug(s\"labelelPoints count: ${labeledPoints.count()}\")\n    new PreparedData(\n      labeledPoints = labeledPoints,\n      featureIndex = featureIndex,\n      featureCategoricalIntMap = featureCategoricalIntMap)\n  }\n}\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn MyLeadScoring/src/main/scala/***RFAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. `train` is responsible for\ntraining the predictive model; `predict` is\nresponsible for using this model to make predictions.\n\nThe default algorithm is Spark's MLlib [RandomForest algorithm](https://spark.apache.org/docs/latest/mllib-ensembles.html#random-forests).\n\n### Algorithm parameters\n\nThe Algorithm takes the following parameters, as defined by the `RFAlgorithmParams` case class:\n\n```scala\ncase class RFAlgorithmParams(\n  numTrees: Int,\n  featureSubsetStrategy: String,\n  impurity: String,\n  maxDepth: Int,\n  maxBins: Int,\n  seed: Option[Int]\n) extends Params\n```\n\nYou can find more description of the parameters in MLlib's [RandomForest documentation](https://spark.apache.org/docs/latest/mllib-ensembles.html#random-forests) and [Decision Tree documentation](https://spark.apache.org/docs/latest/mllib-decision-tree.html).\n\nThe values of these parameters can be specified in *algorithms* of\nMyLeadScoring/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"randomforest\",\n      \"params\": {\n        \"numClasses\": 3,\n        \"numTrees\": 5,\n        \"featureSubsetStrategy\": \"auto\",\n        \"impurity\": \"variance\",\n        \"maxDepth\": 4,\n        \"maxBins\": 100,\n        \"seed\" : 12345\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load these values into the constructor of the 
`RFAlgorithm` class.\n\n```scala\nclass RFAlgorithm(val ap: RFAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, RFModel, Query, PredictedResult] {\n    ...\n}\n```\n\n### train(...)\n\n`train` is called when you run **pio train** to train a predictive model.\n\nThe algorithm first generates the `categoricalFeaturesInfo` which is required by the MLlib. This indicates how many categorical values for each categorical features. Then it calls `RandomForest.trainRegressor()` to train a `RandomForestModel` to predict the probability that the user may convert.\n\n```scala\n\n  def train(sc: SparkContext, pd: PreparedData): RFModel = {\n\n    val categoricalFeaturesInfo = pd.featureCategoricalIntMap\n      .map { case (f, m) =>\n        (pd.featureIndex(f), m.size)\n      }\n\n    logger.info(s\"categoricalFeaturesInfo: ${categoricalFeaturesInfo}\")\n\n    // use random seed if seed is not specified\n    val seed = ap.seed.getOrElse(scala.util.Random.nextInt())\n\n    val forestModel: RandomForestModel = RandomForest.trainRegressor(\n      input = pd.labeledPoints,\n      categoricalFeaturesInfo = categoricalFeaturesInfo,\n      numTrees = ap.numTrees,\n      featureSubsetStrategy = ap.featureSubsetStrategy,\n      impurity = ap.impurity,\n      maxDepth = ap.maxDepth,\n      maxBins = ap.maxBins,\n      seed = seed)\n\n    new RFModel(\n      forest = forestModel,\n      featureIndex = pd.featureIndex,\n      featureCategoricalIntMap = pd.featureCategoricalIntMap\n    )\n  }\n\n```\n\nPredictionIO will automatically store the returned model after training.\n\nThe `RFModel` stores the `RandomForestModel`, and the `featureIndex` and `featureCategoricalIntMap`:\n\n```scala\nclass RFModel(\n  val forest: RandomForestModel,\n  val featureIndex: Map[String, Int],\n  val featureCategoricalIntMap: Map[String, Map[String, Int]]\n) extends Serializable {\n  ...\n}\n```\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. 
PredictionIO converts the query, such as '{ \"landingPageId\" : \"example.com/page9\", \"referrerId\" : \"referrer10.com\", \"browser\": \"Firefox\" }' to the `Query` class you defined previously in `Engine.scala`.\n\nThe `predict()` function does the following:\n\n1. convert the Query to the required feature vector input\n2. use the `RandomForestModel` to predict the probability of conversion given this feature.\n\n```scala\n\n  ...\n\n  def predict(model: RFModel, query: Query): PredictedResult = {\n\n    val featureIndex = model.featureIndex\n    val featureCategoricalIntMap = model.featureCategoricalIntMap\n\n    val landingPageId = query.landingPageId\n    val referrerId = query.referrerId\n    val browser = query.browser\n\n    // look up categorical feature Int for landingPageId\n    val landingFeature = lookupCategoricalInt(\n      featureCategoricalIntMap = featureCategoricalIntMap,\n      feature = \"landingPage\",\n      value = landingPageId,\n      default = \"\"\n    ).toDouble\n\n\n    // look up categorical feature Int for referrerId\n    val referrerFeature = lookupCategoricalInt(\n      featureCategoricalIntMap = featureCategoricalIntMap,\n      feature = \"referrer\",\n      value = referrerId,\n      default = \"\"\n    ).toDouble\n\n    // look up categorical feature Int for brwoser\n    val browserFeature = lookupCategoricalInt(\n      featureCategoricalIntMap = featureCategoricalIntMap,\n      feature = \"browser\",\n      value = browser,\n      default = \"\"\n    ).toDouble\n\n    // create feature Array\n    val feature = new Array[Double](model.featureIndex.size)\n    feature(featureIndex(\"landingPage\")) = landingFeature\n    feature(featureIndex(\"referrer\")) = referrerFeature\n    feature(featureIndex(\"browser\")) = browserFeature\n\n    val score = model.forest.predict(Vectors.dense(feature))\n    new PredictedResult(score)\n  }\n\n  ...\n\n```\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## 
Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyLeadScoring/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving extends LServing[Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\nNOTE: An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object LeadScoringEngine` inside ***Engine.scala*** and  the corresponding parameters in ***engine.json***. Since only one algorithm is implemented by default, this `Seq` contains one element.\n"
  },
  {
    "path": "docs/manual/source/templates/leadscoring/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Lead Scoring Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template predicts the probability of an user will convert (conversion event by user) in the current session.\n\nWARNING: This template requires PredictionIO version >= 0.9.0\n\n## Usage\n\n### Event Data Requirements\n\nBy default, the template requires the following events to be collected:\n\n- 'page view' events with session ID\n- the first page view event can optionally provide the browser and referrer ID\n- user 'buy' event with session ID\n\nThe landing page ID, referrer ID, browser information and user's buy event will be used to train the model.\n\nNOTE: You can customize what the \"conversion\" event is. It's \"buy\" item event by default but it can be modified to others such as \"subscribe\".\n\n### Input Query\n\n- Landing page ID\n- Referrer ID\n- Browser\n\n### Output PredictedResult\n\n- score\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. 
Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyLeadScoring', template_name: 'Lead Scoring Engine Template', template_repo: 'PredictionIO/template-scala-parallel-leadscoring' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect training data for this Engine. By default,\n Lead Scoring Engine Template supports the following entities: **user**, **page**, and **item**. A user views a page, and buys an item.\n\nNote that a \"sessionId\" property is required to indicate these events happen in the same session. In the first visit of a user, you should specify the optional \"referrer ID\" and \"browser\" information. These are used to determine where the user comes from and the browser information.\n\nIn summary, this template requires user-view-page event and user-buy-item events with the session ID, referrer ID and browser properties.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\nFor example, when a user with ID u0 views a URL page \"example.com/page0\" at time `2014-11-02T09:39:45.618-08:00`, with session ID \"akdj230fj8ass\" (current time will be used if eventTime is not specified) you can send the event to Event Server. 
Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"page\",\n  \"targetEntityId\" : \"example.com/page0\",\n  \"properties\" : {\n    \"sessionId\" : \"akdj230fj8ass\",\n    \"referrerId\" : \"referrer0.com\",\n    \"browser\" : \"Firefox\"\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# A user views a page\n\nclient.create_event(\n  event=\"view\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"page\",\n  target_entity_id=<PAGE ID>,\n  properties = {\n    \"sessionId\": <SESSION ID>, # required\n    \"referrerId\": <REFERRER ID>, # optional. but should specify this if you have this information when user views the landing page\n    \"browser\": <BROWSER> # optional. 
but should specify this if you have this information when user views the landing page\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user views a page\n$client->createEvent(array(\n  'event' => 'view',\n  'entityType' => 'user',\n  'entityId' => <USER ID>,\n  'targetEntityType' => 'page',\n  'targetEntityId' => <PAGE ID>,\n  'properties' => array(\n    'sessionId' => <SESSION ID>,\n    'referrerId' => <REFERRER ID>,\n    'browser' => <BROWSER>\n  )\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# A user views a page.\nclient.create_event(\n  'view',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'page',\n    'targetEntityId' => <PAGE ID>,\n    'properties' => {\n      'sessionId' => <SESSION ID>,\n      'referrerId' => <REFERRER ID>,\n      'browser' => <BROWSER>\n    }\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user views a page\nEvent viewEvent = new Event()\n    .event(\"view\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"page\")\n    .targetEntityId(<PAGE_ID>)\n    .property(\"sessionId\", \"<SESSION ID>\")\n    .property(\"referrerId\", \"<REFERRER ID>\")\n    .property(\"browser\", \"<BROWSER>\");\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\n\nIn the same browsing session \"akdj230fj8ass\", the user with ID u0 buys an item i0 on time `2014-11-02T09:42:00.123-08:00` (current time will be used if eventTime is not specified), you 
can send the following buy event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"buy\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"properties\" : {\n    \"sessionId\" : \"akdj230fj8ass\"\n  },\n  \"eventTime\" : \"2014-11-02T09:42:00.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user buys an item\n\nclient.create_event(\n  event=\"buy\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>,\n  properties = {\n    \"sessionId\": <SESSION ID>, # required\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user buys an item\n$client->createEvent(array(\n  'event' => 'buy',\n  'entityType' => 'user',\n  'entityId' => <USER ID>,\n  'targetEntityType' => 'item',\n  'targetEntityId' => <ITEM ID>,\n  'properties' => array(\n    'sessionId' => <SESSION ID>\n  )\n));\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user buys an item.\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>,\n    'properties' => {\n      'sessionId' => <SESSION ID>\n    }\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user buys an item\nEvent buyEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>)\n    .property(\"sessionId\", \"<SESSION ID>\");\nclient.createEvent(buyEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 
'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. The sample data includes 50 sessions of events. In each session, a randomly selected user (with user ID \"u1\" to \"u10\") lands on a page (randomly selected from example.com/page1 to example.com/page20) with referrerId (randomly selected from referrer1.com to referrer10.com) and browser information. The user may view more pages, and may or may not buy an item (with item ID \"i1\" to \"i50\").\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyLeadScoring` directory. Execute the following to import the data:\n\n```\n$ cd MyLeadScoring\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u8 buys item i13\nsession c347980abdf711e4b135b8e8560679ba\nUser u5 lands on page example.com/page11 referrer referrer4.com browser Firefox\nUser u5 views page example.com/page8\nUser u5 views page example.com/page17\nUser u5 buys item i5\n166 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyLeadScoring' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyLeadScoring' } %>\n\n## 6. Use the Engine\n\nNow, You can retrieve the results. When a user lands on your page \"example.com/page9\", with referrer \"referrer10.com\" and browser \"Firefox\", you can get the predicted lead score by sending this JSON '{ \"landingPageId\" : \"example.com/page9\", \"referrerId\" : \"referrer10.com\", \"browser\": \"Firefox\" }' to the deployed engine. 
The engine will return a JSON with the score.\n\nSimply send a query by making a HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{\n  \"landingPageId\" : \"example.com/page9\",\n  \"referrerId\" : \"referrer10.com\",\n  \"browser\": \"Firefox\" }' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\n  \"landingPageId\" : \"example.com/page9\",\n  \"referrerId\" : \"referrer10.com\",\n  \"browser\": \"Firefox\"\n})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array(\n  'landingPageId' => 'example.com/page9',\n  'referrerId' => 'referrer10.com',\n  'browser' => 'Firefox'\n));\n\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query(\n  'landingPageId' => 'example.com/page9',\n  'referrerId' => 'referrer10.com',\n  'browser' => 'Firefox'\n)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// 
query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n  \"landingPageId\", \"example.com/page9\",\n  \"referrerId\", \"referrer10.com\",\n  \"browser\", \"Firefox\"\n));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\"score\":0.7466666666666667}\n```\n\n*MyLeadScoring* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n#### [Next: DASE Components Explained](/templates/leadscoring/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/productranking/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Product Ranking)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'Product Ranking Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyProductRanking* takes a JSON prediction\nquery, e.g.  
`{ \"user\": \"u2\", \"items\": [\"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\"] }`, and return a JSON predicted result.\nIn MyProductRanking/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(\n  user: String,\n  items: List[String]\n) extends Serializable\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\"itemScores\":[\n  {\"item\":\"i5\",\"score\":1.0038217983580324},\n  {\"item\":\"i3\",\"score\":0.00598658734782459},\n  {\"item\":\"i2\",\"score\":0.004048103059012265},\n  {\"item\":\"i9\",\"score\":-1.966935819737517E-4},\n  {\"item\":\"i1\",\"score\":-0.0016841195307744916},\n  {\"item\":\"i31\",\"score\":-0.0019770986240634503},\n  {\"item\":\"i10\",\"score\":-0.0031498317618844918}],\n  \"isOriginal\":false}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(\n  itemScores: Array[ItemScore],\n  isOriginal: Boolean // set to true if the items are not ranked at all.\n) extends Serializable\n\ncase class ItemScore(\n  item: String,\n  score: Double\n) extends Serializable\n```\n\nFinally, `ProductRankingEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject ProductRankingEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n```\n\n### Spark MLlib\n\nThe PredictionIO Product Ranking Engine Template integrates Spark's MLlib ALS algorithm under the DASE\narchitecture. We will take a closer look at the DASE code below.\n\nThe MLlib ALS algorithm takes training data of RDD type, i.e. 
`RDD[Rating]` and train a model, which is a `MatrixFactorizationModel` object.\n\nYou can visit [here](https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html) to learn more about MLlib's ALS collaborative filtering algorithm.\n\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn MyProductRanking/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads and selects data from the *Event Store*\n(data store of the *Event Server*). It returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(...) ...\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(...) ...\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(...) 
...\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD\n    )\n  }\n}\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MyProductRanking/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PEventStore` is an object which provides function to access data that is collected by PredictionIO Event Server.\n\nThis Product Ranking Engine Template requires \"user\" and \"item\" entities that are set by events.\n\n`PEventStore.aggregateProperties(...)` aggregates properties of the `user` and `item` that are set, unset, or delete by special events **$set**, **$unset** and **$delete**. Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nThe following code aggregates the properties of `user` and then map each result to a `User()` object.\n\n```scala\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        // placeholder for expanding user properties\n        User()\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. 
Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n```\nIn the template, `User()` object is a simple dummy as a placeholder for you to customize and expand.\n\n\nSimilarly, the following code aggregates the properties of `item` and then map each result to a `Item()` object.\n\n```scala\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // placeholder for expanding item properties\n        Item()\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n```\n\nIn the template, `Item()` object is a simple dummy as a placeholder for you to customize and expand.\n\n`PEventStore.find(...)` specifies the events that you want to read. 
In this case, \"user view item\" events are read and then each is mapped to a `ViewEvent()` object.\n\n```scala\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // PEventStore.find() returns RDD[Event]\n      .map { event =>\n        val viewEvent = try {\n          event.event match {\n            case \"view\" => ViewEvent(\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        viewEvent\n      }.cache()\n\n```\n\n`ViewEvent` case class is defined as:\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nINFO: For flexibility, this template is designed to support user ID and item ID in String.\n\n`TrainingData` contains an RDD of `User`, `Item` and `ViewEvent` objects. The class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable { ... }\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\n### Data Preparator\n\nIn MyProductRanking/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. 
At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to `PreparedData`:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn MyProductRanking/src/main/scala/***ALSAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. `train` is responsible for\ntraining the predictive model;`predict` is\nresponsible for using this model to make prediction.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib ALS algorithm,\ni.e. 
`ALS.trainImplicit()`, is used to train a predictive model.\n\n\n```scala\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n\n    ...\n\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures.collectAsMap.toMap,\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = 
itemStringIntMap\n    )\n  }\n```\n\n#### Working with Spark MLlib's ALS.trainImplicit(....)\n\nMLlib ALS does not support `String` user ID and item ID. `ALS.trainImplicit` thus also assumes int-only `Rating` object. First, you can rename MLlib's Integer-only `Rating` to `MLlibRating` for clarity:\n\n```\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n```\n\nIn order to use MLlib's ALS algorithm, we need to convert the `viewEvents` into `MLlibRating`. There are two things we need to handle:\n\n1. Map user and item String ID of the ViewEvent into Integer ID, as required by `MLlibRating`.\n2. `ViewEvent` object is an implicit event that does not have an explicit rating value. `ALS.trainImplicit()` supports implicit preference. If the `MLlibRating` has higher rating value, it means higher confidence that the user prefers the item. Hence we can aggregate how many times the user has viewed the item to indicate the confidence level that the user may prefer the item.\n\nYou create a bi-directional map with `BiMap.stringInt` which maps each String record to an Integer index.\n\n```scala\nval userStringIntMap = BiMap.stringInt(data.users.keys)\nval itemStringIntMap = BiMap.stringInt(data.items.keys)\n```\n\nThen convert the user and item String ID in each ViewEvent to Int with these BiMaps. We use default -1 if the user or item String ID couldn't be found in the BiMap and filter out these events with invalid user and item ID later. 
After filtering, we use `reduceByKey()` to add up all values for the same key (uindex, iindex) and then finally map to `MLlibRating` object.\n\n```scala\n\nval mllibRatings = data.viewEvents\n  .map { r =>\n    // Convert user and item String IDs to Int index for MLlib\n    val uindex = userStringIntMap.getOrElse(r.user, -1)\n    val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n    if (uindex == -1)\n      logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n        + \" to Int index.\")\n\n    if (iindex == -1)\n      logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n        + \" to Int index.\")\n\n    ((uindex, iindex), 1)\n  }.filter { case ((u, i), v) =>\n    // keep events with valid user and item index\n    (u != -1) && (i != -1)\n  }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n  .map { case ((u, i), v) =>\n    // MLlibRating requires integer index for user and item\n    MLlibRating(u, i, v)\n  }\n\n```\n\nIn addition to `RDD[MLlibRating]`, `ALS.trainImplicit` takes the following parameters: *rank*, *iterations*, *lambda* and *seed*.\n\nThe values of these parameters are specified in *algorithms* of\nMyProductRanking/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load these values into the constructor `ap`,\nwhich has a corresponding case class `ALSAlgorithmParams`:\n\n```scala\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n```\n\nThe `seed` parameter is an optional parameter, which is used by the MLlib ALS algorithm internally to generate random values. If the `seed` is not specified, the current system time is used and hence each training run may produce different results. 
Specify a fixed value for the `seed` if you want to have a deterministic result (for example, when you are testing).\n\n`ALS.trainImplicit()` then returns a `MatrixFactorizationModel` model which contains two RDDs: userFeatures and productFeatures. They correspond to the user X latent features matrix and item X latent features matrix, respectively. In this case, we will make use of both the userFeatures and productFeatures matrices to rank the items for the user. These matrices are stored as a local model. You can see that the `ALSModel` class is defined as:\n\n```scala\nclass ALSModel(\n  val rank: Int,\n  val userFeatures: Map[Int, Array[Double]],\n  val productFeatures: Map[Int, Array[Double]],\n  val userStringIntMap: BiMap[String, Int],\n  val itemStringIntMap: BiMap[String, Int]\n) extends Serializable { ... }\n```\n\nPredictionIO will automatically store the returned model, i.e. `ALSModel` in this example.\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{ \"user\": \"u2\", \"items\": [\"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\"] }` to the `Query` class you defined previously.\n\nTo calculate the ranked scores of the items, we first look up the feature vector of this user (if the user exists). Then we look up the feature vectors of the items in the query (if the items exist). The score is the dot product of the user and item feature vectors. 
The items are then sorted by the score.\n\n```scala\n\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val itemStringIntMap = model.itemStringIntMap\n    val productFeatures = model.productFeatures\n\n    // default itemScores array if items are not ranked at all\n    lazy val notRankedItemScores =\n      query.items.map(i => ItemScore(i, 0)).toArray\n\n    model.userStringIntMap.get(query.user).map { userIndex =>\n      // lookup userFeature for the user\n      model.userFeatures.get(userIndex)\n    }.flatten // flatten Option[Option[Array[Double]]] to Option[Array[Double]]\n    .map { userFeature =>\n      val scores: Vector[Option[Double]] = query.items.toVector\n        .par // convert to parallel collection for parallel lookup\n        .map { iid =>\n          // convert query item id to index\n          val featureOpt: Option[Array[Double]] = itemStringIntMap.get(iid)\n            // productFeatures may not contain the item\n            .map (index => productFeatures.get(index))\n            // flatten Option[Option[Array[Double]]] to Option[Array[Double]]\n            .flatten\n\n          featureOpt.map(f => dotProduct(f, userFeature))\n        }.seq // convert back to sequential collection\n\n      // check if all scores is None (get rid of all None and see if empty)\n      val isAllNone = scores.flatten.isEmpty\n\n      if (isAllNone) {\n        logger.info(s\"No productFeature for all items ${query.items}.\")\n        PredictedResult(\n          itemScores = notRankedItemScores,\n          isOriginal = true\n        )\n      } else {\n        // sort the score\n        val ord = Ordering.by[ItemScore, Double](_.score).reverse\n        val sorted = query.items.zip(scores).map{ case (iid, scoreOpt) =>\n          ItemScore(\n            item = iid,\n            score = scoreOpt.getOrElse[Double](0)\n          )\n        }.sorted(ord).toArray\n\n        PredictedResult(\n          itemScores = sorted,\n          isOriginal = false\n  
      )\n      }\n    }.getOrElse {\n      logger.info(s\"No userFeature found for user ${query.user}.\")\n      PredictedResult(\n        itemScores = notRankedItemScores,\n        isOriginal = true\n      )\n    }\n\n  }\n\n```\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyProductRanking/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object RecommendationEngine` inside ***Engine.scala***. Since only\none `ALSAlgorithm` is implemented by default, this `Seq` contains one element.\n"
  },
  {
    "path": "docs/manual/source/templates/productranking/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Product Ranking Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template sorts a list of products for a user based on his/her preference. This is ideal for personalizing the display order of product page, catalog, or menu items if you have large number of options. It creates engagement and early conversion by placing products that a user prefers on the top.\n\n## Usage\n\n### Event Data Requirements\n\nBy default, this template takes the following data from Event Server as Training Data:\n\n- user *$set* events\n- item *$set* events\n- Users' *view* item events\n\nINFO: This template can easily be customized to consider more user events such as *buy*, *rate* and *like*.\n\n### Input Query\n\n- UserID\n- List of ItemIDs, which are the products to be ranked\n\n### Output PredictedResult\n\n- a ranked list of recommended itemIDs\n\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. 
Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyProductRanking', template_name: 'Product Ranking Engine Template', template_repo: 'PredictionIO/template-scala-parallel-productranking' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect some training data for the app of this Engine. By default,\nthe Product Ranking Engine Template supports 2 types of entities: **user** and\n**item**, and event **view**. A user can view an item. This template requires '$set' user event, '$set' item event and user-view-item events.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\nFor example, when a new user with id \"u0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for this user. To send this event, run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# Create a new user\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"user\",\n  entity_id=<USER_ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 
'user',\n  'entityId' => <USER ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# Create a new user\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\nEvent userEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"user\")\n  .entityId(<USER_ID>);\nclient.createEvent(userEvent);\n```\n  </div>\n</div>\n\n\nWhen a new item \"i0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for the item. Run the following `curl` command:\n\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"item\",\n  \"entityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Create a new item or set existing item's categories\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"item\",\n  entity_id=<ITEM_ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>\n));\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a new item or set existing item's categories\nclient.create_event(\n  '$set',\n  'item',\n  
<ITEM ID>\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Create a new item or set existing item's categories\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"item\")\n  .entityId(<ITEM_ID>);\nclient.createEvent(itemEvent)\n```\n  </div>\n</div>\n\n\nWhen the user \"u0\" view item \"i0\" on time `2014-11-10T12:34:56.123-08:00` (current time will be used if eventTime is not specified), you can send a view event. Run the following `curl` command:\n\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user views an item\n\nclient.create_event(\n  event=\"view\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\n// A user views an item\n$client->createEvent(array(\n   'event' => 'view',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user views an item.\nclient.create_event(\n  'view',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user views an item\nEvent viewEvent = new Event()\n    .event(\"view\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    
.targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. It imports 10 users (with user ID \"u1\" to \"u10\") and 50 items (with item ID \"i1\" to \"i50\"). Each user then randomly views 10 items.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MyProductRanking` directory. Execute the following to import the data:\n\n```\n$ cd MyProductRanking\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u10 views item i13\nUser u10 views item i19\nUser u10 views item i16\nUser u10 views item i39\nUser u10 views item i47\nUser u10 views item i7\n160 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyProductRanking' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyProductRanking' } %>\n\n## 6. Use the Engine\n\nNow you can retrieve predicted results. To rank items with IDs \"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\" for user \"u2\", send this JSON `{ \"user\": \"u2\", \"items\": [\"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\"] }` to the deployed engine and it will return a JSON of the ranked items. 
Simply send a query by making a HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"u2\", \"items\": [\"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\"]}' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\n  \"user\": \"u2\",\n  \"items\": [\"i1\", \"i3\", \"i10\", \"i2\", \"i5\", \"i31\", \"i9\"]\n})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array(\n  'user' => 'u2',\n  'items'=> array('i1', 'i3', 'i10', 'i2', 'i5', 'i31', 'i9') ));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query(\n  'user' => 'u2',\n  'items' => ['i1', 'i3', 'i10', 'i2', 'i5', 'i31', 'i9'])\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"user\",  \"u2\",\n        \"items\", ImmutableList.of(\"i1\", \"i3\", 
\"i10\", \"i2\", \"i5\", \"i31\", \"i9\")\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\n  \"itemScores\":[\n    {\"item\":\"i5\",\"score\":1.0038217983580324},\n    {\"item\":\"i3\",\"score\":0.00598658734782459},\n    {\"item\":\"i2\",\"score\":0.004048103059012265},\n    {\"item\":\"i9\",\"score\":-1.966935819737517E-4},\n    {\"item\":\"i1\",\"score\":-0.0016841195307744916},\n    {\"item\":\"i31\",\"score\":-0.0019770986240634503},\n    {\"item\":\"i10\",\"score\":-0.0031498317618844918}\n  ],\n  \"isOriginal\":false\n}\n```\n\n*MyProductRanking* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n#### [Next: DASE Components Explained](/templates/productranking/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/batch-evaluator.html.md",
    "content": "---\ntitle: Batch Persistable Evaluator (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis how-to tutorial explains how you can also use `pio eval` to persist predicted results for a batch of queries. Please read the [Evaluation](/templates/recommendation/evaluation/) guide first to understand the usage of DataSource's `readEval()` and the Evaluation component.\n\nWARNING: This tutorial is based on some experimental and developer features, which may change in a future release.\n\nNOTE: This tutorial is based on Recommendation template version v0.3.2\n\n\n## 1. Modify DataSource\n\nModify DataSource's `readEval()` to generate the batch of Queries on which you want to run batch prediction.\n\n```scala\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    // This function only return one evaluation data set\n\n    // Create your own queries here. 
Below are provided as examples.\n    // for example, you may get all distinct user id from the trainingData to create the Query\n    val batchQueries: RDD[Query] = sc.parallelize(\n      Seq(\n        Query(user = \"1\", num = 10),\n        Query(user = \"3\", num = 15),\n        Query(user = \"5\", num = 20)\n      )\n    )\n\n    val queryAndActual: RDD[(Query, ActualResult)] = batchQueries.map (q =>\n      // the ActualResult contain dummy empty rating array\n      // because we not interested in Actual result for batch predict purpose.\n      (q, ActualResult(Array()))\n    )\n\n    val evalDataSet = (\n      readTraining(sc),\n      new EmptyEvaluationInfo(),\n      queryAndActual\n    )\n\n    Seq(evalDataSet)\n  }\n\n```\n\nNOTE: Alternatively, you can create a new DataSource extending original DataSource. Then you can add the new one in Engine.scala and specify which one to use in engine.json.\n\n<!-- TODO add more details -->\n\n## 2. Add a new Evaluator\n\nCreate a new file `BatchPersistableEvaluator.scala`. 
Unlike the `MetricEvaluator`, this Evaluator simply writes the Query and corresponding PredictedResult to the output directory without performing any metrics calculation.\n\nNote that output directory is specified by the variable `outputDir`.\n\n```scala\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.Engine\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.core.BaseEvaluator\nimport org.apache.predictionio.core.BaseEvaluatorResult\nimport org.apache.predictionio.workflow.WorkflowParams\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport org.json4s.DefaultFormats\nimport org.json4s.Formats\nimport org.json4s.native.Serialization\n\nimport grizzled.slf4j.Logger\n\nclass BatchPersistableEvaluatorResult extends BaseEvaluatorResult {}\n\nclass BatchPersistableEvaluator extends BaseEvaluator[\n  EmptyEvaluationInfo,\n  Query,\n  PredictedResult,\n  ActualResult,\n  BatchPersistableEvaluatorResult] {\n  @transient lazy val logger = Logger[this.type]\n\n  // A helper object for the json4s serialization\n  case class Row(query: Query, predictedResult: PredictedResult)\n    extends Serializable\n\n  def evaluateBase(\n    sc: SparkContext,\n    evaluation: Evaluation,\n    engineEvalDataSet: Seq[(\n      EngineParams,\n      Seq[(EmptyEvaluationInfo, RDD[(Query, PredictedResult, ActualResult)])])],\n    params: WorkflowParams): BatchPersistableEvaluatorResult = {\n\n    /** Extract the first data, as we are only interested in the first\n      * evaluation. 
It is possible to relax this restriction, and have the\n      * output logic below to write to different directory for different engine\n      * params.\n      */\n\n    require(\n      engineEvalDataSet.size == 1, \"There should be only one engine params\")\n\n    val evalDataSet = engineEvalDataSet.head._2\n\n    require(evalDataSet.size == 1, \"There should be only one RDD[(Q, P, A)]\")\n\n    val qpaRDD = evalDataSet.head._2\n\n    // qpaRDD contains 3 queries we specified in readEval, the corresponding\n    // predictedResults, and the dummy actual result.\n\n    /** The output directory. Better to use absolute path if you run on cluster.\n      * If your database has a Hadoop interface, you can also convert the\n      * following to write to your database in parallel as well.\n      */\n    val outputDir = \"batch_result\"\n\n    logger.info(\"Writing result to disk\")\n    qpaRDD\n      .map { case (q, p, a) => Row(q, p) }\n      .map { row =>\n        // Convert into a json\n        implicit val formats: Formats = DefaultFormats\n        Serialization.write(row)\n      }\n      .saveAsTextFile(outputDir)\n\n    logger.info(s\"Result can be found in $outputDir\")\n\n    new BatchPersistableEvaluatorResult()\n  }\n}\n```\n\n\n## 3. Define Evaluation and EngineParamsGenerator object\n\n\nCreate a new file `BatchEvaluation.scala`. Note that the new `BatchPersistableEvaluator` is used. 
The `BatchEngineParamsList` specifies the parameters of the engine.\n\nModify the appName parameter in `DataSourceParams` to match your app name.\n\n```scala\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.Evaluation\n\nobject BatchEvaluation extends Evaluation {\n  // Define Engine and Evaluator used in Evaluation\n\n  /**\n    * Specify the new BatchPersistableEvaluator.\n    */\n  engineEvaluator =\n    (RecommendationEngine(), new BatchPersistableEvaluator())\n}\n\nobject BatchEngineParamsList extends EngineParamsGenerator {\n  // We only interest in a single engine params.\n  engineParamsList = Seq(\n    EngineParams(\n      dataSourceParams =\n        DataSourceParams(appName = \"INVALID_APP_NAME\", evalParams = None),\n      algorithmParamsList = Seq((\"als\", ALSAlgorithmParams(\n        rank = 10,\n        numIterations = 20,\n        lambda = 0.01,\n        seed = Some(3L))))))\n}\n\n```\n\n## 4. Build and Run\n\nRun the following command to build:\n\n```\n$ pio build\n```\n\nAfter the build is successful, you should see the following output:\n\n```\n[INFO] [Console$] Your engine is ready for training.\n```\n\nTo run the `BatchEvaluation` with `BatchEngineParamsList`, run the following command:\n\n```\n$ pio eval org.template.recommendation.BatchEvaluation   org.template.recommendation.BatchEngineParamsList\n```\n\nYou should see the following output:\n\n```\n[INFO] [BatchPersistableEvaluator] Writing result to disk\n[INFO] [BatchPersistableEvaluator] Result can be found in batch_result\n[INFO] [CoreWorkflow$] Updating evaluation instance with result: org.template.recommendation.BatchPersistableEvaluatorResult@2f886889\n[INFO] [CoreWorkflow$] runEvaluation completed\n```\n\nYou should find the batch queries and the predicted results in the output directory `batch_result/`.\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/blacklist-items.html.md",
    "content": "---\ntitle: Filter Recommended Items by Blacklist in Query (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nLet's say you want to supply a blackList for each query to exclude some items from recommendation (for example, in the browsing session, the user just added some items to the shopping cart, or you have a list of items you want to filter out, so you may want to supply a blackList in the Query). This how-to will demonstrate how you can do it.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-recommendation/blacklist-items).\n\nNote that you may also use the [E-Commerce Recommendation Template](/templates/ecommercerecommendation/quickstart/), which supports this feature by default.\n\nIf you want to filter out items based on specific user-to-item events logged by the EventServer (e.g. filter all items which the user has \"buy\" events on), you can use the [E-Commerce Recommendation Template](/templates/ecommercerecommendation/quickstart/). 
Please refer to the algorithm parameters \"unseenOnly\" and \"seenEvents\" of the E-Commerce Recommendation Template.\n\n## Add Query Parameter\n\nFirst of all, we need to add a query parameter to send the IDs of the items that the user has already seen.\nLet's modify `case class Query` in MyRecommendation/src/main/scala/***Engine.scala***:\n\n```scala\ncase class Query(\n  user: String,\n  num: Int,\n  blackList: Set[String] // ADDED\n)\n```\n\n## Filter the Data\n\nThen we need to change the code that computes the recommendation score to filter out the seen items.\nLet's modify the class in MyRecommendation/src/main/scala/***ALSModel.scala***. Just add the following two methods to that class.\n\n```scala\nimport com.github.fommil.netlib.BLAS.{getInstance => blas} // ADDED\n\n...\n\n  // ADDED\n  def recommendProductsWithFilter(user: Int, num: Int, productIdFilter: Set[Int]) = {\n    val filteredProductFeatures = productFeatures\n      .filter { case (id, _) => !productIdFilter.contains(id) } // (*)\n    recommend(userFeatures.lookup(user).head, filteredProductFeatures, num)\n      .map(t => Rating(user, t._1, t._2))\n  }\n\n  // ADDED\n  private def recommend(\n      recommendToFeatures: Array[Double],\n      recommendableFeatures: RDD[(Int, Array[Double])],\n      num: Int): Array[(Int, Double)] = {\n    val scored = recommendableFeatures.map { case (id, features) =>\n      (id, blas.ddot(features.length, recommendToFeatures, 1, features, 1))\n    }\n    scored.top(num)(Ordering.by(_._2))\n  }\n\n...\n\n```\n\nPlease note that the method `recommend` is a copy of the method `org.apache.spark.mllib.recommendation.MatrixFactorizationModel#recommend`. We can't reuse this because it’s private.\nThe method `recommendProductsWithFilter` is an almost complete copy of the `org.apache.spark.mllib.recommendation.MatrixFactorizationModel#recommendProducts` method. 
The only difference is the line marked with the comment ‘(*)’, where we apply filtering.\n\n## Put It All Together\n\nNext we need to invoke our new method with filtering when we query recommendations.\nLet's modify the method `predict` in MyRecommendation/src/main/scala/***ALSAlgorithm.scala***:\n\n```scala\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProductsWithFilter() returns Array[MLlibRating], which uses item Int\n      // index. Convert it to String ID for returning PredictedResult\n      val blackList = query.blackList.flatMap(model.itemStringIntMap.get) // ADDED\n      val itemScores = model\n        .recommendProductsWithFilter(userInt, query.num, blackList) // MODIFIED\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n```\n\n## Test the Result\n\nThen we can build/train/deploy the engine and test the result:\n\nThe query\n\n```bash\ncurl \\\n-H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"1\", \"num\": 4 }' \\\nhttp://localhost:8000/queries.json\n```\n\nwill return the result\n\n```json\n{\n    \"itemScores\": [{\n        \"item\": \"32\",\n        \"score\": 13.405593705856901\n    }, {\n        \"item\": \"90\",\n        \"score\": 10.980439687813178\n    }, {\n        \"item\": \"75\",\n        \"score\": 10.748973860065737\n    }, {\n        \"item\": \"1\",\n        \"score\": 9.769636099226231\n    }]\n}\n```\n\nLet's say that the user has seen item `32`. The query\n\n```bash\ncurl \\\n-H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"1\", \"num\": 4, \"blackList\": [\"32\"] }' 
\\\nhttp://localhost:8000/queries.json\n```\n\nwill return the result\n\n```json\n{\n    \"itemScores\": [{\n        \"item\": \"90\",\n        \"score\": 10.980439687813178\n    }, {\n        \"item\": \"75\",\n        \"score\": 10.748973860065737\n    }, {\n        \"item\": \"1\",\n        \"score\": 9.769636099226231\n    }, {\n        \"item\": \"49\",\n        \"score\": 8.653951817512265\n    }]\n}\n```\n\nwithout item `32`.\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/customize-data-prep.html.md",
    "content": "---\ntitle: Customizing Data Preparator (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nData Preparator is where pre-processing actions occur. For example, one may\nwant to remove some very popular items from the training data because she thinks\nthat these items may not help in finding an individual person's tastes, or one may have\na black list of items that she wants to remove from the training data before\nfeeding it to the algorithm.\n\nThis section assumes that you have created a *MyRecommendation* engine based on\nthe [Recommendation Engine Template: QuickStart](/templates/recommendation/quickstart/). We will\ndemonstrate how to add filtering logic to exclude a list of items from the\ntraining data.\n\nA sample black list file containing the items to be excluded is provided in\n`./data/sample_not_train_data.txt`.\n\nA full end-to-end example can be found on\n[GitHub](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-recommendation/customize-data-prep).\n\n## The Data Preparator Component\n\nRecall [the DASE Architecture](/customize/), data is prepared by 2\ncomponents sequentially: *Data Source* and *Data Preparator*. 
*Data Source*\nreads data from the data store of Event Server and then *Data Preparator*\nprepares `RDD[Rating]` for the ALS algorithm.\n\nYou may modify any component in an engine template to fit your needs. This\nexample shows you how to add filtering logic to the Data Preparator.\n\n## Modify the Preparator\n\nThe Data Preparator component can be found in `src/main/scala/Preparator.scala`\nin the \"MyRecommendation\" directory. The unmodified version looks like the\nfollowing:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n```\n\nThe `prepare` method simply passes the ratings from `TrainingData` to\n`PreparedData`.\n\nYou can modify the `prepare` method to read a black list of items from a file\nand remove them from `TrainingData`, so it becomes:\n\n```scala\nimport scala.io.Source // ADDED\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    // MODIFIED HERE\n    val noTrainItems = Source.fromFile(\"./data/sample_not_train_data.txt\")\n      .getLines.toSet\n    // exclude noTrainItems from original trainingData\n    val ratings = trainingData.ratings.filter( r =>\n      !noTrainItems.contains(r.item)\n    )\n    new PreparedData(ratings)\n  }\n}\n```\n\n> We will show you how not to hardcode the path\n`./data/sample_not_train_data.txt` soon.\n\n\n## Deploy the Modified Engine\n\nNow you can deploy the modified engine as described in [Quick\nStart](quickstart.html).\n\nMake sure the `appName` defined in the file `engine.json` matches your *App Name*:\n\n```\n...\n\"datasource\": {\n  \"params\" : {\n    \"appName\": \"YourAppName\"\n  }\n},\n...\n```\n\nTo build *MyRecommendation* and deploy it as a service:\n\n```\n$ pio build\n$ pio train\n$ pio deploy\n```\n\nThis will deploy 
an engine that binds to http://localhost:8000. You can visit\nthat page in your web browser to check its status.\n\nNow, you can try to retrieve predicted results. To recommend 4 movies to user\nwhose ID is 1, send this JSON `{ \"user\": \"1\", \"num\": 4 }` to the deployed engine\n\n```\n$ curl -H \"Content-Type: application/json\" -d '{ \"user\": \"1\", \"num\": 4 }' http://localhost:8000/queries.json\n```\n\nand it will return a JSON of recommended movies.\n\n```json\n{\n  \"itemScores\": [\n    {\"item\": \"22\", \"score\": 4.072304374729956},\n    {\"item\": \"62\", \"score\": 4.058482414005789},\n    {\"item\": \"75\", \"score\": 4.046063009943821},\n    {\"item\": \"68\", \"score\": 3.8153661512945325}\n  ]\n}\n```\n\nCongratulations! You have learned how to add customized logic to your Data\nPreparator!\n\n## Adding Preparator Parameters\n\nOptionally, you may want to take the hardcoded path\n(`./data/sample_not_train_data.txt`) away from the source code.\n\nPredictionIO offers preparator params so you can read variable values from\n`engine.json` instead.\n\nModify `src/main/scala/Preparator.scala` again in the *MyRecommendation*\ndirectory to:\n\n```scala\nimport org.apache.predictionio.controller.Params // ADDED\n\n// ADDED CustomPreparatorParams case class\ncase class CustomPreparatorParams(\n  filepath: String\n) extends Params\n\nclass Preparator(pp: CustomPreparatorParams) // ADDED CustomPreparatorParams\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    val noTrainItems = Source.fromFile(pp.filepath).getLines.toSet // CHANGED\n    val ratings = trainingData.ratings.filter( r =>\n      !noTrainItems.contains(r.item)\n    )\n    new PreparedData(ratings)\n  }\n}\n\n```\n\nIn `engine.json`, you define the parameter `filepath` for the Data Preparator:\n\n```json\n{\n  ...\n  \"preparator\": {\n    \"params\": {\n      \"filepath\": 
\"./data/sample_not_train_data.txt\"\n    }\n  },\n  ...\n}\n```\n\nTry to build *MyRecommendation* and deploy it again:\n\n```\n$ pio build\n$ pio train\n$ pio deploy\n```\n\nYou can change the `filepath` value without re-building the code next time.\n\n#### [Next: Customizing Serving Component](customize-serving.html)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/customize-serving.html.md",
    "content": "---\ntitle: Customizing Serving Component (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nServing component is where post-processing occurs. For example, if you are\nrecommending items to users, you may want to remove items that are not\ncurrently in stock from the list of recommendation.\n\nThis section is based on the [Recommendation Engine Template](/templates/recommendation/quickstart/).\n\nA full end-to-end example can be found on\n[GitHub](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-recommendation/customize-serving).\n\n## The Serving Component\n\nRecall [the DASE Architecture](/customize/), a PredictionIO engine has\n4 main components: Data Source, Data Preparator, Algorithm, and Serving\ncomponents. When a Query comes in, it is passed to the Algorithm component for\nmaking Predictions.\n\nThe Engine's serving component can be found in `src/main/scala/Serving.scala` in\nthe *MyRecommendation* directory. 
By default, it looks like the following:\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWe will customize the Serving component to remove temporarily disabled items\nfrom the Prediction made by Algorithms.\n\n## Modify the Serving Interface\n\nWe will use a file to specify a list of disabled items. When the `serve` method\nis called, it loads the file and removes items in the disabled list from\n`PredictedResult`. The following code snippet illustrates the logic:\n\n```scala\nimport scala.io.Source  // ADDED\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    // MODIFIED HERE\n    // Read the disabled item from file.\n    val disabledProducts: Set[String] = Source\n      .fromFile(\"./data/sample_disabled_items.txt\")\n      .getLines\n      .toSet\n\n    val itemScores = predictedResults.head.itemScores\n    // Remove items from the original predictedResult\n    PredictedResult(itemScores.filter(ps => !disabledProducts(ps.item)))\n  }\n}\n```\nINFO:We will show you how not to hardcode the path\n`./data/sample_disabled_items.txt` soon.\n\nWARNING: This example code uses a local relative path. For remote deployment, it is\nrecommended to use a globally accessible absolute path.\n\nDANGER: This example is only for demonstration purpose. Reading from disk for every\nquery leads to terrible system performance. 
Use a more efficient\nimplementation for production deployment.\n\n## Deploy the Modified Engine\n\nNow you can deploy the modified engine as described in [Quick\nStart](quickstart.html).\n\nMake sure the `appName` defined in the file `engine.json` matches your *App Name*:\n\n```\n...\n\"datasource\": {\n  \"params\" : {\n    \"appName\": \"YourAppName\"\n  }\n},\n...\n```\n\nTo build *MyRecommendation* and deploy it as a service:\n\n```\n$ pio build\n$ pio train\n$ pio deploy\n```\n\nThis will deploy an engine that binds to http://localhost:8000. You can visit\nthat page in your web browser to check its status.\n\nNow, you can try to retrieve predicted results. To recommend 4 movies to user\nwhose ID is 1, send this JSON `{ \"user\": \"1\", \"num\": 4 }` to the deployed\nengine\n\n```\n$ curl -H \"Content-Type: application/json\" -d '{ \"user\": \"1\", \"num\": 4 }' \\\n  http://localhost:8000/queries.json\n```\n\nand it will return a JSON of recommended movies.\n\n```json\n{\n  \"itemScores\": [\n    {\"item\": \"65\", \"score\": 6.537168137254073},\n    {\"item\": \"69\", \"score\": 6.391430405762495},\n    {\"item\": \"38\", \"score\": 5.829957095096519},\n    {\"item\": \"11\", \"score\": 5.5991291456974}\n  ]\n}\n```\n\nNow, to verify the blacklisting logic, we add the item 69 (the second item)\nto the blacklisting file `data/sample_disabled_items.txt`. Rerun the `curl`\nquery, and the change should take effect immediately as the disabled item\nlist is reloaded every time the `serve` method is called.\n\n```\n$ echo \"69\" >> ./data/sample_disabled_items.txt\n$ curl -H \"Content-Type: application/json\" -d '{ \"user\": \"1\", \"num\": 4 }' \\\n  http://localhost:8000/queries.json\n```\n\n```json\n{\n  \"itemScores\": [\n    {\"item\": \"65\", \"score\": 6.537168137254073},\n    {\"item\": \"38\", \"score\": 5.829957095096519},\n    {\"item\": \"11\", \"score\": 5.5991291456974}\n  ]\n}\n```\n\nCongratulations! 
You have learned how to add customized realtime blacklisting\nlogic to your Serving component!\n\n## Adding Serving Parameters\n\nOptionally, you may want to take the hardcoded path\n(`./data/sample_disabled_items.txt`) away from the source code.\n\nPredictionIO offers serving params so you can read variable values from\n`engine.json` instead. PredictionIO transforms the JSON object specified in\n`engine.json`'s `serving` field into the `ServingParams` class.\n\nModify `src/main/scala/Serving.scala` again in the *MyRecommendation*\ndirectory to:\n\n```scala\nimport scala.io.Source\n\nimport org.apache.predictionio.controller.Params  // ADDED\n\n// ADDED ServingParams to specify the blacklisting file location.\ncase class ServingParams(filepath: String) extends Params\n\nclass Serving(val params: ServingParams)\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query, predictedResults: Seq[PredictedResult])\n  : PredictedResult = {\n    val disabledProducts: Set[String] = Source\n      .fromFile(params.filepath)\n      .getLines\n      .toSet\n\n    val itemScores = predictedResults.head.itemScores\n    PredictedResult(itemScores.filter(ps => !disabledProducts(ps.item)))\n  }\n}\n```\n\nIn `engine.json`, you define the parameters `serving` for the Serving component:\n\n```json\n{\n  ...\n  \"serving\": {\n    \"params\": {\n      \"filepath\": \"./data/sample_disabled_items.txt\"\n    }\n  },\n  ...\n}\n```\n\nTry to build *MyRecommendation* and deploy it again:\n\n```\n$ pio build\n$ pio train\n$ pio deploy\n```\n\nYou can change the `filepath` value without re-building the code next time.\n\n#### [Next: Training with Implicit Preference](training-with-implicit-preference.html)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase',\nlocals: {\n  template_name: 'Recommendation Engine Template',\n  evaluation_link: '/templates/recommendation/evaluation/'\n} %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MyRecommendation* takes a JSON prediction\nquery, e.g. 
`{ \"user\": \"1\", \"num\": 4 }`, and returns a JSON predicted result.\nIn MyRecommendation/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(\n  user: String,\n  num: Int\n)\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\"itemScores\":[\n  {\"item\":\"22\",\"score\":4.07},\n  {\"item\":\"62\",\"score\":4.05},\n  {\"item\":\"75\",\"score\":4.04},\n  {\"item\":\"68\",\"score\":3.81}\n]}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n```\n\nFinally, `RecommendationEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject RecommendationEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n  ...\n}\n```\n\n### Spark MLlib\n\nSpark's MLlib ALS algorithm takes training data of RDD type, i.e. `RDD[Rating]`\nand trains a model, which is a `MatrixFactorizationModel` object.\n\nThe PredictionIO Recommendation Engine Template, on which\n*MyRecommendation* is based, integrates this algorithm under the DASE\narchitecture. We will take a closer look at the DASE code below.\n\nINFO: [Check this\nout](https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html) to\nlearn more about MLlib's ALS collaborative filtering algorithm.\n\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *Data\nSource* and *Data Preparator*. 
*Data Source* and *Data Preparator* take data\nfrom the data store and prepare `RDD[Rating]` for the ALS algorithm.\n\n### Data Source\n\nIn MyRecommendation/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads, and selects, data from the *Event Store*\n(data store of the *Event Server*) and returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. 
Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n}\n```\n\n`PEventStore` is an object which provides function to access data that is collected by PredictionIO *Event Server*.\n`PEventStore.find(...)` specifies the events that you want to read. PredictionIO\nautomatically loads the parameters of *datasource* specified in\nMyRecommendation/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nEach *rate* and *buy* user event data is read as `Rating`.\nFor flexibility, this Recommendation engine template is designed to support user ID and item ID in `String`.\nSince Spark MLlib's `Rating` class assumes `Int`-only user ID and item ID, you have to define a new `Rating` class:\n\n```scala\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n```\n\n`TrainingData` contains an RDD of all these `Rating` events. The class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable {...}\n```\nand PredictionIO passes the returned `TrainingData` object to *Data Preparator*.\n\n<!-- TODO\n> HOW-TO:\n>\n> You may modify readTraining function to read from other datastores, such as MongoDB -  [link]\n-->\n\nINFO: You could [modify the DataSource to read custom events](reading-custom-events.html) other than the default **rate** and **buy**.\n\n### Data Preparator\n\nIn MyRecommendation/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs. 
For MLlib ALS,\nit is `RDD[Rating]`.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to `PreparedData`:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n<!-- TODO\n> HOW-TO:\n>\n> MLlib ALS limitation: user id, item id must be integer - convert [link]\n-->\n\n## Algorithm\n\nIn MyRecommendation/src/main/scala/***ALSAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. `train` is responsible for\ntraining a predictive model. PredictionIO will store this model and `predict` is\nresponsible for using this model to make prediction.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib ALS algorithm,\ni.e. `ALS.train`, is used to train a predictive model.\n\n\n```scala\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    ...\n    // Convert user and item String IDs to Int index for MLlib\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // If you only have one type of implicit event (Eg. \"view\" event only),\n    // replace ALS.train(...) 
with\n    //val m = ALS.trainImplicit(\n      //ratings = mllibRatings,\n      //rank = ap.rank,\n      //iterations = ap.numIterations,\n      //lambda = ap.lambda,\n      //blocks = -1,\n      //alpha = 1.0,\n      //seed = seed)\n\n    val m = ALS.train(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n```\n\n#### Working with Spark MLlib's ALS.train(....)\n\nAs mentioned above, MLlib's `Rating` does not support `String` user ID and item ID.\nIts `ALS.train` thus also assumes `Int`-only `Rating`.\n\nHere you need to map your String-supported `Rating` to MLlib's Integer-only `Rating`.\nFirst, you can rename MLlib's Integer-only `Rating` to `MLlibRating` for clarity:\n\n```\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n```\n\nYou then create a bi-directional map with `BiMap.stringInt` which maps each String record to an Integer index.\n\n```\nval userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\nval itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n```\nFinally, you re-create each `Rating` event as `MLlibRating`:\n\n```\nMLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n```\n\n\nIn addition to `RDD[MLlibRating]`, `ALS.train` takes the following parameters: *rank*, *iterations*, *lambda* and *seed*.\n\nThe values of these parameters are specified in *algorithms* of\nMyRecommendation/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load 
these values into the constructor `ap`,\nwhich has a corresponding case class `ALSAlgorithmParams`:\n\n```scala\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n```\n\nThe `seed` parameter is an optional parameter, which is used by MLlib ALS algorithm internally to generate random values. If the `seed` is not specified, current system time would be used and hence each train may produce different results. Specify a fixed value for the `seed` if you want to have deterministic result (For example, when you are testing).\n\n`ALS.train` then returns a `MatrixFactorizationModel` model which contains RDD\ndata. RDD is a distributed collection of items which *does not* persist. To\nstore the model, you convert the model to `ALSModel` class at the end.\n`ALSModel` is a persistable class that extends `MatrixFactorizationModel`.\n\n> The detailed implementation can be found at\nMyRecommendation/src/main/scala/***ALSModel.scala***\n\n\nPredictionIO will automatically store the returned model, i.e. `ALSModel` in this case.\n\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{\n\"user\": \"1\", \"num\": 4 }` to the `Query` class you defined previously.\n\nThe predictive model `MatrixFactorizationModel` of MLlib ALS, which is now\nextended as `ALSModel`, offers a method called\n`recommendProducts`. `recommendProducts` takes two parameters: user id (i.e.\nthe `Int` index of `query.user`) and the number of items to be returned (i.e. `query.num`). 
It\npredicts the top *num* of items a user will like.\n\n```scala\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // index. Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n```\n\nNote that `recommendProducts` returns the `Int` indices of items. You map them back to `String` with `itemIntStringMap` before they are returned.\n\n> You have defined the class `PredictedResult` earlier.\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. 
*Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MyRecommendation/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object RecommendationEngine` inside ***Engine.scala***. Since only\none `ALSAlgorithm` is implemented by default, this `Seq` contains one element.\n\n\nNow you should have a good understanding of the DASE model. We will show you an\nexample of customizing the Data Preparator to exclude certain items from your\ntraining set.\n\n#### [Next: Reading Custom Events](reading-custom-events.html)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/evaluation.html.md.erb",
    "content": "---\ntitle: Evaluation Explained (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nA PredictionIO engine is instantiated by a set of parameters. These parameters\ndetermine which algorithm is used as well as the parameters for the algorithm.\nIt naturally raises a question of how to choose the best set of parameters.  The\nevaluation module streamlines the process of tuning the engine to the best\nparameter set and deploying it.\n\n## Evaluation Quick Start\n\nWe assume you have run the [Recommendation Quick Start](/templates/recommendation/quickstart/)\nand will skip the data collection / import instructions.\n\n### Edit the AppName\n\nEdit MyRecommendation/src/main/scala/***Evaluation.scala*** to specify the\n*appName* you used to import the data.\n\n```scala\nobject ParamsList extends EngineParamsGenerator {\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      ...\n      )\n  ...\n}\n```\n\n### Build and run the evaluation\n\nTo run evaluation, the command `pio eval` is used. It takes two\nmandatory parameters:\n\n1. the `Evaluation` object, which tells PredictionIO the engine and metric we use\n   for the evaluation; and\n2. 
the `EngineParamsGenerator`, which contains a list of engine params to test\n   against.\n\nThe following command kickstarts the evaluation\nworkflow for the recommendation template (replace \"org.template\" with your package).\n\n```\n$ pio build\n...\n$ pio eval org.template.RecommendationEvaluation \\\n    org.template.EngineParamsList\n```\n\nYou will see the following output:\n\n```\n...\n[INFO 2015-03-31 00:31:53,934] [CoreWorkflow$] runEvaluation started\n...\n[INFO 2015-03-31 00:35:56,782] [CoreWorkflow$] Updating evaluation instance with result: MetricEvaluatorResult:\n  # engine params evaluated: 3\nOptimal Engine Params:\n  {\n  \"dataSourceParams\":{\n    \"\":{\n      \"appName\":\"MyApp1\",\n      \"evalParams\":{\n        \"kFold\":5,\n        \"queryNum\":10\n      }\n    }\n  },\n  \"preparatorParams\":{\n    \"\":{\n\n    }\n  },\n  \"algorithmParamsList\":[\n    {\n      \"als\":{\n        \"rank\":10,\n        \"numIterations\":40,\n        \"lambda\":0.01,\n        \"seed\":3\n      }\n    }\n  ],\n  \"servingParams\":{\n    \"\":{\n\n    }\n  }\n}\nMetrics:\n  Precision@K (k=10, threshold=4.0): 0.15205820105820103\n  PositiveCount (threshold=4.0): 5.753333333333333\n  Precision@K (k=10, threshold=2.0): 0.1542777777777778\n  PositiveCount (threshold=2.0): 6.833333333333333\n  Precision@K (k=10, threshold=1.0): 0.15068518518518517\n  PositiveCount (threshold=1.0): 10.006666666666666\n[INFO 2015-03-31 00:36:01,516] [CoreWorkflow$] runEvaluation completed\n\n```\n\nThe console prints out the evaluation metric score of each engine params, and finally\npretty-prints the optimal engine params. Amongst the 3 engine params we evaluate,\nthe best Precision@K has a score of ~0.1521.\n\n\n## The Evaluation Design\n\nWe assume you have read the [Tuning and Evaluation](/evaluation) section. We\nwill cover the evaluation aspects which are specific to the recommendation\nengine.\n\nIn recommendation evaluation, the raw data is a sequence of known ratings.  
A\nrating has 3 components: user, item, and a score. We use the $k-fold$ method for\nevaluation, the raw data is sliced into a sequence of (training, validation)\ndata tuples.\n\nIn the validation data, we construct a query for *each user*, and get a list of\nrecommended items from the engine. It is vastly different from the\nclassification tutorial, where there is a one-to-one correspondence between the\ntraining data point and the validation data point. In this evaluation,\nour unit of evaluation is *user*, we evaluate the quality of an engine\nusing the known rating of a user.\n\n### Key assumptions\n\nThere are multiple assumptions we have to make when we evaluate a\nrecommendation engine:\n\n- Definition of 'good'. We want to quantify if the engine is able to recommend\nitems which the user likes, we need to define what is meant by 'good'. In this\nexample, we have two kinds of events: 'rate' and 'buy'. The 'rate' event is\nassociated with a rating value which ranges from 1 to 4, and the 'buy'\nevent is mapped to a rating of 4. When we\nimplement the metric, we have to specify a rating threshold, only the rating\nabove the threshold is considered 'good'.\n\n- The absence of complete rating. It is extremely unlikely that the training\ndata contains rating for all user-item tuples. In contrast, of a system containing\n1000 items, a user may only have rated 20 of them, leaving 980 items unrated. There\nis no way for us to certainly tell if the user likes an unrated product.\nWhen we examine the evaluation result, it is important for us to keep in mind\nthat the final metric is only an approximation of the actual result.\n\n- Recommendation affects user behavior. Suppose you are an e-commerce company and\nwould like to use the recommendation engine to personalize the landing page,\nthe item you show in the landing page directly impacts what the user is going to\npurchase. 
This is different from weather prediction, whatever the weather\nforecast engine predicts, tomorrow's weather won't be affected.  Therefore, when\nwe conduct offline evaluation for recommendation engines, it is possible that\nthe final user behavior is dramatically different from the evaluation result.\nHowever, in the evaluation, for simplicity, we have to assume that user\nbehavior is homogenous.\n\n\n## Evaluation Data Generation\n\n### Actual Result\n\nIn MyRecommendation/src/main/scala/***Engine.scala***,\nwe define the `ActualResult` which represents the user rating for validation.\nIt stores the list of ratings in the validation set for a user.\n\n```scala\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n```\n\n### Implement Data Generate Method in DataSource\n\nIn MyRecommendation/src/main/scala/***DataSource.scala***,\nthe method `readEval` method reads, and selects, data from datastore\nand returns a sequence of (training, validation) data.\n\n```scala\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n     
     case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  ...\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n```\n\nThe evaluation data generate is controlled by two parameters\nin the `DataSourceEvalParams`. 
The first parameter `kFold` is the number of\nfolds we use for evaluation, the second parameter `queryNum` is used for\nquery construction.\n\n\nThe `getRatings` method is factored out from the `readTraining` method as\nthey both serve the same function of reading from the data source and\nconverting a user-item action into a rating (lines 22 - 40).\n\nThe `readEval` method is a k-fold evaluation implementation.\nWe annotate each rating in the raw data by an index (line 54), then\nin each fold, the rating goes to either the training or testing set\nbased on the modulus value.\nWe group ratings by user, and one query is constructed *for each user* (line 60).\n\n## Evaluation Metrics\n\nIn the [evaluation and tuning tutorial](/evaluation/), we use ***Metric*** to\ncompute the quality of an engine variant.\nHowever, in actual use cases like recommendation, as we have made many\nassumptions in our model, using a single metric may lead to a biased evaluation.\nWe will discuss using multiple\n***Metrics*** to generate a comprehensive evaluation, to generate a more global view\nof the engine.\n\n### Precision@K\n\nPrecision@K measures the portion of *relevant* items amongst the first *k* items.\nRecommendation engine usually wants to make sure the top few items recommended\nare appealing to the user. Think about Google search, we usually give up after\nlooking at the first and second result pages.\n\n### Precision@K Parameters\n\nThere are two questions associated with it.\n\n1. How do we define *relevant*?\n2. What is a good value of *k*?\n\nBefore we answer these questions, we need to understand what constitutes a good metric.\nIt is like exams, if everyone gets full scores, the exam fails its goal to\ndetermine what the candidates don't know; if everyone fails, the exam fails its goal\nto determine what the candidates know.\nA good metric should be able to distinguish the good from the bad.\n\nA way to define relevant is to use the notion of rating threshold. 
If the user\nrating for an item is higher than a certain threshold, we say it is relevant.\nHowever, without looking at the data, it is hard to pick a reasonable threshold.\nWe can set the threshold to be as high as the maximum rating of 4.0, but it may\nseverely limit the relevant set size, and the precision scores will be close to\nzero or undefined (precision is undefined if there is no relevant data).\nOn the other hand, we can set the threshold to be as low as the minimum rating, but\nit makes the precision metric uninformative as well since all scores will be close\nto 1.\nSimilar argument applies to picking a good value of *k* too.\n\nA method to choose a good parameter is *not* to choose one, but instead test\nout *a whole spectrum of parameters*. If an engine variant is good, it should\nrobustly perform well across different metric parameters.\nThe evaluation module supports multiple metrics. The following code\nsnippet demonstrates a sample usage.\n\n```scala\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n```\n\nWe have two types of `Metric`s.\n\n- `PositiveCount` is a helper metric that returns the average\nnumber of positive samples for a specific rating threshold, therefore we get some\nidea about the *demographic* of the data. 
If `PositiveCount` is too low or too\nhigh for a certain threshold, we know that it should not be used.\nWe have three thresholds (line 2), and three instances of\n`PositiveCount` metric are instantiated (line 10), one for each threshold.\n\n- `Precision@K` is the actual metric we use.\nWe have two lists of parameters (lines 2 to 3): `ratingThreshold` defines what rating is good,\nand `k` defines how many items we evaluate in the `PredictedResult`.\nWe generate a list of all combinations (line 11).\n\nThese metrics are specified as `otherMetrics` (lines 9 to 11), they\nwill be calculated and generated on the evaluation UI.\n\nTo run this evaluation, you can:\n\n```\n$ pio eval org.template.ComprehensiveRecommendationEvaluation \\\n  org.template.EngineParamsList\n```\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/how-to.html.md",
    "content": "---\ntitle: How-To  (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHere are the pages that show you how you can customize the Recommendation engine template.\n\n- [Read Custom Events](/templates/recommendation/reading-custom-events/)\n- [Customize Data Preparator](/templates/recommendation/customize-data-prep/)\n- [Customize Serving](/templates/recommendation/customize-serving/)\n- [Train with Implicit Preference](/templates/recommendation/training-with-implicit-preference/)\n- [Filter Recommended Items by Blacklist in Query](/templates/recommendation/blacklist-items/)\n- [Batch Persistable Evaluator](/templates/recommendation/batch-evaluator/)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Recommendation Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis Recommendation Engine Template has integrated **Apache Spark MLlib**'s\nCollaborative Filtering algorithm by default. You can customize it easily to fit\nyour specific needs.\n\nWe are going to show you how to create your own recommendation engine for\nproduction use based on this template.\n\n## Usage\n\n### Event Data Requirements\n\nBy default, the template requires the following events to be collected:\n\n- user 'rate' item events\n- user 'buy' item events\n\nNOTE: You can customize this engine to use other events.\n\n### Input Query\n\n- user ID\n- number of recommended items\n\n### Output PredictedResult\n\n- a ranked list of recommended itemIDs\n\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyRecommendation', template_name: 'Recommendation Engine Template', template_repo: 'apache/predictionio-template-recommender' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. 
Collecting Data\n\nNext, let's collect some training data. By default,\nthe Recommendation Engine Template supports 2 types of events: **rate** and\n**buy**. A user can give a rating score to an item or buy an item. This template requires user-rate-item and user-buy-item events.\n\nINFO: This template can easily be customized to consider more user events such as *like*, *dislike*, etc.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\n### Example **rate** event\n\nA user (ID \"u0\") gives an item (ID \"i0\") a rating of 5 at `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified)\n\nRun the following `curl` command to send the `rate` event:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"rate\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"properties\" : {\n    \"rating\" : 5\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\n\nclient = predictionio.EventClient(\n    access_key=<ACCESS KEY>,\n    url=<URL OF EVENTSERVER>,\n    threads=5,\n    qsize=500\n)\n\n# A user rates an item\nclient.create_event(\n    event=\"rate\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    target_entity_type=\"item\",\n    target_entity_id=<ITEM ID>,\n    properties= { \"rating\" : float(<RATING>) }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user rates an item\n$client->createEvent(array(\n   'event' => 'rate',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' 
=> 'item',\n   'targetEntityId' => <ITEM ID>,\n   'properties' => array('rating'=> <RATING>)\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# A user rates an item.\nclient.create_event(\n  'rate',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>,\n    'properties' => { 'rating' => <RATING (float)> }\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// A user rates an item\nEvent rateEvent = new Event()\n    .event(\"rate\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>)\n    .property(\"rating\", new Float(<RATING>));\nclient.createEvent(rateEvent);\n\n```\n  </div>\n</div>\n\n### Example **buy** event\n\nA user (ID \"u1\") buys an item (ID \"i2\") at `2014-11-10T12:34:56.123-08:00` (current time will be used if eventTime is not specified)\n\nRun the following `curl` command to send the `buy` event:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"buy\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u1\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i2\",\n  \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user buys an item\nclient.create_event(\n    event=\"buy\",\n    entity_type=\"user\",\n    entity_id=<USER ID>,\n    target_entity_type=\"item\",\n    target_entity_id=<ITEM ID>\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" 
data-lang=\"php\">\n```php\n<?php\n// A user buys an item\n$client->createEvent(array(\n   'event' => 'buy',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user buys an item.\nclient.create_event(\n  'buy',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// A user buys an item\nEvent buyEvent = new Event()\n    .event(\"buy\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(buyEvent);\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided in the template to import the data to the\nEvent Server using the Python SDK. Please upgrade to the latest Python SDK.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nExecute the following to import the data:\n\nWARNING: These commands must be executed in the Engine directory, for example: `MyRecommendation`.\n\n```\n$ cd MyRecommendation\n$ curl https://raw.githubusercontent.com/apache/spark/master/data/mllib/sample_movielens_data.txt --create-dirs -o data/sample_movielens_data.txt\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\nImporting data...\n1501 events are imported.\n```\n\nNow the movie ratings data is stored as events inside the Event Store.\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\nINFO: By default, the template trains the model with \"rate\" events (explicit rating). 
You can customize the engine to [read other custom events](/templates/recommendation/reading-custom-events/) and [handle events of implicit preference (such as, view, buy)](/templates/recommendation/training-with-implicit-preference/)\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyRecommendation' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyRecommendation' } %>\n\n## 6. Use the Engine\n\nNow, you can try to retrieve predicted results. To recommend 4 movies to a user\nwhose id is 1, you send this JSON `{ \"user\": \"1\", \"num\": 4 }` to the deployed\nengine and it will return a JSON result of the recommended movies. Simply send a query\nby making an HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use an SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"user\": \"1\", \"num\": 4 }' http://localhost:8000/queries.json\n\n```\n  </div>\n\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\"user\": \"1\", \"num\": 4})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('user'=> 1, 'num'=> 4));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new(<ENGINE DEPLOY URL>)\n\n# Query PredictionIO.\nresponse = client.send_query('user' => <USER ID>, 'num' => <NUMBER (integer)>)\n\nputs response\n```\n  </div>\n  <div 
data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(<ENGINE DEPLOY URL>);\n\n// query\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"user\", \"1\",\n        \"num\",  4\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\n  \"itemScores\":[\n    {\"item\":\"22\",\"score\":4.072304374729956},\n    {\"item\":\"62\",\"score\":4.058482414005789},\n    {\"item\":\"75\",\"score\":4.046063009943821},\n    {\"item\":\"68\",\"score\":3.8153661512945325}\n  ]\n}\n```\n\nCongratulations, *MyRecommendation* is now running!\n\n<%= partial 'shared/quickstart/production' %>\n\n#### [Next: DASE Components Explained](/templates/recommendation/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/reading-custom-events.html.md",
    "content": "---\ntitle: Reading Custom Events (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can modify the [default DataSource](dase.html#data) to read\n\n- Custom events other than the default **rate** and **buy** events.\n- Events which involve different entity types other than the default **user** and **item**.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-recommendation/reading-custom-events).\n\n\n## Add the Custom Event\nTo read custom events, modify the function call `PEventStore.find()` in MyRecommendation/src/main/scala/***DataSource.scala***:\n\n- Specify the names of events in `eventNames` parameters\n- Specify the entity types involved in the events in the `entityType` and `targetEntityType` parameters accordingly\n\nIn this example below, we modify DataSource to read custom **like** and **dislike** events where a customer likes or dislikes a product. 
The event has new entityType **customer** and targetEntityType **product**:\n\n\n```scala\nval eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"customer\"), // MODIFIED\n      eventNames = Some(List(\"like\", \"dislike\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"product\")))(sc) // MODIFIED\n```\n\n## Map the Custom Event\n\nThe ALS algorithm uses `Rating` object as input, so it is necessary to specify the mapping of your custom event to the Rating object. You can do so in MyRecommendation/src/main/scala/***DataSource.scala***.\n\nTo map **like** and **dislike** events to Rating objects with values of 4 and 1, respectively:\n\n```scala\nval ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          // MODIFIED\n          case \"like\" => 4.0 // map a like event to a rating of 4.0\n          case \"dislike\" => 1.0  // map a dislike event to a rating of 1.0\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n```\n\nThat's it! Your engine can read custom **like** and **dislike** events.\n\n\n\n#### [Next: Customizing Data Preparator](customize-data-prep.html)\n"
  },
  {
    "path": "docs/manual/source/templates/recommendation/training-with-implicit-preference.html.md",
    "content": "---\ntitle: Training with Implicit Preference (Recommendation)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThere are two types of user preferences:\n\n- explicit preference (also referred as \"explicit feedback\"), such as \"rating\" given to item by users.\n- implicit preference (also referred as \"implicit feedback\"), such as \"view\" and \"buy\" history.\n\nMLlib ALS provides the `setImplicitPrefs()` function to set whether to use implicit preference. The ALS algorithm takes RDD[Rating] as training data input. The Rating class is defined in Spark MLlib library as:\n\n```\ncase class Rating(user: Int, product: Int, rating: Double)\n```\n\nBy default, the recommendation template sets `setImplicitPrefs()` to `false` which expects explicit rating values which the user has rated the item.\n\nTo handle implicit preference, you can set `setImplicitPrefs()` to `true`. In this case, the \"rating\" value input to ALS is used to calculate the confidence level that the user likes the item. Higher \"rating\" means a stronger indication that the user likes the item.\n\nThe following provides an example of using implicit preference. 
You can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-recommendation/train-with-view-event).\n\n### Training with view events\n\nFor example, the more times the user has viewed the item, the higher the confidence that the user likes the item. We can aggregate the number of views and use this as the \"rating\" value.\n\nFirst, we can modify `DataSource.scala` to aggregate the number of views of the user on the same item:\n\n```scala\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      try {\n        val ratingValue: Double = event.event match {\n          case \"view\" => 1.0 // MODIFIED\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // MODIFIED\n        // key is (user id, item id)\n        // value is the rating value, which is 1.\n        ((event.entityId, event.targetEntityId.get), ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. 
Exception: ${e}.\")\n          throw e\n        }\n      }\n    }\n    // MODIFIED\n    // sum all values for the same user id and item id key\n    .reduceByKey { case (a, b) => a + b }\n    .map { case ((uid, iid), r) =>\n      Rating(uid, iid, r)\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n```\n\nNOTE: You may put the view count aggregation logic in `ALSAlgorithm`'s `train()` instead, depending on your needs.\n\n\nThen, we can modify ALSAlgorithm.scala to set `setImplicitPrefs` to `true`:\n\n```scala\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  ...\n\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n\n    ...\n\n    // If you only have one type of implicit event (Eg. \"view\" event only),\n    // set implicitPrefs to true\n    // MODIFIED\n    val implicitPrefs = true\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  ...\n\n}\n\n```\n\nNow the recommendation engine can train a model with implicit preference events.\n\n#### [Next: Filter Recommended Items by Blacklist in Query](blacklist-items.html)\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'Similar Product Engine Template' } %>\n\n## The Engine Design\n\nAs you can see from the Quick Start, *MySimilarProduct* takes a JSON prediction\nquery, e.g. 
`{ \"items\": [\"i1\"], \"num\": 4 }`, and returns a JSON predicted result.\nIn MySimilarProduct/src/main/scala/***Engine.scala***, the `Query` case class\ndefines the format of such **query**:\n\n```scala\ncase class Query(\n  items: List[String],\n  num: Int,\n  categories: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n```\n\nThe `PredictedResult` case class defines the format of **predicted result**,\nsuch as\n\n```json\n{\"itemScores\":[\n  {\"item\":22,\"score\":4.07},\n  {\"item\":62,\"score\":4.05},\n  {\"item\":75,\"score\":4.04},\n  {\"item\":68,\"score\":3.81}\n]}\n```\n\nwith:\n\n```scala\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n```\n\nFinally, `SimilarProductEngine` is the *Engine Factory* that defines the\ncomponents this engine will use: Data Source, Data Preparator, Algorithm(s) and\nServing components.\n\n```scala\nobject SimilarProductEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n```\n\n### Spark MLlib\n\nThe PredictionIO Similar Product Engine Template integrates Spark's MLlib ALS algorithm under the DASE\narchitecture. We will take a closer look at the DASE code below.\n\nThe MLlib ALS algorithm takes training data of RDD type, i.e. `RDD[Rating]` and trains a model, which is a `MatrixFactorizationModel` object.\n\nYou can visit [here](https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html) to learn more about MLlib's ALS collaborative filtering algorithm.\n\n\n## Data\n\nIn the DASE architecture, data is prepared by 2 components sequentially: *DataSource* and *DataPreparator*. 
They take data\nfrom the data store and prepare them for Algorithm.\n\n### Data Source\n\nIn MySimilarProduct/src/main/scala/***DataSource.scala***, the `readTraining`\nmethod of class `DataSource` reads and selects data from the *Event Store*\n(data store of the *Event Server*). It returns `TrainingData`.\n\n```scala\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(...) ...\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(...) ...\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(...) ...\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD\n    )\n  }\n}\n```\n\nPredictionIO automatically loads the parameters of *datasource* specified in MySimilarProduct/***engine.json***, including *appName*, to `dsp`.\n\nIn ***engine.json***:\n\n```\n{\n  ...\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  ...\n}\n```\n\nIn `readTraining()`, `PEventStore` is an object which provides functions to access data that is collected by PredictionIO Event Server.\n\nThis Similar Product Engine Template requires \"user\" and \"item\" entities that are set by events.\n\n`PEventStore.aggregateProperties(...)` aggregates properties of the `user` and `item` that are set, unset, or deleted by special events **$set**, **$unset** and **$delete**. 
Please refer to [Event API](/datacollection/eventapi/#note-about-properties) for more details of using these events.\n\nThe following code aggregates the properties of `user` and then map each result to a `User()` object.\n\n```scala\n\n  // create a RDD of (entityID, User)\n  val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n    appName = dsp.appName,\n    entityType = \"user\"\n  )(sc).map { case (entityId, properties) =>\n    val user = try {\n      User()\n    } catch {\n      case e: Exception => {\n        logger.error(s\"Failed to get properties ${properties} of\" +\n          s\" user ${entityId}. Exception: ${e}.\")\n        throw e\n      }\n    }\n    (entityId, user)\n  }.cache()\n\n```\nIn the template, `User()` object is a simple dummy as a placeholder for you to customize and expand.\n\n\nSimilarly, the following code aggregates `item` properties and then map each result to an `Item()` object. By default, this template assumes each item has an optional property `categories`, which is a list of String.\n\n```scala\n  // create a RDD of (entityID, Item)\n  val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n    appName = dsp.appName,\n    entityType = \"item\"\n  )(sc).map { case (entityId, properties) =>\n    val item = try {\n      // Assume categories is optional property of item.\n      Item(categories = properties.getOpt[List[String]](\"categories\"))\n    } catch {\n      case e: Exception => {\n        logger.error(s\"Failed to get properties ${properties} of\" +\n          s\" item ${entityId}. Exception: ${e}.\")\n        throw e\n      }\n    }\n    (entityId, item)\n  }.cache()\n```\n\nThe `Item` case class is defined as\n\n```scala\ncase class Item(categories: Option[List[String]])\n```\n\n`PEventStore.find(...)` specifies the events that you want to read. 
In this case, \"user view item\" events are read and then each is mapped to a `ViewEvent()` object.\n\n```scala\n\n  // get all \"user\" \"view\" \"item\" events\n  val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(\n    appName = dsp.appName,\n    entityType = Some(\"user\"),\n    eventNames = Some(List(\"view\")),\n    // targetEntityType is optional field of an event.\n    targetEntityType = Some(Some(\"item\")))(sc)\n    // PEventStore.find() returns RDD[Event]\n    .map { event =>\n      val viewEvent = try {\n        event.event match {\n          case \"view\" => ViewEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis)\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n            s\" Exception: ${e}.\")\n          throw e\n        }\n      }\n      viewEvent\n    }.cache()\n\n```\n\n`ViewEvent` case class is defined as:\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nINFO: For flexibility, this template is designed to support user ID and item ID in String.\n\n`TrainingData` contains an RDD of `User`, `Item` and `ViewEvent` objects. The class definition of `TrainingData` is:\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable { ... 
}\n```\n\nPredictionIO then passes the returned `TrainingData` object to *Data Preparator*.\n\nNOTE: You could modify the DataSource to [read event types](/templates/similarproduct/multi-events-multi-algos/) other than the default **view**.\n\n### Data Preparator\n\nIn MySimilarProduct/src/main/scala/***Preparator.scala***, the `prepare` method\nof class `Preparator` takes `TrainingData` as its input and performs any\nnecessary feature selection and data processing tasks. At the end, it returns\n`PreparedData` which should contain the data *Algorithm* needs.\n\nBy default, `prepare` simply copies the unprocessed `TrainingData` data to `PreparedData`:\n\n```scala\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable\n```\n\nPredictionIO passes the returned `PreparedData` object to Algorithm's `train` function.\n\n## Algorithm\n\nIn MySimilarProduct/src/main/scala/***ALSAlgorithm.scala***, the two methods of\nthe algorithm class are `train` and `predict`. `train` is responsible for\ntraining the predictive model; `predict` is\nresponsible for using this model to make predictions.\n\n### train(...)\n\n`train` is called when you run **pio train**. This is where MLlib ALS algorithm,\ni.e. 
`ALS.trainImplicit()`, is used to train a predictive model.\n\n\n```scala\n\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    ...\n\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      
productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n```\n\n#### Working with Spark MLlib's ALS.trainImplicit(....)\n\nMLlib ALS does not support `String` user ID and item ID. `ALS.trainImplicit` thus also assumes int-only `Rating` object. First, you can rename MLlib's Integer-only `Rating` to `MLlibRating` for clarity:\n\n```\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n```\n\nIn order to use MLlib's ALS algorithm, we need to convert the `viewEvents` into `MLlibRating`. There are two things we need to handle:\n\n1. Map user and item String ID of the ViewEvent into Integer ID, as required by `MLlibRating`.\n2. `ViewEvent` object is an implicit event that does not have an explicit rating value. `ALS.trainImplicit()` supports implicit preference. If the `MLlibRating` has higher rating value, it means higher confidence that the user prefers the item. Hence we can aggregate how many times the user has viewed the item to indicate the confidence level that the user may prefer the item.\n\nYou create a bi-directional map with `BiMap.stringInt` which maps each String record to an Integer index.\n\n```scala\nval userStringIntMap = BiMap.stringInt(data.users.keys)\nval itemStringIntMap = BiMap.stringInt(data.items.keys)\n```\n\nThen convert the user and item String ID in each ViewEvent to Int with these BiMaps. We use default -1 if the user or item String ID couldn't be found in the BiMap and filter out these events with invalid user and item ID later. 
After filtering, we use `reduceByKey()` to add up all values for the same key (uindex, iindex) and then finally map to an `MLlibRating` object.\n\n```scala\n\nval mllibRatings = data.viewEvents\n  .map { r =>\n    // Convert user and item String IDs to Int index for MLlib\n    val uindex = userStringIntMap.getOrElse(r.user, -1)\n    val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n    if (uindex == -1)\n      logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n        + \" to Int index.\")\n\n    if (iindex == -1)\n      logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n        + \" to Int index.\")\n\n    ((uindex, iindex), 1)\n  }.filter { case ((u, i), v) =>\n    // keep events with valid user and item index\n    (u != -1) && (i != -1)\n  }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n  .map { case ((u, i), v) =>\n    // MLlibRating requires integer index for user and item\n    MLlibRating(u, i, v)\n  }\n\n```\n\nIn addition to `RDD[MLlibRating]`, `ALS.trainImplicit` takes the following parameters: *rank*, *iterations*, *lambda* and *seed*.\n\nThe values of these parameters are specified in *algorithms* of\nMySimilarProduct/***engine.json***:\n\n```\n{\n  ...\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n  ...\n}\n```\n\nPredictionIO will automatically load these values into the constructor `ap`,\nwhich has a corresponding case class `ALSAlgorithmParams`:\n\n```scala\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n```\n\nThe `seed` parameter is an optional parameter, which is used by MLlib ALS algorithm internally to generate random values. If the `seed` is not specified, current system time would be used and hence each train may produce different results. 
Specify a fixed value for the `seed` if you want to have deterministic results (for example, when you are testing).\n\n`ALS.trainImplicit()` then returns a `MatrixFactorizationModel` model which contains two RDDs: userFeatures and productFeatures. They correspond to the user X latent features matrix and item X latent features matrix, respectively. In this case, we will make use of the productFeatures matrix to find similar products by comparing the similarity of the latent features. Hence, we store this productFeatures as defined in `ALSModel` class:\n\n```scala\nclass ALSModel(\n  val productFeatures: Map[Int, Array[Double]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable { ... }\n```\n\nPredictionIO will automatically store the returned model, i.e. `ALSModel` in this example.\n\n### predict(...)\n\n`predict` is called when you send a JSON query to\nhttp://localhost:8000/queries.json. PredictionIO converts the query, such as `{ \"items\": [\"i1\"], \"num\": 4 }` to the `Query` class you defined previously.\n\nWe can use the productFeatures stored in ALSModel to calculate the similarity between the items in query and other items. Cosine Similarity is used in this case.\n\nThis template also supports additional business logic features, such as filtering items by categories, recommending items in the white list or excluding items in the black list.\n\nThe `predict()` function first calculates the similarity scores of the query items versus all other items and then filters the items satisfying the `isCandidate()` condition. 
Then we take the top N items.\n\nINFO: You can easily modify `isCandidate()` checking or `whiteList` generation if you have different requirements or condition to determine if an item is a candidate item to be recommended.\n\n```scala\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val productFeatures = model.productFeatures\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items.map(model.itemStringIntMap.get(_))\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // productFeatures may not contain the requested item\n      .map { item => productFeatures.get(item) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No productFeatures vector for query items ${query.items}.\")\n      Array[(Int, Double)]()\n    } else {\n      productFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map{ qf =>\n            cosine(qf, f)\n          }.reduce(_ + _)\n        }\n        .filter(_._2 > 0) // keep items with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateItem(\n        i = i,\n        items = model.items,\n        categories = query.categories,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val itemScores = topScores.map { case (i, s) =>\n      ItemScore(\n        item = 
model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    PredictedResult(itemScores)\n  }\n```\n\nNote that the item IDs in top N results are the `Int` indices. You map them back to `String` with `itemIntStringMap` before they are returned:\n\n```scala\n  val itemScores = topScores.map { case (i, s) =>\n    ItemScore(\n      item = model.itemIntStringMap(i),\n      score = s\n    )\n  }\n\n  PredictedResult(itemScores)\n```\n\nPredictionIO passes the returned `PredictedResult` object to *Serving*.\n\n## Serving\n\nThe `serve` method of class `Serving` processes the predicted result. It is also\nresponsible for combining multiple predicted results into one if you have more\nthan one predictive model. *Serving* then returns the final predicted result.\nPredictionIO will convert it to a JSON response automatically.\n\nIn MySimilarProduct/src/main/scala/***Serving.scala***,\n\n```scala\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n```\n\nWhen you send a JSON query to http://localhost:8000/queries.json,\n`PredictedResult` from all models will be passed to `serve` as a sequence, i.e.\n`Seq[PredictedResult]`.\n\n> An engine can train multiple models if you specify more than one Algorithm\ncomponent in `object SimilarProductEngine` inside ***Engine.scala***. Since only\none `ALSAlgorithm` is implemented by default, this `Seq` contains one element.\n\n\n#### [Next: Multiple Events and Multiple Algorithms](multi-events-multi-algos.html)\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/how-to.html.md",
    "content": "---\ntitle: How-To  (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nHere are the pages that show you how you can customize the Similar Product engine template.\n\n- [Multiple Events and Multiple Algorithms](/templates/similarproduct/multi-events-multi-algos/)\n- [Returns Item Properties](/templates/similarproduct/return-item-properties/)\n- [Train with Rate Event](/templates/similarproduct/train-with-rate-event/)\n- [Get Rid of Events for Users](/templates/similarproduct/rid-user-set-event/)\n- [Recommend Users](/templates/similarproduct/recommended-user/)\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/multi-events-multi-algos.html.md.erb",
    "content": "---\ntitle: Multiple Events and Multiple Algorithms\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis is a more advanced example; we recommend you go through the [DASE](dase.html) explanation first.\n\nThe [default algorithm described in DASE](dase.html#algorithm) uses user-to-item view events as training data. However, your application may have more than one type of events which you want to take into account, such as buy, rate and like events. One way to incorporate other types of events to improve the system is to add another algorithm to process these events, build a separate model and then combine the outputs of multiple algorithms during Serving.\n\nIn this example, we will add another algorithm to process like/dislike events. The final PredictedResults will be the combined outputs of both algorithms.\n\nNOTE: This is just one of the ways to handle multiple types of events. We use this use case to demonstrate how one can build an engine with multiple algorithms. 
You may also build one single algorithm which takes different events into account without using multiple algorithms.\n\nThis example will demonstrate the following:\n\n- Read multiple types of events\n- Use positive and negative implicit events such as like and dislike with MLlib ALS algorithm\n- Integrate multiple algorithms into one engine\n\nThe complete source code of this example can be found [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-similarproduct/multi-events-multi-algos).\n\n### Step 1. Read \"like\" and \"dislike\" events as TrainingData\n\nModify the following in DataSource.scala:\n\n- In addition to the original `ViewEvent` class, add a new class `LikeEvent` which has a boolean `like` field to represent whether it is a like or a dislike event.\n- Add a new field `likeEvents` into `TrainingData` class to store the `RDD[LikeEvent]`.\n- Modify DataSource's `readTraining()` function to read \"like\" and \"dislike\" events from the Event Store.\n\nThe modification is shown below:\n\n```scala\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    ...\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = ...\n\n    // ADDED\n    // get all \"user\" \"like\" and \"dislike\" \"item\" events\n    val likeEventsRDD: RDD[LikeEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"like\", \"dislike\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // PEventStore.find() returns RDD[Event]\n      .map { event =>\n        val likeEvent = try {\n          event.event match {\n            case \"like\" | \"dislike\" => LikeEvent(\n              user = event.entityId,\n              item = 
event.targetEntityId.get,\n              t = event.eventTime.getMillis,\n              like = (event.event == \"like\"))\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to LikeEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        likeEvent\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD,\n      likeEvents = likeEventsRDD // ADDED\n    )\n  }\n}\n\n...\n\ncase class LikeEvent( // ADDED\n  user: String,\n  item: String,\n  t: Long,\n  like: Boolean // true: like. false: dislike\n)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val likeEvents: RDD[LikeEvent] // ADDED\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\" +\n    // ADDED\n    s\"likeEvents: [${likeEvents.count()}] (${likeEvents.take(2).toList}...)\"\n  }\n}\n```\n\n### Step 2. 
Modify Preparator and PreparedData\n\nIn Preparator.scala, simply pass the newly added `likeEvents` from `TrainingData` to `PreparedData`, as shown in the code below:\n\n```scala\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents,\n      likeEvents = trainingData.likeEvents) // ADDED\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val likeEvents: RDD[LikeEvent] // ADDED\n) extends Serializable\n\n```\n\n### Step 3. Add a new algorithm to train model with `likeEvents`\n\nFor demonstration purpose, we also use MLlib ALS to train model with `likeEvents` and hence the new algorithm class will share much of the logic of the original algorithm. The only difference will be the `train()` function - the original one trains model with `viewEvents` while the new one uses `likeEvents`. In this case, we can simply create a new algorithm which extends the original `ALSAlgorithm` class and overrides the `train()` function.\n\nNOTE: You may also create a completely new algorithm to process `likeEvents` or other types of events without extending any existing algorithm, or simply modify the same algorithm to take new events into account.\n\n#### Using positive and negative implicit events (without explicit rating) with MLlib ALS\n\nIn the original `ALSAlgorithm`, the `train()` function calculates the number of times that the user has viewed the same item and then maps it to an `MLlibRating` object. However, like/dislike event is boolean and one time preference, so it doesn't make sense to aggregate the events if the user has multiple like/dislike events on the same item. 
However, a user may like an item and change her mind to dislike the same item later, or vice versa. In this case, we could simply use the latest like or dislike event of the user for the same item.\n\nIn addition, MLlib ALS can handle negative preference with `ALS.trainImplicit()`. Hence, we can map a dislike to rating of -1 and like to 1.\n\nINFO: Negative preference does not work if you use `ALS.train()` instead, which is for explicit rating such as \"rate\" event.\n\nIn summary, this new `LikeAlgorithm` does the following:\n\n- Extends the original `ALSAlgorithm` class\n- Overrides `train()` to process the `likeEvents` in `PreparedData`\n- Uses the latest event if the user likes/dislikes the same item multiple times\n- Maps dislike to a `MLlibRating` object with rating of -1 and like to rating of 1\n- Uses the `MLlibRating` to train the `ALSModel` in the same way as the original `ALSAlgorithm`\n- The `predict()` function is the same as the original `ALSAlgorithm`\n\nIt is shown in the code below:\n\n```scala\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\n// ADDED\n// Extend original ALSAlgorithm and override train() function to handle\n// like and dislike events\nclass LikeAlgorithm(ap: ALSAlgorithmParams) extends ALSAlgorithm(ap) {\n\n  @transient lazy override val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(!data.likeEvents.take(1).isEmpty,\n      s\"likeEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be 
empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.likeEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        // key is (uindex, iindex) tuple, value is (like, t) tuple\n        ((uindex, iindex), (r.like, r.t))\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey { case (v1, v2) => // MODIFIED\n        // A user may like an item and change to dislike it later,\n        // or vice versa. Use the latest value for this case.\n        val (like1, t1) = v1\n        val (like2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }.map { case ((u, i), (like, t)) => // MODIFIED\n        // With ALS.trainImplicit(), we can use negative value to indicate\n        // negative signal (i.e. 
dislike)\n        val r = if (like) 1 else -1\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, r)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n}\n\n```\n\n### Step 4. Modify Serving to combine multiple algorithms' outputs\n\nWhen the engine is deployed, the Query is sent to all algorithms of the engine. The PredictedResults returned by all algorithms are passed to Serving component for further processing, as you could see that the argument `predictedResults` of the `serve()` function is type of `Seq[PredictedResult]`.\n\nIn this example, the `serve()` function at first standardizes the PredictedResults of each algorithm so that we can combine the scores of multiple algorithms by adding the scores of the same item. 
Then we can take the top N items as defined in `query`.\n\nModify Serving.scala as shown below:\n\n```scala\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n\n    // MODIFIED\n    val standard: Seq[Array[ItemScore]] = if (query.num == 1) {\n      // if query 1 item, don't standardize\n      predictedResults.map(_.itemScores)\n    } else {\n      // Standardize the score before combine\n      val mvList: Seq[MeanAndVariance] = predictedResults.map { pr =>\n        meanAndVariance(pr.itemScores.map(_.score))\n      }\n\n      predictedResults.zipWithIndex\n        .map { case (pr, i) =>\n          pr.itemScores.map { is =>\n            // standardize score (z-score)\n            // if standard deviation is 0 (when all items have the same score,\n            // meaning all items are ranked equally), return 0.\n            val score = if (mvList(i).stdDev == 0) {\n              0\n            } else {\n              (is.score - mvList(i).mean) / mvList(i).stdDev\n            }\n\n            ItemScore(is.item, score)\n          }\n        }\n    }\n\n    // sum the standardized score if same item\n    val combined = standard.flatten // Array of ItemScore\n      .groupBy(_.item) // groupBy item id\n      .mapValues(itemScores => itemScores.map(_.score).reduce(_ + _))\n      .toArray // array of (item id, score)\n      .sortBy(_._2)(Ordering.Double.reverse)\n      .take(query.num)\n      .map { case (k,v) => ItemScore(k, v) }\n\n    PredictedResult(combined)\n  }\n}\n\n```\n\nNOTE: You may combine results of multiple algorithms in different ways based on your requirements.\n\n\n### Step 5. 
Modify Engine.scala and engine.json\n\nModify Engine.scala to include the new algorithm `LikeAlgorithm` class and give it the name `\"likealgo\"` as shown below:\n\n\n```scala\n...\n\nobject SimilarProductEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"als\" -> classOf[ALSAlgorithm],\n        \"cooccurrence\" -> classOf[CooccurrenceAlgorithm],\n        \"likealgo\" -> classOf[LikeAlgorithm]), // ADDED\n      classOf[Serving])\n  }\n}\n\n...\n\n```\n\nNext, in order to train and deploy two algorithms for this engine, we also need to modify engine.json to include the new algorithm. The `\"algorithms\"` parameter is an array of each algorithm's name and parameters. By default, it contains a single algorithm, `\"als\"`, and its parameters. Add another JSON object for the new algorithm named `\"likealgo\"` and its parameters to the `\"algorithms\"` array, as shown below:\n\n\n```json\n\n{\n  ...\n\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    },\n    {\n      \"name\": \"likealgo\",\n      \"params\": {\n        \"rank\": 8,\n        \"numIterations\" : 15,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n\n```\n\nINFO: You may notice that the parameters of the new `\"likealgo\"` contain the same fields as `\"als\"`. This is because the `LikeAlgorithm` class extends the original `ALSAlgorithm` class and shares the same algorithm parameter class definition. If the other algorithm you add has its own parameter class, you just need to specify its parameters inside its `params` field accordingly.\n\nThat's it! Now you have an engine configured with two algorithms.\n\n\n### Sample data with \"like\" and \"dislike\" events\n\nFor demonstration purposes, a sample import script is also provided for you to quickly test this engine. 
The script is modified from the original one used in [Quick Start](quickstart.html#import-more-sample-data) with the addition of importing like and dislike events.\n\nYou could find the import script in `data/import_eventserver.py`.\n\nMake sure you are under the App directory. Execute the following to import the data (Replace the value of access_key parameter with your **Access Key**):\n\n```\n$ python data/import_eventserver.py --access_key 3mZWDzci2D5YsqAnqNnXH9SB6Rg3dsTBs8iHkK6X2i54IQsIZI1eEeQQyMfs7b3F\n```\n\nWARNING: If you see error **TypeError: __init__() got an unexpected keyword argument 'access_key'**,\nplease update the Python SDK to the latest version.\n\n\nYou are ready to run pio build, train and deploy as described in the [Quick Start](quickstart.html#5.-deploy-the-engine-as-a-service).\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Similar Product Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThis engine template recommends products that are \"similar\" to the input product(s).\nSimilarity is not defined by user or item attributes but by users' previous actions. By default, it uses 'view' action such that product A and B are considered similar if most users who view A also view B. 
The template can be customized to support other action types such as buy, rate, like..etc.\n\nThis template is ideal for recommending products to customers based on their recent actions.\nUsing the IDs of the recently viewed products of a customer as the *Query*,\nthe engine will predict other products that this customer may also like.\n\nThis approach works perfectly for customers who are **first-time visitors** or have not signed in.\nRecommendations are made dynamically in *real-time* based on the most recent product preference you provide in the *Query*.\nYou can, therefore, recommend products to visitors without knowing a long history about them.\n\nYou can also use this template to build the popular feature of Amazon: **\"Customers Who Viewed This Item Also Viewed...\"** quickly.\nHelp your customers explore more products that they like, and sell more products.\n\n## Usage\n\n### Event Data Requirements\n\nBy default, this template takes the following data from Event Server as Training Data:\n\n- User *$set* events\n- Item *$set* events with *categories* properties\n- Users' *view* item events\n\nINFO: This template can easily be customized to consider more user events such as *buy*, *rate* and *like*.\nYou can offer features like \"Customers Who Bought This Item Also Bought....\".\n\n### Input Query\n\n- List of ItemIDs, which are the targeted products\n- N (number of items to be recommended)\n- List of white-listed item categories (optional)\n- List of white-listed ItemIds (optional)\n- List of black-listed ItemIds (optional)\n\nThe template also supports black-list and white-list. If a white-list is provided, the engine will include only those products in the recommendation.\nLikewise, if a black-list is provided, the engine will exclude those products in the recommendation.\n\n### Output PredictedResult\n\n- a ranked list of recommended itemIDs\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. 
Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MySimilarProduct', template_name: 'Similar Product Engine Template', template_repo: 'apache/predictionio-template-similar-product' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4. Collecting Data\n\nNext, let's collect some training data for the app of this Engine. By default,\nthe Similar Product Engine Template supports 2 types of entities: **user** and\n**item**, and event **view**. An item has the **categories** property, which is a list of category names (String). A user can view an item. Respectively, this template requires '$set' user event, '$set' item event, and user-view-item events.\n\nINFO: This template can easily be customized to consider more user events such as *buy*, *rate* and *like*.\n\n<%= partial 'shared/quickstart/collect_data' %>\n\nFor example, when a new user with id \"u0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for this user. 
To send this event, run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nfrom datetime import datetime\nimport pytz\n\nclient = predictionio.EventClient(\n  access_key=<ACCESS KEY>,\n  url=<URL OF EVENTSERVER>,\n  threads=5,\n  qsize=500\n)\n\n# Create a new user\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"user\",\n  entity_id=<USER_ID>,\n  # current time will be used if event_time is not specified\n  event_time=datetime(\n    2014, 11, 2, 9, 39, 45, 618000, pytz.timezone('US/Pacific')\n  )\n)\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EventClient;\n\n$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'user',\n  'entityId' => <USER ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a client object.\nclient = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)\n\n# Create a new user\nclient.create_event(\n  '$set',\n  'user',\n  <USER ID>\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport org.apache.predictionio.Event;\nimport org.apache.predictionio.EventClient;\n\nimport com.google.common.collect.ImmutableList;\n\nEventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);\n\n// Create a new user\nEvent userEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"user\")\n  .entityId(<USER_ID>);\nclient.createEvent(userEvent);\n```\n  
</div>\n</div>\n\nWhen a new item \"i0\" is created in your app on time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a `$set` event for the item. Note that the item is set with categories properties: `\"c1\"` and `\"c2\"`. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"$set\",\n  \"entityType\" : \"item\",\n  \"entityId\" : \"i0\",\n  \"properties\" : {\n    \"categories\" : [\"c1\", \"c2\"]\n  },\n  \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# Create a new item or set existing item's categories\n\nclient.create_event(\n  event=\"$set\",\n  entity_type=\"item\",\n  entity_id=item_id,\n  properties={\n    \"categories\" : [\"<CATEGORY_1>\", \"<CATEGORY_2>\"]\n  }\n)\n```\n  </div>\n\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n\n```php\n<?php\n\n// Create a new item or set existing item's categories\n$client->createEvent(array(\n  'event' => '$set',\n  'entityType' => 'item',\n  'entityId' => <ITEM ID>,\n  'properties' => array('categories' => array('<CATEGORY_1>', '<CATEGORY_2>'))\n));\n\n?>\n```\n  </div>\n\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create a new item or set existing item's categories\nclient.create_event(\n  '$set',\n  'item',\n  <ITEM ID>, {\n    'properties' => { 'categories' => ['<CATEGORY_1>', '<CATEGORY_2>'] }\n  }\n)\n\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\n// Create a new item or set existing item's categories\nEvent itemEvent = new Event()\n  .event(\"$set\")\n  .entityType(\"item\")\n  .entityId(<ITEM_ID>)\n  .property(\"categories\", ImmutableList.of(\"<CATEGORY_1>\", \"<CATEGORY_2>\"));\nclient.createEvent(itemEvent);\n```\n  
</div>\n\n</div>\n\nWhen the user \"u0\" view item \"i0\" on time `2014-11-10T12:34:56.123-08:00` (current time will be used if eventTime is not specified), you can send a view event. Run the following `curl` command:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n\n```\n$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \\\n-H \"Content-Type: application/json\" \\\n-d '{\n  \"event\" : \"view\",\n  \"entityType\" : \"user\",\n  \"entityId\" : \"u0\",\n  \"targetEntityType\" : \"item\",\n  \"targetEntityId\" : \"i0\",\n  \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\"\n}'\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\n# A user views an item\n\nclient.create_event(\n  event=\"view\",\n  entity_type=\"user\",\n  entity_id=<USER ID>,\n  target_entity_type=\"item\",\n  target_entity_id=<ITEM ID>\n)\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n\n```php\n<?php\n// A user views an item\n$client->createEvent(array(\n   'event' => 'view',\n   'entityType' => 'user',\n   'entityId' => <USER ID>,\n   'targetEntityType' => 'item',\n   'targetEntityId' => <ITEM ID>\n));\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# A user views an item.\nclient.create_event(\n  'view',\n  'user',\n  <USER ID>, {\n    'targetEntityType' => 'item',\n    'targetEntityId' => <ITEM ID>\n  }\n)\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\n// A user views an item\nEvent viewEvent = new Event()\n    .event(\"view\")\n    .entityType(\"user\")\n    .entityId(<USER_ID>)\n    .targetEntityType(\"item\")\n    .targetEntityId(<ITEM_ID>);\nclient.createEvent(viewEvent);\n\n```\n  </div>\n</div>\n\n<%= partial 'shared/quickstart/query_eventserver' %>\n\n### Import More Sample Data\n\n<%= partial 'shared/quickstart/import_sample_data' %>\n\nA Python import script `import_eventserver.py` is provided to import sample data. 
It imports 10 users (with user ID \"u1\" to \"u10\") and 50 items (with item ID \"i1\" to \"i50\") with some randomly assigned categories (with categories \"c1\" to \"c6\"). Each user then randomly views 10 items.\n\n<%= partial 'shared/quickstart/install_python_sdk' %>\n\nMake sure you are under the `MySimilarProduct` directory. Execute the following to import the data:\n\n```\n$ cd MySimilarProduct\n$ python data/import_eventserver.py --access_key $ACCESS_KEY\n```\n\nYou should see the following output:\n\n```\n...\nUser u10 views item i20\nUser u10 views item i17\nUser u10 views item i22\nUser u10 views item i31\nUser u10 views item i18\nUser u10 views item i29\n160 events are imported.\n```\n\n<%= partial 'shared/quickstart/query_eventserver_short' %>\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MySimilarProduct' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MySimilarProduct' } %>\n\n## 6. Use the Engine\n\nNow you can retrieve predicted results. To retrieve 4 items which are similar to item ID \"i1\", send this JSON `{ \"items\": [\"i1\"], \"num\": 4 }` to the deployed engine and it will return a JSON of the recommended items. 
Simply send a query by making a HTTP request or through the `EngineClient` of an SDK.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"items\": [\"i1\"], \"num\": 4 }' \\\nhttp://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({\"items\": [\"i1\"], \"num\": 4})\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('items'=> array('i1'), 'num'=> 4));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new('http://localhost:8000')\n\n# Query PredictionIO.\nresponse = client.send_query('items' => ['i1'], 'num' => 4)\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.common.collect.ImmutableList;\nimport com.google.gson.JsonObject;\n\nimport org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(\"http://localhost:8000\");\n\n// query\n\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"items\", ImmutableList.of(\"i1\"),\n        \"num\",  4\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\n  \"itemScores\":[\n    {\"item\":\"i43\",\"score\":0.7071067811865475},\n    {\"item\":\"i21\",\"score\":0.7071067811865475},\n    
{\"item\":\"i46\",\"score\":0.5773502691896258},\n    {\"item\":\"i8\",\"score\":0.5773502691896258}\n  ]\n}\n```\n\n*MySimilarProduct* is now running.\n\n<%= partial 'shared/quickstart/production' %>\n\n\n## Advanced Query\n\n### Recommend items which are similar to multiple items:\n\n```\ncurl -H \"Content-Type: application/json\" \\\n-d '{ \"items\": [\"i1\", \"i3\"], \"num\": 10}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i12\",\"score\":1.1700499715209998},{\"item\":\"i21\",\"score\":1.1153550716504106},{\"item\":\"i43\",\"score\":1.1153550716504106},{\"item\":\"i14\",\"score\":1.0773502691896257},{\"item\":\"i39\",\"score\":1.0773502691896257},{\"item\":\"i26\",\"score\":1.0773502691896257},{\"item\":\"i44\",\"score\":1.0773502691896257},{\"item\":\"i38\",\"score\":0.9553418012614798},{\"item\":\"i36\",\"score\":0.9106836025229592},{\"item\":\"i46\",\"score\":0.9106836025229592}]}\n```\n\nIn addition, the Query support the following optional parameters `categories`, `whiteList` and `blackList`.\n\n### Recommend items in selected categories:\n\n```\ncurl -H \"Content-Type: application/json\" \\\n-d '{\n  \"items\": [\"i1\", \"i3\"],\n  \"num\": 10,\n  \"categories\" : [\"c4\", \"c3\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i21\",\"score\":1.1153550716504106},{\"item\":\"i14\",\"score\":1.0773502691896257},{\"item\":\"i26\",\"score\":1.0773502691896257},{\"item\":\"i39\",\"score\":1.0773502691896257},{\"item\":\"i44\",\"score\":1.0773502691896257},{\"item\":\"i45\",\"score\":0.7886751345948129},{\"item\":\"i47\",\"score\":0.7618016810571367},{\"item\":\"i9\",\"score\":0.7618016810571367},{\"item\":\"i28\",\"score\":0.7618016810571367},{\"item\":\"i6\",\"score\":0.7618016810571367}]}\n```\n\n### Recommend items in the whiteList:\n\n```\ncurl -H \"Content-Type: application/json\" \\\n-d '{\n  \"items\": [\"i1\", \"i3\"],\n  \"num\": 10,\n  \"categories\" : [\"c4\", \"c3\"],\n  \"whiteList\": 
[\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i21\",\"score\":1.1153550716504106},{\"item\":\"i26\",\"score\":1.0773502691896257}]}\n```\n\n### Recommend items not in the blackList:\n\n```\ncurl -H \"Content-Type: application/json\" \\\n-d '{\n  \"items\": [\"i1\", \"i3\"],\n  \"num\": 10,\n  \"categories\" : [\"c4\", \"c3\"],\n  \"blackList\": [\"i21\", \"i26\", \"i40\"]\n}' \\\nhttp://localhost:8000/queries.json\n\n{\"itemScores\":[{\"item\":\"i39\",\"score\":1.0773502691896257},{\"item\":\"i44\",\"score\":1.0773502691896257},{\"item\":\"i14\",\"score\":1.0773502691896257},{\"item\":\"i45\",\"score\":0.7886751345948129},{\"item\":\"i47\",\"score\":0.7618016810571367},{\"item\":\"i6\",\"score\":0.7618016810571367},{\"item\":\"i28\",\"score\":0.7618016810571367},{\"item\":\"i9\",\"score\":0.7618016810571367},{\"item\":\"i29\",\"score\":0.6220084679281463},{\"item\":\"i30\",\"score\":0.5386751345948129}]}\n```\n\n#### [Next: DASE Components Explained](/templates/similarproduct/dase/)\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/recommended-user.html.md.erb",
    "content": "---\ntitle: Recommend Users (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis examples demonstrates how to recommend users instead of items.\n\nInstead of using user-to-item events to find similar items, user-to-user events are used to find similar users you may also follow, like, etc (depending on which events are used in training and how the events are used). By default, \"follow\" events are used.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-similarproduct/recommended-user).\n\n\n## Modification\n\n### Engine.scala\n\nIn Query, change `items` to `users` and remove categories. Change `ItemScore` case class to SimilarUserScore. 
In PredictedResult, change `Array[ItemScore]` to `Array[SimilarUserScore]`.\n\n```scala\ncase class Query(\n  users: List[String],\n  num: Int,\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  similarUserScores: Array[SimilarUserScore]\n){\n  override def toString: String = similarUserScores.mkString(\",\")\n}\n\ncase class SimilarUserScore(\n  user: String,\n  score: Double\n)\n```\n\n### DataSource.scala\n\nIn DataSource, change `ViewEvent` case class to FollowEvent. Remove `Item` case class.\n\nChange\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nto\n\n```scala\n// MODIFIED\ncase class FollowEvent(user: String, followedUser: String, t: Long)\n```\n\nModify TrainingData class to use followEvent\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val followEvents: RDD[FollowEvent] // MODIFIED\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    // MODIFIED\n    s\"followEvents: [${followEvents.count()}] (${followEvents.take(2).toList}...)\"\n  }\n}\n```\n\nModify `readTraining()` function of `DataSource` to read \"follow\" events (commented with \"// MODIFIED\"). 
Remove the RDD of (entityID, Item):\n\n```scala\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = ...\n\n    // MODIFIED\n    // get all \"user\" \"follow\" \"followedUser\" events\n    val followEventsRDD: RDD[FollowEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"follow\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"user\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val followEvent = try {\n          event.event match {\n            case \"follow\" => FollowEvent(\n              user = event.entityId,\n              followedUser = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event $event is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert $event to FollowEvent.\" +\n              s\" Exception: $e.\")\n            throw e\n          }\n        }\n        followEvent\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      followEvents = followEventsRDD // MODIFIED\n    )\n  }\n```\n\n### Preparator.scala\n\nModify Preparator to pass followEvents to the algorithm as PreparedData.\n\nModify Preparator's `prepare()` method:\n\n```scala\n\n  ...\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      followEvents = trainingData.followEvents) // MODIFIED\n  }\n```\n\nModify `PreparedData` class:\n\n```scala\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val followEvents: RDD[FollowEvent] // MODIFIED\n) extends Serializable\n\n```\n\n### ALSAlgorithm.scala\n\nModify ALSModel class to use similar user. 
Modify `train()` method to train with follow event. Modify `predict()` method to predict similar users.\n\n### Test the Result\n\nThen we can build/train/deploy the engine and test the result:\n\nThe query\n\n```bash\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"users\": [\"u1\"], \"num\": 4 }' \\\nhttp://localhost:8000/queries.json\n```\n\nwill return the result\n\n```json\n{\n  \"similarUserScores\":[\n    {\"user\":\"u3\",\"score\":0.7574200014043541},\n    {\"user\":\"u10\",\"score\":0.6484507108863744},\n    {\"user\":\"u43\",\"score\":0.64741489488357},\n    {\"user\":\"u29\",\"score\":0.5767264820728124}\n  ]\n}\n```\n\nThat's it! Now your engine can recommend users.\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/return-item-properties.html.md.erb",
    "content": "---\ntitle: Returns Item Properties (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nYou can modify the [default DataSource](dase.html#data) to read your custom properties or different Entity Type.\n\nThis explains how to add user defined properties to items returned by your engine. 
We add properties \"title\", \"date\" and \"imdbUrl\" for entity type \"item\".\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-similarproduct/return-item-properties).\n\n>> Note: you also need import events with these properties accordingly.\n\n## Modification\n\n### DataSource.scala\n\n- modify the `Item` parameters\n- modify how to create the `Item` object using the entity properties\n\n```scala\n\n// MODIFIED\ncase class Item(\n     title: String,\n     date: String,\n     imdbUrl: String,\n     categories: Option[List[String]])\n\n...\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    ...\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        // MODIFIED\n        Item(\n          title = properties.get[String](\"title\"),\n          date = properties.get[String](\"date\"),\n          imdbUrl = properties.get[String](\"imdbUrl\"),\n          categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. 
Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    ...\n  }\n```\n\n### Engine.scala\n\nModify the `ItemScore` parameters too.\n\n```scala\n// MODIFIED\ncase class ItemScore(\n  item: String,\n  title: String,\n  date: String,\n  imdbUrl: String,\n  score: Double\n) extends Serializable\n```\n\n### ALSAlgorithm.scala\n\nModify how to create the ItemScore object using the properties.\n\n```scala\n\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    ...\n\n    val itemScores = topScores.map { case (i, s) =>\n      // MODIFIED\n      val it = model.items(i)\n      ItemScore(\n        item = model.itemIntStringMap(i),\n        title = it.title,\n        date = it.date,\n        imdbUrl = it.imdbUrl,\n        score = s\n      )\n    }\n\n    ...\n  }\n\n```\n\nUsing `model.items(i)` you can receive corresponding object of the `Item` class, and now you can access its properties which you created during previous step.\n\n### Test the Result\n\nThen we can build/train/deploy the engine and test the result:\n\nThe query\n\n```bash\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"items\": [\"i1\"], \"num\": 4 }' \\\nhttp://localhost:8000/queries.json\n```\n\nwill return the result\n\n```json\n{\n  \"itemScores\":[\n    {\"item\":\"i3\",\"title\":\"title for movie i3\",\"date\":\"1947\",\"imdbUrl\":\"http://imdb.com/fake-url/i3\",\"score\":0.5865418718902017},\n    {\"item\":\"i44\",\"title\":\"title for movie i44\",\"date\":\"1941\",\"imdbUrl\":\"http://imdb.com/fake-url/i44\",\"score\":0.5740199916714374},\n    {\"item\":\"i37\",\"title\":\"title for movie i37\",\"date\":\"1940\",\"imdbUrl\":\"http://imdb.com/fake-url/i37\",\"score\":0.5576820095310056},\n    {\"item\":\"i6\",\"title\":\"title for movie i6\",\"date\":\"1947\",\"imdbUrl\":\"http://imdb.com/fake-url/i6\",\"score\":0.45856345689769473}\n  ]\n}\n```\n\nThat's it! Your engine can return more information.\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/rid-user-set-event.html.md.erb",
    "content": "---\ntitle: Get Rid of Events for Users (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nIn some cases, if you don't need to keep track the user ID being created/deleted or user properties changes with events, then you can simplify the template as described in this example to get rid of '$set' events for users.\n\nYou can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-similarproduct/rid-user-set-event).\n\n\n## Modification\n\n### DataSource.scala\n\nModify TrainingData class to remove the RDD of users.\n\n```scala\nclass TrainingData(\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable {\n  override def toString = {\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\"\n  }\n}\n```\n\nModify `readTraining()` function of `DataSource` to remove the RDD of (entityID, User).\n\n```scala\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // REMOVED\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      
appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    ...\n\n    new TrainingData(\n      items = itemsRDD,\n      viewEvents = viewEventsRDD\n    )\n  }\n```\n\n### Preparator.scala\n\nModify Preparator to remove the RDD of users.\n\nModify Preparator's `parpare()` method:\n\n```scala\n\n  ...\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents)\n  }\n```\n\nModify `PreparedData` class:\n\n```scala\nclass PreparedData(\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable\n```\n\n### ALSAlgorithm.scala\n\nModify `train()` method:\n\n- remove the check of users in PreparedData\n- modify user index BiMap to extract the user ID from the `viewEvents`\n\n```scala\n\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    ...\n    // REMOVED\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    ...\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.viewEvents.map(_.user)) // MODIFIED\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    ...\n\n  }\n\n```\n\nYou are ready to run pio build, train and deploy as described in the Quick Start. Simply send the same queries as described in the Quick Start. The result will be the same.\n\nThat's it! Now your engine can get rid of '$set' events for users.\n"
  },
  {
    "path": "docs/manual/source/templates/similarproduct/train-with-rate-event.html.md.erb",
    "content": "---\ntitle: Train with Rate Event (Similar Product)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nBy default, the similar product template uses implicit preference, such as \"view\" event.\n\nTo handle explicit preference, such as \"rating\" given to item by users, you can customize the template. Higher \"rating\" means a stronger indication that the user likes the item.\n\nThis examples demonstrates how to modify similar product template to use \"rate\" event as Training Data. You can find the complete modified source code [here](https://github.com/apache/predictionio/tree/develop/examples/scala-parallel-similarproduct/train-with-rate-event).\n\n\n## Modification\n\n### DataSource.scala\n\nIn DataSource, change `ViewEvent` case class to RateEvent. 
Add `rating: Double` is added to the RateEvent.\n\nChange\n\n```scala\ncase class ViewEvent(user: String, item: String, t: Long)\n```\n\nto\n\n```scala\n// MODIFIED\ncase class RateEvent(user: String, item: String, rating: Double, t: Long)\n```\n\nModify TrainingData class to use rateEvent\n\n```scala\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent] // MODIFIED\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    // MODIFIED\n    s\"rateEvents: [${rateEvents.count()}] (${rateEvents.take(2).toList}...)\"\n  }\n}\n```\n\nModify `readTraining()` function of `DataSource` to read \"rate\" events (commented with \"// MODIFIED\"). Replace all `ViewEvent` with `RateEvent`. Replace all `viewEventsRDD` with `rateEventsRDD`. Retrieve the rating value from the event properties:\n\n```scala\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    ...\n\n    // get all \"user\" \"rate\" \"item\" events\n    val rateEventsRDD: RDD[RateEvent] = PEventStore.find( // MODIFIED\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val rateEvent = try { // MODIFIED\n          event.event match {\n            case \"rate\" => RateEvent( // MODIFIED\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              rating = event.properties.get[Double](\"rating\"), // ADDED\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n         
   logger.error(s\"Cannot convert ${event} to RateEvent.\" + // MODIFIED\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        rateEvent // MODIFIED\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      rateEvents = rateEventsRDD // MODIFIED\n    )\n  }\n```\n\n### Preparator.scala\n\nModify Preparator to pass rateEvents to algorithm as PreparedData (Replace all `ViewEvent` with `RateEvent`. Replace all `viewEvents` with `rateEvents`)\n\nModify Preparator's `parpare()` method:\n\n```scala\n\n  ...\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      rateEvents = trainingData.rateEvents) // MODIFIED\n  }\n```\n\nModify `PreparedData` class:\n\n```scala\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent] // MODIFIED\n) extends Serializable\n\n```\n\n### ALSAlgorithm.scala\n\nModify `train()` method to train with rate event.\n\n```scala\n\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    require(!data.rateEvents.take(1).isEmpty, // MODIFIED\n      s\"rateEvents in PreparedData cannot be empty.\" + // MODIFIED\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n\n    ...\n\n    val mllibRatings = data.rateEvents // MODIFIED\n      .map { r =>\n        ...\n\n        ((uindex, iindex), (r.rating,r.t)) //MODIFIED\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey { case (v1, v2) => // MODIFIED\n        // if a user may rate same item with different value at different times,\n        // use the latest value for this case.\n        // Can remove this reduceByKey() if no need to support this case.\n 
       val (rating1, t1) = v1\n        val (rating2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }\n      .map { case ((u, i), (rating, t)) => // MODIFIED\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, rating) // MODIFIED\n      }\n      .cache()\n\n    ...\n\n  }\n\n```\n\nModify `train()` method to use `ALS.trainImplicit()`:\n\nChange the following from:\n\n```scala\n    ...\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n    ...\n\n```\n\nto:\n\n```scala\n    ...\n\n    val m = ALS.train( // MODIFIED\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n    ...\n\n```\n\nThat's it! Now your engine can train model with rate events.\n"
  },
  {
    "path": "docs/manual/source/templates/vanilla/dase.html.md.erb",
    "content": "---\ntitle: DASE Components Explained (Vanilla)\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<%= partial 'shared/dase/dase', locals: { template_name: 'Vanilla Engine Template' } %>\n\nBefore you use Vanilla template to develop your engine, it's recommended that you go through the DASE explanation of one of the other templates (e.g. 
*Recommendation template*, *Classification template*) to see a concrete example of how the DASE components are used.\n\n## Algorithm\n\nPredictionIO supports two types of algorithms:\n\n- **P2LAlgorithm**: trains a Model which does not contain RDD\n- **PAlgorithm**: trains a Model which contains RDD\n\n### P2LAlgorithm\n\nBy default, the Algorithm of the Vanilla template trains a simple model which does not contain RDD, as you can see in Algorithm.scala:\n\n```scala\n\nclass Model(val mc: Int) extends Serializable {\n  override def toString = s\"mc=${mc}\"\n}\n\n```\n\nIn this case, the `Algorithm` class extends `P2LAlgorithm`, as you can see in Algorithm.scala:\n\n```scala\n\nclass Algorithm(val ap: AlgorithmParams)\n  // extends PAlgorithm if Model contains RDD[]\n  extends P2LAlgorithm[PreparedData, Model, Query, PredictedResult] {\n\n  ...\n\n  def train(sc: SparkContext, data: PreparedData): Model = {\n    // Simply count number of events\n    // and multiple it by the algorithm parameter\n    // and store the number as model\n    val count = data.events.count().toInt * ap.mult\n    new Model(mc = count)\n  }\n\n  ...\n\n}\n\n```\n\nFor `P2LAlgorithm`, the Model is automatically serialized and persisted by PredictionIO after training.\n\nNOTE: You may also refer to Classification engine template for another example of P2LAlgorithm.\n\n### PAlgorithm\n\n`PAlgorithm` should be used when your Model contains RDD. The model produced by `PAlgorithm` is not persisted by default. To persist the model, you need to do the following:\n\n- The Model class should extend the `IPersistentModel` trait and implement the `save()` method for saving the model. The trait `IPersistentModel` requires a type parameter which is the class type of algorithm parameter.\n- Implement a Model factory object which extends the `IPersistentModelLoader` trait and implement the `apply()` for loading the model. 
The trait `IPersistentModelLoader` requires two type parameters which are the types of algorithm parameter and the model produced by the algorithm.\n\nFor example, let's say we add a new field `mRdd` which is of type `RDD[Int]` to the Vanilla template's `Model` class. The `Model` class is modified as follows:\n\n```scala\n\nclass Model(\n  val mc: Int,\n  val mRdd: RDD[Int] // ADDED\n  ) extends IPersistentModel[AlgorithmParams] with Serializable { // ADDED\n\n    // ADDED\n    def save(id: String, params: AlgorithmParams,\n      sc: SparkContext): Boolean = {\n\n      sc.parallelize(Seq(mc)).saveAsObjectFile(s\"/tmp/${id}/mc\")\n      mRdd.saveAsObjectFile(s\"/tmp/${id}/mRdd\")\n      true\n    }\n\n    override  def toString = {\n      s\"mc=${mc}\" +\n      // ADDED for debugging\n      s\"mRdd=[${mRdd.count()}] (${mRdd.take(2).toList}...)\"\n    }\n}\n\n```\n\nNotice that it extends `IPersistentModel[AlgorithmParams]` and implements the `save()` method.\n\nNext, we need to implement a Model factory object to load back the persisted model and return the Model instance:\n\n```scala\n\n// ADDED\nobject Model\n  extends IPersistentModelLoader[AlgorithmParams, Model] {\n  def apply(id: String, params: AlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new Model(\n      mc = sc.get.objectFile[Int](s\"/tmp/${id}/mc\").first,\n      mRdd = sc.get.objectFile(s\"/tmp/${id}/mRdd\")\n    )\n  }\n}\n\n```\n\nFinally, the `Algorithm` class needs to extend `PAlgorithm` and generate the RDD data for the new `mRdd` field in `Model`:\n\n```scala\n\nclass Algorithm(val ap: AlgorithmParams)\n  extends PAlgorithm[PreparedData, Model, Query, PredictedResult] { // MODIFIED\n\n  ...\n\n  def train(sc: SparkContext, data: PreparedData): Model = {\n    // Simply count number of events\n    // and multiple it by the algorithm parameter\n    // and store the number as model\n    val count = data.events.count().toInt * ap.mult\n\n    // ADDED\n    // get the spark context\n    val 
sc = data.events.context\n    // create dummy RDD[Int] for demonstration purpose\n    val mRdd = sc.parallelize(Seq(1,2,3))\n\n    new Model(\n      mc = count,\n      mRdd = mRdd // ADDED\n    )\n  }\n\n  ...\n\n}\n\n```\n\nNOTE: You may also refer to Similar Product engine template for another example of PAlgorithm.\n"
  },
  {
    "path": "docs/manual/source/templates/vanilla/quickstart.html.md.erb",
    "content": "---\ntitle: Quick Start - Vanilla Engine Template\n---\n\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Overview\n\nThe purpose of the Vanilla template is for developing new engine when you find other engine templates do not fit your needs. This template provides a skeleton to kick start new engine development.\n\nBy default, it simply reads the \"EVENT\" involving \"ENTITY_TYPE\" and \"TARGET_ENTITY_TYPE\". The Algorithm counts the number of events and multiple it by the algorithm parameter and store it as Model. During serving, the Query is prefixed with the Model and return as PredictedResult.\n\n\n## Usage\n\n### Event Data Requirements\n\nNo special event requirement\n\n### Input Query\n\n- a string\n\n### Output PredictedResult\n\n- a string\n\n## 1. Install and Run PredictionIO\n\n<%= partial 'shared/quickstart/install' %>\n\n## 2. Create a new Engine from an Engine Template\n\n<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyNewEngine', template_name: 'Vanilla Engine Template', template_repo: 'apache/predictionio-template-skeleton' } %>\n\n## 3. Generate an App ID and Access Key\n\n<%= partial 'shared/quickstart/create_app' %>\n\n## 4a. 
Collecting Data\n\nAs long as the appId is valid, this Vanilla Engine template does not require event data in your app in order to work.\n\n## 4b. Import Sample Data\n\nAs long as the appId is valid, this Vanilla Engine template does not require event data in your app in order to work.\n\n## 5. Deploy the Engine as a Service\n\n<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyNewEngine' } %>\n\n<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyNewEngine' } %>\n\n## 6. Use the Engine\n\nNow, You can try to retrieve predicted results. The engine accepts JSON query with the field 'q': `{ \"q\" : \"foo\" }`. A prefix is added to the query data and return as PredictedResult.\n\nWith the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:\n\n<div class=\"tabs\">\n  <div data-tab=\"REST API\" data-lang=\"json\">\n```\n$ curl -H \"Content-Type: application/json\" \\\n-d '{ \"q\": \"foo\" }' http://localhost:8000/queries.json\n\n```\n  </div>\n  <div data-tab=\"Python SDK\" data-lang=\"python\">\n```python\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint engine_client.send_query({ \"q\": \"foo\" })\n```\n  </div>\n  <div data-tab=\"PHP SDK\" data-lang=\"php\">\n```php\n<?php\nrequire_once(\"vendor/autoload.php\");\nuse predictionio\\EngineClient;\n\n$client = new EngineClient('http://localhost:8000');\n\n$response = $client->sendQuery(array('q'=> \"foo\"));\nprint_r($response);\n\n?>\n```\n  </div>\n  <div data-tab=\"Ruby SDK\" data-lang=\"ruby\">\n```ruby\n# Create client object.\nclient = PredictionIO::EngineClient.new(<ENGINE DEPLOY URL>)\n\n# Query PredictionIO.\nresponse = client.send_query('q' => 'foo')\n\nputs response\n```\n  </div>\n  <div data-tab=\"Java SDK\" data-lang=\"java\">\n```java\nimport com.google.common.collect.ImmutableMap;\nimport com.google.gson.JsonObject;\n\nimport 
org.apache.predictionio.EngineClient;\n\n// create client object\nEngineClient engineClient = new EngineClient(<ENGINE DEPLOY URL>);\n\n// query\nJsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(\n        \"q\", \"foo\"\n    ));\n```\n  </div>\n</div>\n\nThe following is sample JSON response:\n\n```\n{\"p\":\"0-foo\"}\n```\n\n*MyNewEngine* is now running. You can start modifying it to build your new engine!\n\n<%= partial 'shared/quickstart/production' %>\n\n#### [Next: DASE Components Explained](/templates/vanilla/dase/)\n"
  },
  {
    "path": "docs/manual/source/tryit/index.html.slim",
    "content": "---\nlayout: tryit\ntitle: Try PredictionIO\n---\n\n.codepicnic\n  .tutorial\n\n    div class=\"jcarousel-wrapper\"\n      ul class=\"jcarousel-pagination\"\n      div class=\"jcarousel\"\n        ul\n          li\n            h2 Introduction\n            p To see how PredictionIO works, you can follow this quick tutorial to:\n            p Start the PredictionIO server\n            p Build, train, and deploy a predictive model with pre-imported data.\n            p Query prediction results.\n            p #{link_to 'Start', '#', class: 'button-primary', id: 'tryit-start'}\n\n          li\n            h2 Start\n            p Start PredictionIO and it's dependencies with:\n            p <code>$ pio-start-all</code>\n            h2 Check Status\n            p At any time you can run:\n            p <code>$ pio status</code>\n            p Which checks the status of PredictionIO and it's dependencies.\n\n          li\n            h2 Build\n            p All engine commands need to be run from within the engine directory:\n            p <code>$ cd ~/PredictionIO/MyEngine</code>\n\n            p Build the engine with:\n            p <code>$ pio build</code>\n          li\n            h2 Train and Deploy\n\n            h3 Train the engine\n            p #{link_to 'Sample data', 'http://grouplens.org/datasets/movielens/'} is already imported, you can train the model the data by running:\n            p <code>$ pio train</code>\n\n            h3 Deploy the trained engine\n            p Deploy the trained engine as a background process:\n            p <code>$ pio deploy &</code>\n\n          li\n            h2 Querying Results\n            p You can query results using cURL with after you see \"Ready to serve\":\n            p <code>$ curl -H \"Content-Type: application/json\" -d '{ \"items\": [\"296\"], \"num\": 5 }' http://localhost:8000/queries.json</code>\n            p This will return 5 recommended movies for movie 296 (Pulp Fiction).\n\n          li\n  
          h2 Next Steps\n            p #{link_to 'Install', 'http://predictionio.apache.org/install/'} PredictionIO on your own computer!\n            h3 Support\n            p Get support on our #{link_to 'Google Group', 'https://groups.google.com/forum/#!forum/predictionio-user'} or on Twitter #{link_to '@PredictionIO', 'https://twitter.com/predictionio/'}\n        .jcarousel-controls\n          a href=\"#\" class=\"jcarousel-control-prev\"\n            | &lsaquo; Prev\n          a href=\"#\" class=\"jcarousel-control-next\"\n            | Next &rsaquo;\n\n\n\n  .iframe\n    iframe src=\"https://codepicnic.com/bites/predictionio-0-9-1/embed?sidebar=closed&hostname=predictionio&hide=readme\"\n"
  },
  {
    "path": "docs/scaladoc/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nScala API Documentation\n=======================\n\n1.  Run this command at the project's root.\n    ```\n    $ sbt/sbt pioUnidoc\n    ```\n\n2.  Point your web browser at `target/scala-2.11/unidoc/index.html`.\n"
  },
  {
    "path": "docs/scaladoc/api-docs.css",
    "content": ".developer {\n  background-color: #44751E;\n}\n\n.experimental {\n  background-color: #257080;\n}\n\n.badge {\n  font-family: Arial, san-serif;\n  float: right;\n}\n"
  },
  {
    "path": "docs/scaladoc/api-docs.js",
    "content": "$(document).ready(function() {\n  var annotations = $(\"dt:contains('Annotations')\").next(\"dd\").children(\"span.name\");\n  addBadges(annotations, \"DeveloperApi\", \":: DeveloperApi ::\", '<span class=\"developer badge\">Core Developer API</span>');\n  addBadges(annotations, \"Experimental\", \":: Experimental ::\", '<span class=\"experimental badge\">Experimental</span>');\n});\n\nfunction addBadges(allAnnotations, name, tag, html) {\n  var annotations = allAnnotations.filter(\":contains('\" + name + \"')\")\n  var tags = $(\".cmt:contains(\" + tag + \")\")\n\n  // Remove identifier tags from comments\n  tags.each(function(index) {\n    var oldHTML = $(this).html();\n    var newHTML = oldHTML.replace(tag, \"\");\n    $(this).html(newHTML);\n  });\n\n  // Add badges to all containers\n  tags.prevAll(\"h4.signature\")\n    .add(annotations.closest(\"div.fullcommenttop\"))\n    .add(annotations.closest(\"div.fullcomment\").prevAll(\"h4.signature\"))\n    .prepend(html);\n}\n"
  },
  {
    "path": "docs/scaladoc/rootdoc.txt",
    "content": "This is the API documentation of Apache PredictionIO.\n\n== Package Structure ==\n\n - [[org.apache.predictionio.controller]] - The common starting point. Building blocks of a prediction engine.\n - [[org.apache.predictionio.data.store]] - Event Store API.\n\n== Experimental Features ==\n\nClasses and methods marked with <span class=\"experimental badge\" style=\"float:\nnone;\">Experimental</span> are user-facing features which have not been\nofficially adopted by the PredictionIO project. These are subject to change or\nremoval in minor releases.\n\n== Developer API ==\n\nClasses and methods marked with <span class=\"developer badge\" style=\"float:\nnone;\">Core Developer API</span> are intended for advanced users who want to\nextend PredictionIO through lower level interfaces. These are subject to changes\nor removal in minor releases.\n"
  },
  {
    "path": "e2/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-e2\"\n\nparallelExecution in Test := false\n\nlibraryDependencies ++= Seq(\n  \"org.apache.spark\" %% \"spark-mllib\" % sparkVersion.value % \"provided\",\n  \"org.scalatest\"    %% \"scalatest\" % \"2.2.5\" % \"test\")\n\npomExtra := childrenPomExtra.value\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/engine/BinaryVectorizer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.mllib.linalg.Vector\nimport scala.collection.immutable.HashMap\nimport scala.collection.immutable.HashSet\n\nclass BinaryVectorizer(propertyMap : HashMap[(String, String), Int])\nextends Serializable {\n\n  val properties: Array[(String, String)] = propertyMap.toArray.sortBy(_._2).map(_._1)\n  val numFeatures = propertyMap.size\n\n  override def toString: String = {\n    s\"BinaryVectorizer($numFeatures): \" + properties.map(e => s\"(${e._1}, ${e._2})\").mkString(\",\")\n  }\n\n  def toBinary(map :  Array[(String, String)]) : Vector = {\n    val mapArr : Seq[(Int, Double)] = map.flatMap(\n      e => propertyMap.get(e).map(idx => (idx, 1.0))\n    )\n\n    Vectors.sparse(numFeatures, mapArr)\n  }\n}\n\n\nobject BinaryVectorizer {\n  def apply (input : RDD[HashMap[String, String]], properties : HashSet[String])\n  : BinaryVectorizer = {\n    new BinaryVectorizer(HashMap(\n      input.flatMap(identity)\n        .filter(e => properties.contains(e._1))\n        .distinct\n        .collect\n        
.zipWithIndex : _*\n    ))\n  }\n\n  def apply(input: Seq[(String, String)]): BinaryVectorizer = {\n    val indexed: Seq[((String, String), Int)] = input.zipWithIndex\n    new BinaryVectorizer(HashMap(indexed:_*))\n  }\n}\n\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/engine/CategoricalNaiveBayes.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.spark.rdd.RDD\n\n/** Class for training a naive Bayes model with categorical variables */\nobject CategoricalNaiveBayes {\n\n  /** Train with data points and return the model\n    *\n    * @param points training data points\n    */\n  def train(points: RDD[LabeledPoint]): CategoricalNaiveBayesModel = {\n    val labelCountFeatureLikelihoods = points.map { p =>\n      (p.label, p.features)\n    }.combineByKey[(Long, Array[Map[String, Long]])](\n        createCombiner =\n          (features: Array[String]) => {\n            val featureCounts = features.map { feature =>\n              Map[String, Long]().withDefaultValue(0L).updated(feature, 1L)\n            }\n\n            (1L, featureCounts)\n          },\n        mergeValue =\n          (c: (Long, Array[Map[String, Long]]), features: Array[String]) => {\n            (c._1 + 1L, c._2.zip(features).map { case (m, feature) =>\n              m.updated(feature, m(feature) + 1L)\n            })\n          },\n        mergeCombiners =\n          (\n            c1: (Long, Array[Map[String, Long]]),\n            c2: (Long, 
Array[Map[String, Long]])) => {\n            val labelCount1 = c1._1\n            val labelCount2 = c2._1\n            val featureCounts1 = c1._2\n            val featureCounts2 = c2._2\n\n            (labelCount1 + labelCount2,\n              featureCounts1.zip(featureCounts2).map { case (m1, m2) =>\n                m1 ++ m2.map { case (k, v) => k -> (v + m1(k))}\n              })\n          }\n      ).mapValues { case (labelCount, featureCounts) =>\n      val featureLikelihoods = featureCounts.map { featureCount =>\n        // mapValues does not return a serializable map\n        featureCount.mapValues(count => math.log(count.toDouble / labelCount))\n          .map(identity)\n      }\n\n      (labelCount, featureLikelihoods)\n    }.collect().toMap\n\n    val noOfPoints = labelCountFeatureLikelihoods.map(_._2._1).sum\n    val priors =\n      labelCountFeatureLikelihoods.mapValues { countFeatureLikelihoods =>\n        math.log(countFeatureLikelihoods._1 / noOfPoints.toDouble)\n      }\n    val likelihoods = labelCountFeatureLikelihoods.mapValues(_._2)\n\n    CategoricalNaiveBayesModel(priors, likelihoods)\n  }\n}\n\n/** Model for naive Bayes classifiers with categorical variables.\n  *\n  * @param priors log prior probabilities\n  * @param likelihoods log likelihood probabilities\n  */\ncase class CategoricalNaiveBayesModel(\n  priors: Map[String, Double],\n  likelihoods: Map[String, Array[Map[String, Double]]]) extends Serializable {\n\n  val featureCount = likelihoods.head._2.size\n\n  /** Calculate the log score of having the given features and label\n    *\n    * @param point label and features\n    * @param defaultLikelihood a function that calculates the likelihood when a\n    *                          feature value is not present. The input to the\n    *                          function is the other feature value likelihoods.\n    * @return log score when label is present. 
None otherwise.\n    */\n  def logScore(\n    point: LabeledPoint,\n    defaultLikelihood: (Seq[Double]) => Double = ls => Double.NegativeInfinity\n    ): Option[Double] = {\n    val label = point.label\n    val features = point.features\n\n    if (!priors.contains(label)) {\n      None\n    } else {\n      Some(logScoreInternal(label, features, defaultLikelihood))\n    }\n  }\n\n  private def logScoreInternal(\n    label: String,\n    features: Array[String],\n    defaultLikelihood: (Seq[Double]) => Double = ls => Double.NegativeInfinity\n    ): Double = {\n\n    val prior = priors(label)\n    val likelihood = likelihoods(label)\n\n    val likelihoodScores = features.zip(likelihood).map {\n      case (feature, featureLikelihoods) =>\n        featureLikelihoods.getOrElse(\n          feature,\n          defaultLikelihood(featureLikelihoods.values.toSeq)\n        )\n    }\n\n    prior + likelihoodScores.sum\n  }\n\n  /** Return the label that yields the highest score\n    *\n    * @param features features for classification\n    *\n    */\n  def predict(features: Array[String]): String = {\n    priors.keySet.map { label =>\n      (label, logScoreInternal(label, features))\n    }.toSeq\n      .sortBy(_._2)(Ordering.Double.reverse)\n      .take(1)\n      .head\n      ._1\n  }\n}\n\n/** Class that represents the features and labels of a data point.\n  *\n  * @param label Label of this data point\n  * @param features Features of this data point\n  */\ncase class LabeledPoint(label: String, features: Array[String]) {\n  override def toString: String = {\n    val featuresString = features.mkString(\"[\", \",\", \"]\")\n\n    s\"($label, $featuresString)\"\n  }\n\n  override def equals(other: Any): Boolean = other match {\n    case that: LabeledPoint => that.toString == this.toString\n    case _ => false\n  }\n\n  override def hashCode(): Int = {\n    this.toString.hashCode\n  }\n\n}\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/engine/MarkovChain.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.spark.mllib.linalg.distributed.CoordinateMatrix\nimport org.apache.spark.mllib.linalg.{SparseVector, Vectors}\nimport org.apache.spark.rdd.RDD\n\n/** Class for training a Markov Chain model */\nobject MarkovChain {\n\n  /** Train a Markov Chain model\n    *\n    * Each row of the matrix is reduced to its topN largest tallies, and each\n    * kept tally is divided by the sum of the whole row.\n    *\n    * @param matrix Tally of all state transitions\n    * @param topN Use the top-N tally for each state\n    * @return model with one sparse transition vector per row that has entries\n    */\n  def train(matrix: CoordinateMatrix, topN: Int): MarkovChainModel = {\n    val noOfStates = matrix.numCols().toInt\n    val transitionVectors = matrix.entries\n      .keyBy(_.i.toInt)\n      .groupByKey()\n      .mapValues { rowEntries =>\n      // the row total covers every entry, not only the top-N ones\n      val total = rowEntries.map(_.value).sum\n      // keep the topN largest tallies, then order them by column index\n      val sortedTopN = rowEntries.toSeq\n        .sortBy(_.value)(Ordering.Double.reverse)\n        .take(topN)\n        .map(me => (me.j.toInt, me.value / total))\n        .sortBy(_._1)\n\n      new SparseVector(\n        noOfStates,\n        sortedTopN.map(_._1).toArray,\n        sortedTopN.map(_._2).toArray)\n    }\n\n    new MarkovChainModel(\n      transitionVectors,\n      topN)\n  }\n}\n\n/** Markov Chain model\n  *\n  * 
@param transitionVectors transition vectors keyed by row index\n  * @param n top N used to construct the model\n  */\ncase class MarkovChainModel(\n  transitionVectors: RDD[(Int, SparseVector)],\n  n: Int) {\n\n  /** Calculate the probabilities of the next state\n    *\n    * @param currentState probabilities of the current state\n    * @return probabilities of the next state, one value per entry of\n    *         currentState\n    */\n  def predict(currentState: Seq[Double]): Seq[Double] = {\n    // multiply the input with transition matrix row by row\n    val nextStateVectors = transitionVectors.map { case (rowIndex, vector) =>\n        val values = vector.indices.map { index =>\n          vector(index) * currentState(rowIndex)\n        }\n\n        Vectors.sparse(currentState.size, vector.indices, values)\n    }.collect()\n\n    // sum up to get the total probabilities\n    (0 until currentState.size).map { index =>\n      nextStateVectors.map { vector =>\n        vector(index)\n      }.sum\n    }\n  }\n}\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/engine/PythonEngine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport java.util.Arrays\n\nimport org.apache.predictionio.controller._\nimport org.apache.predictionio.workflow.KryoInstantiator\nimport org.apache.spark.SparkContext\nimport org.apache.spark.ml.PipelineModel\nimport org.apache.spark.sql.catalyst.expressions.Literal\nimport org.apache.spark.sql.types.{StructField, StructType}\nimport org.apache.spark.sql.{Row, SparkSession}\n\n\n/** Engine factory that serves predictions from a Spark ML PipelineModel */\nobject PythonEngine extends EngineFactory {\n\n  /** A query is a map from column name to column value */\n  private[engine] type Query = Map[String, Any]\n\n  /** Build the engine from the Python data source, preparator, algorithm\n    * (registered under the name \"default\") and serving classes\n    */\n  def apply(): Engine[EmptyTrainingData, EmptyEvaluationInfo, EmptyPreparedData,\n    Query, Row, EmptyActualResult] = {\n    new Engine(\n      classOf[PythonDataSource],\n      classOf[PythonPreparator],\n      Map(\"default\" -> classOf[PythonAlgorithm]),\n      classOf[PythonServing])\n  }\n\n  /** Serialize the given model, wrapped in a Seq, with the Kryo injection\n    *\n    * @param model pipeline model to serialize\n    * @return serialized bytes\n    */\n  def models(model: PipelineModel): Array[Byte] = {\n    val kryo = KryoInstantiator.newKryoInjection\n    kryo(Seq(model))\n  }\n\n}\n\nimport PythonEngine.Query\n\n/** Data source that reads nothing and returns an empty placeholder */\nclass PythonDataSource extends\n  PDataSource[EmptyTrainingData, EmptyEvaluationInfo, Query, EmptyActualResult] {\n  def readTraining(sc: SparkContext): EmptyTrainingData 
= new SerializableClass()\n}\n\n/** Preparator that ignores its input and returns an empty placeholder */\nclass PythonPreparator extends PPreparator[EmptyTrainingData, EmptyPreparedData] {\n  def prepare(sc: SparkContext, trainingData: EmptyTrainingData): EmptyPreparedData =\n    new SerializableClass()\n}\n\nobject PythonServing {\n  /** Query key under which the columns to select are passed to the algorithm */\n  private[engine] val columns = \"PythonPredictColumns\"\n\n  /** Serving parameters\n    *\n    * @param columns names of the columns to return from the prediction\n    */\n  case class Params(columns: Seq[String]) extends org.apache.predictionio.controller.Params\n}\n\n/** Serving that adds the configured column names to every query */\nclass PythonServing(params: PythonServing.Params) extends LFirstServing[Query, Row] {\n  override def supplement(q: Query): Query = {\n    q + (PythonServing.columns -> params.columns)\n  }\n}\n\n/** Algorithm that predicts with an already trained PipelineModel */\nclass PythonAlgorithm extends\n  P2LAlgorithm[EmptyPreparedData, PipelineModel, Query, Row] {\n\n  /** Not implemented here: calling it throws NotImplementedError */\n  def train(sc: SparkContext, data: EmptyPreparedData): PipelineModel = ???\n\n  /** Turn the query into a one-row DataFrame, run the model on it and return\n    * the selected columns of the first result row\n    *\n    * @param model pipeline model used for the transformation\n    * @param query column values plus the entry added by PythonServing that\n    *              lists the columns to select\n    * @return first row of the transformed data, restricted to those columns\n    */\n  def predict(model: PipelineModel, query: Query): Row = {\n    val selectCols = query(PythonServing.columns).asInstanceOf[Seq[String]]\n    // everything except the column list is input data for the model\n    val (colNames, data) = (query - PythonServing.columns).toList.unzip\n\n    val rows = Arrays.asList(Row.fromSeq(data))\n    // the type of each column is taken from the Literal built from its value\n    val schema = StructType(colNames.zipWithIndex.map { case (col, i) =>\n      StructField(col, Literal(data(i)).dataType)\n    })\n\n    val spark = SparkSession.builder.getOrCreate()\n    val df = spark.createDataFrame(rows, schema)\n    model.transform(df)\n      .select(selectCols.head, selectCols.tail: _*)\n      .first()\n  }\n\n}\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/evaluation/CrossValidation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.evaluation\n\nimport scala.reflect.ClassTag\nimport org.apache.spark.rdd.RDD\n\n/** Common helper functions */\nobject CommonHelperFunctions {\n\n  /** Split a data set into evalK folds for crossvalidation.\n    * Apply to data sets supplied to evaluation.\n    *\n    * @tparam D Data point class.\n    * @tparam TD Training data class.\n    * @tparam EI Evaluation Info class.\n    * @tparam Q Input query class.\n    * @tparam A Actual value class.\n    * @param evalK Number of folds.\n    * @param dataset Data points to split.\n    * @param evaluatorInfo Evaluation info, passed unchanged into every fold.\n    * @param trainingDataCreator Builds the training data of a fold from its\n    *                            training points.\n    * @param queryCreator Builds a query from a testing point.\n    * @param actualCreator Builds the actual value from a testing point.\n    * @return One (training data, evaluation info, query/actual pairs) tuple\n    *         per fold.\n    */\n\n  def splitData[D: ClassTag, TD, EI, Q, A](\n\n     evalK: Int,\n     dataset: RDD[D],\n     evaluatorInfo: EI,\n     trainingDataCreator: RDD[D] => TD,\n     queryCreator: D => Q,\n     actualCreator: D => A): Seq[(TD, EI, RDD[(Q, A)])] = {\n\n    val indexedPoints = dataset.zipWithIndex\n\n    // a point whose index modulo k equals foldIdx is a testing point of that\n    // fold; every other point is a training point (hence the exclusive or)\n    def selectPoint(foldIdx: Int, pt: D, idx: Long, k: Int, isTraining: Boolean): Option[D] = {\n      if ((idx % k == foldIdx) ^ isTraining) Some(pt)\n      else None\n    }\n\n    (0 until evalK).map { foldIdx =>\n      val trainingPoints = indexedPoints.flatMap { case(pt, idx) =>\n        selectPoint(foldIdx, pt, idx, evalK, true)\n      }\n      val testingPoints 
= indexedPoints.flatMap { case(pt, idx) =>\n        selectPoint(foldIdx, pt, idx, evalK, false)\n      }\n\n      (\n        trainingDataCreator(trainingPoints),\n        evaluatorInfo,\n        testingPoints.map { d => (queryCreator(d), actualCreator(d)) }\n      )\n    }\n  }\n}\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/e2/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.e2\n\n/** Collection of engine libraries that have no dependency on PredictionIO */\npackage object engine {}\n\n/** Collection of evaluation libraries that have no dependency on PredictionIO */\npackage object evaluation {}\n"
  },
  {
    "path": "e2/src/main/scala/org/apache/predictionio/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio\n\n/** Independent library of code that is useful for engine development and\n  * evaluation\n  */\npackage object e2 {}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/engine/BinaryVectorizerTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.predictionio.e2.fixture.BinaryVectorizerFixture\nimport org.apache.predictionio.e2.fixture.SharedSparkContext\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.rdd.RDD\nimport org.scalatest.FlatSpec\nimport org.scalatest.Matchers\nimport scala.collection.immutable.HashMap\n\n\nimport scala.language.reflectiveCalls\n\n/** Tests BinaryVectorizer.toBinary against the BinaryVectorizerFixture data */\nclass BinaryVectorizerTest extends FlatSpec with Matchers with SharedSparkContext\nwith BinaryVectorizerFixture{\n\n  \"toBinary\" should \"produce the following summed values:\" in {\n    val testCase = BinaryVectorizer(sc.parallelize(base.maps), base.properties)\n    val vectorTwoA = testCase.toBinary(testArrays.twoA)\n    val vectorTwoB = testCase.toBinary(testArrays.twoB)\n\n\n    // Make sure vectors produced are the same size.\n    vectorTwoA.size should be (vectorTwoB.size)\n\n    // Test case for checking food value not listed in base.maps.\n    testCase.toBinary(testArrays.one).toArray.sum should be (1.0)\n\n    // Test cases for making sure indices are preserved.\n    val sumOne = vecSum(vectorTwoA, vectorTwoB)\n\n    exactly (1, 
sumOne) should be (2.0)\n    exactly (2,sumOne) should be (0.0)\n    exactly (2, sumOne) should be (1.0)\n\n    val sumTwo = vecSum(Vectors.dense(sumOne), testCase.toBinary(testArrays.twoC))\n\n    exactly (3, sumTwo) should be (1.0)\n  }\n\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/engine/CategoricalNaiveBayesTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.predictionio.e2.fixture.{NaiveBayesFixture, SharedSparkContext}\nimport org.scalatest.{Matchers, FlatSpec}\n\nimport scala.language.reflectiveCalls\n\n/** Tests training, log scoring and prediction of CategoricalNaiveBayes on the\n  * fruit data of NaiveBayesFixture\n  */\nclass CategoricalNaiveBayesTest extends FlatSpec with Matchers\nwith SharedSparkContext with NaiveBayesFixture {\n  // absolute tolerance used when comparing log values\n  val Tolerance = .0001\n  val labeledPoints = fruit.labeledPoints\n\n  \"Model\" should \"have log priors and log likelihoods\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    model.priors(fruit.Banana) should be(-.7885 +- Tolerance)\n    model.priors(fruit.Orange) should be(-1.7047 +- Tolerance)\n    model.priors(fruit.OtherFruit) should be(-1.0116 +- Tolerance)\n\n    model.likelihoods(fruit.Banana)(0)(fruit.Long) should\n      be(-.2231 +- Tolerance)\n    model.likelihoods(fruit.Banana)(0)(fruit.NotLong) should\n      be(-1.6094 +- Tolerance)\n    model.likelihoods(fruit.Banana)(1)(fruit.Sweet) should\n      be(-.2231 +- Tolerance)\n    model.likelihoods(fruit.Banana)(1)(fruit.NotSweet) should\n      be(-1.6094 +- Tolerance)\n    
model.likelihoods(fruit.Banana)(2)(fruit.Yellow) should\n      be(-.2231 +- Tolerance)\n    model.likelihoods(fruit.Banana)(2)(fruit.NotYellow) should\n      be(-1.6094 +- Tolerance)\n\n    model.likelihoods(fruit.Orange)(0) should not contain key(fruit.Long)\n    model.likelihoods(fruit.Orange)(0)(fruit.NotLong) should be(0.0)\n    model.likelihoods(fruit.Orange)(1)(fruit.Sweet) should\n      be(-.6931 +- Tolerance)\n    model.likelihoods(fruit.Orange)(1)(fruit.NotSweet) should\n      be(-.6931 +- Tolerance)\n    model.likelihoods(fruit.Orange)(2)(fruit.NotYellow) should be(0.0)\n    model.likelihoods(fruit.Orange)(2) should not contain key(fruit.Yellow)\n\n    model.likelihoods(fruit.OtherFruit)(0)(fruit.Long) should\n      be(-.6931 +- Tolerance)\n    model.likelihoods(fruit.OtherFruit)(0)(fruit.NotLong) should\n      be(-.6931 +- Tolerance)\n    model.likelihoods(fruit.OtherFruit)(1)(fruit.Sweet) should\n      be(-.2877 +- Tolerance)\n    model.likelihoods(fruit.OtherFruit)(1)(fruit.NotSweet) should\n      be(-1.3863 +- Tolerance)\n    model.likelihoods(fruit.OtherFruit)(2)(fruit.Yellow) should\n      be(-1.3863 +- Tolerance)\n    model.likelihoods(fruit.OtherFruit)(2)(fruit.NotYellow) should\n      be(-.2877 +- Tolerance)\n  }\n\n  \"Model's log score\" should \"be the log score of the given point\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    val score = model.logScore(LabeledPoint(\n      fruit.Banana,\n      Array(fruit.Long, fruit.NotSweet, fruit.NotYellow))\n    )\n\n    score should not be None\n    score.get should be(-4.2304 +- Tolerance)\n  }\n\n  it should \"be negative infinity for a point with a non-existing feature\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    val score = model.logScore(LabeledPoint(\n      fruit.Banana,\n      Array(fruit.Long, fruit.NotSweet, \"Not 
Exist\"))\n    )\n\n    score should not be None\n    score.get should be(Double.NegativeInfinity)\n  }\n\n  it should \"be none for a point with a non-existing label\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    val score = model.logScore(LabeledPoint(\n      \"Not Exist\",\n      Array(fruit.Long, fruit.NotSweet, fruit.Yellow))\n    )\n\n    score should be(None)\n  }\n\n  it should \"use the provided default likelihood function\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    val score = model.logScore(\n      LabeledPoint(\n        fruit.Banana,\n        Array(fruit.Long, fruit.NotSweet, \"Not Exist\")\n      ),\n      ls => ls.min - math.log(2)\n    )\n\n    score should not be None\n    score.get should be(-4.9236 +- Tolerance)\n  }\n\n  \"Model predict\" should \"return the correct label\" in {\n    val labeledPointsRdd = sc.parallelize(labeledPoints)\n    val model = CategoricalNaiveBayes.train(labeledPointsRdd)\n\n    val label = model.predict(Array(fruit.Long, fruit.Sweet, fruit.Yellow))\n    label should be(fruit.Banana)\n  }\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/engine/MarkovChainTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.engine\n\nimport org.apache.predictionio.e2.fixture.{MarkovChainFixture, SharedSparkContext}\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.mllib.linalg.distributed.CoordinateMatrix\nimport org.scalatest.{FlatSpec, Matchers}\n\nimport scala.language.reflectiveCalls\n\n/** Tests MarkovChain training and prediction on the MarkovChainFixture data */\nclass MarkovChainTest extends FlatSpec with Matchers with SharedSparkContext\nwith MarkovChainFixture {\n\n  \"Markov chain training\" should \"produce a model\" in {\n    val matrix =\n      new CoordinateMatrix(sc.parallelize(twoByTwoMatrix.matrixEntries))\n    val model = MarkovChain.train(matrix, 2)\n\n    model.n should be(2)\n    model.transitionVectors.collect() should contain theSameElementsAs Seq(\n      (0, Vectors.sparse(2, Array(0, 1), Array(0.3, 0.7))),\n      (1, Vectors.sparse(2, Array(0, 1), Array(0.5, 0.5)))\n    )\n  }\n\n  it should \"contains probabilities of the top N only\" in {\n    val matrix =\n      new CoordinateMatrix(sc.parallelize(fiveByFiveMatrix.matrixEntries))\n    val model = MarkovChain.train(matrix, 2)\n\n    model.n should be(2)\n    // each row keeps its two largest tallies, divided by the full row total\n    model.transitionVectors.collect() should contain theSameElementsAs Seq(\n      (0, Vectors.sparse(5, Array(1, 2), Array(.6, 
.4))),\n      (1, Vectors.sparse(5, Array(2, 4), Array(9.0 / 25, 8.0 / 25))),\n      (2, Vectors.sparse(5, Array(1, 4), Array(10.0 / 28, 10.0 / 28))),\n      (3, Vectors.sparse(5, Array(3, 4), Array(3.0 / 9, 4.0 / 9))),\n      (4, Vectors.sparse(5, Array(3, 4), Array(8.0 / 25, 0.4)))\n    )\n  }\n\n  \"Model predict\" should \"calculate the probablities of new states\" in {\n    val matrix =\n      new CoordinateMatrix(sc.parallelize(twoByTwoMatrix.matrixEntries))\n    val model = MarkovChain.train(matrix, 2)\n    val nextState = model.predict(Seq(0.4, 0.6))\n\n    nextState should contain theSameElementsInOrderAs Seq(0.42, 0.58)\n  }\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/evaluation/CrossValidationTest.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.evaluation\n\nimport org.scalatest.{Matchers, Inspectors, FlatSpec}\nimport org.apache.spark.rdd.RDD\nimport org.apache.predictionio.e2.fixture.SharedSparkContext\nimport org.apache.predictionio.e2.engine.LabeledPoint\n\n/** Data classes and converters handed to splitData by the tests below */\nobject CrossValidationTest {\n  case class TrainingData(labeledPoints: Seq[LabeledPoint])\n  case class Query(features: Array[String])\n  case class ActualResult(label: String)\n\n  case class EmptyEvaluationParams()\n\n  def toTrainingData(labeledPoints: RDD[LabeledPoint]) = TrainingData(labeledPoints.collect().toSeq)\n  def toQuery(labeledPoint: LabeledPoint) = Query(labeledPoint.features)\n  def toActualResult(labeledPoint: LabeledPoint) = ActualResult(labeledPoint.label)\n\n}\n\n\n/** Tests CommonHelperFunctions.splitData for every fold count from 1 to the\n  * number of data points\n  */\nclass CrossValidationTest extends FlatSpec with Matchers with Inspectors\nwith SharedSparkContext{\n\n\n  val Label1 = \"l1\"\n  val Label2 = \"l2\"\n  val Label3 = \"l3\"\n  val Label4 = \"l4\"\n  val Attribute1 = \"a1\"\n  val NotAttribute1 = \"na1\"\n  val Attribute2 = \"a2\"\n  val NotAttribute2 = \"na2\"\n\n  val labeledPoints = Seq(\n    LabeledPoint(Label1, Array(Attribute1, Attribute2)),\n    
LabeledPoint(Label2, Array(NotAttribute1, Attribute2)),\n    LabeledPoint(Label3, Array(Attribute1, NotAttribute2)),\n    LabeledPoint(Label4, Array(NotAttribute1, NotAttribute2))\n  )\n\n  val dataCount = labeledPoints.size\n  val evalKs = (1 to dataCount)\n  val emptyParams = new CrossValidationTest.EmptyEvaluationParams()\n  // one fold as returned by splitData: training data, evaluation params and\n  // the (query, actual result) pairs used for testing\n  type Fold = (\n    CrossValidationTest.TrainingData,\n    CrossValidationTest.EmptyEvaluationParams,\n    RDD[(CrossValidationTest.Query, CrossValidationTest.ActualResult)])\n\n  /** Convert a fold back into its (training points, testing points) */\n  def toTestTrain(dataSplit: Fold): (Seq[LabeledPoint], Seq[LabeledPoint]) = {\n    val trainingData = dataSplit._1.labeledPoints\n    val queryActual = dataSplit._3\n    val testingData = queryActual.map { case (query, actual) =>\n      LabeledPoint(actual.label, query.features)\n    }\n    (trainingData, testingData.collect().toSeq)\n  }\n\n  /** Split the points into k folds using the converters of the companion */\n  def splitData(k: Int, labeledPointsRDD: RDD[LabeledPoint]): Seq[Fold] = {\n    CommonHelperFunctions.splitData[\n      LabeledPoint,\n      CrossValidationTest.TrainingData,\n      CrossValidationTest.EmptyEvaluationParams,\n      CrossValidationTest.Query,\n      CrossValidationTest.ActualResult](\n        k,\n        labeledPointsRDD,\n        emptyParams,\n        CrossValidationTest.toTrainingData,\n        CrossValidationTest.toQuery,\n        CrossValidationTest.toActualResult)\n  }\n\n  \"Fold count\" should \"equal evalK\" in {\n    val labeledPointsRDD = sc.parallelize(labeledPoints)\n    val lengths = evalKs.map(k => splitData(k, labeledPointsRDD).length)\n    lengths should be(evalKs)\n  }\n\n  \"Testing data size\" should  \"be within 1 of total / evalK\" in {\n    val labeledPointsRDD = sc.parallelize(labeledPoints)\n    val splits = evalKs.map(k => k -> splitData(k, labeledPointsRDD))\n    // per fold: testing size minus the integer quotient dataCount / k\n    val diffs = splits.map { case (k, folds) =>\n      folds.map(fold => fold._3.count() - dataCount / k)\n    }\n    forAll(diffs) {foldDiffs => foldDiffs.max should be <=  1L}\n    diffs.map(folds => folds.sum) should 
be(evalKs.map(k => dataCount % k))\n  }\n\n  \"Training + testing\" should \"equal original dataset\" in {\n    val labeledPointsRDD = sc.parallelize(labeledPoints)\n    forAll(evalKs) {k =>\n      val split = splitData(k, labeledPointsRDD)\n      forAll(split) {fold =>\n        val(training, testing) = toTestTrain(fold)\n        (training ++ testing).toSet should be(labeledPoints.toSet)\n      }\n    }\n  }\n\n  \"Training and testing\" should \"be disjoint\" in {\n    val labeledPointsRDD = sc.parallelize(labeledPoints)\n    forAll(evalKs) { k =>\n      val split = splitData(k, labeledPointsRDD)\n      forAll(split) { fold =>\n        val (training, testing) = toTestTrain(fold)\n        training.toSet.intersect(testing.toSet) should be('empty)\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/fixture/BinaryVectorizerFixture.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.e2.fixture\n\nimport scala.collection.immutable.HashMap\nimport scala.collection.immutable.HashSet\nimport org.apache.spark.mllib.linalg.Vector\n\n/** Test data and helpers for the BinaryVectorizer tests */\ntrait BinaryVectorizerFixture {\n\n  /** Property maps and the set of properties used to build the vectorizer */\n  def base = {\n    new {\n      val maps : Seq[HashMap[String, String]] = Seq(\n        HashMap(\"food\" -> \"orange\", \"music\" -> \"rock\", \"hobby\" -> \"scala\"),\n        HashMap(\"food\" -> \"orange\", \"music\" -> \"pop\", \"hobby\" ->\"running\"),\n        HashMap(\"food\" -> \"banana\", \"music\" -> \"rock\", \"hobby\" -> \"guitar\"),\n        HashMap(\"food\" -> \"banana\", \"music\" -> \"rock\", \"hobby\" -> \"guitar\")\n      )\n\n      val properties = HashSet(\"food\", \"hobby\")\n    }\n  }\n\n\n  /** Property/value arrays that the tests convert to binary vectors */\n  def testArrays = {\n    new {\n      // Test case for checking food value not listed in base.maps, and\n      // property not in properties.\n      val one = Array((\"food\", \"burger\"), (\"music\", \"rock\"), (\"hobby\", \"scala\"))\n\n      // Test case for making sure indices are preserved.\n      val twoA = Array((\"food\", \"orange\"), (\"hobby\", \"scala\"))\n      val twoB = Array((\"food\", \"banana\"), 
(\"hobby\", \"scala\"))\n      val twoC = Array((\"hobby\", \"guitar\"))\n    }\n  }\n\n  /** Element-wise sum of two vectors, computed over the indices of vec1 */\n  def vecSum (vec1 : Vector, vec2 : Vector) : Array[Double] = {\n    (0 until vec1.size).map(\n      k => vec1(k) + vec2(k)\n    ).toArray\n  }\n\n}\n\n\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/fixture/MarkovChainFixture.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.fixture\n\nimport org.apache.spark.mllib.linalg.distributed.MatrixEntry\n\n/** Transition tallies for the Markov chain tests, given as\n  * MatrixEntry(row, column, value)\n  */\ntrait MarkovChainFixture {\n  /** Tallies with entries in rows and columns 0 to 1 */\n  def twoByTwoMatrix = {\n    new {\n      val matrixEntries = Seq(\n        MatrixEntry(0, 0, 3),\n        MatrixEntry(0, 1, 7),\n        MatrixEntry(1, 0, 10),\n        MatrixEntry(1, 1, 10)\n      )\n    }\n  }\n  \n  /** Tallies with entries in rows and columns 0 to 4; some cells are absent */\n  def fiveByFiveMatrix = {\n    new {\n      val matrixEntries = Seq(\n        MatrixEntry(0, 1, 12),\n        MatrixEntry(0, 2, 8),\n        MatrixEntry(1, 0, 3),\n        MatrixEntry(1, 1, 3),\n        MatrixEntry(1, 2, 9),\n        MatrixEntry(1, 3, 2),\n        MatrixEntry(1, 4, 8),\n        MatrixEntry(2, 1, 10),\n        MatrixEntry(2, 2, 8),\n        MatrixEntry(2, 4, 10),\n        MatrixEntry(3, 0, 2),\n        MatrixEntry(3, 3, 3),\n        MatrixEntry(3, 4, 4),\n        MatrixEntry(4, 1, 7),\n        MatrixEntry(4, 3, 8),\n        MatrixEntry(4, 4, 10)\n      )\n    }\n  }\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/fixture/NaiveBayesFixture.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.fixture\n\nimport org.apache.predictionio.e2.engine.LabeledPoint\n\ntrait NaiveBayesFixture {\n\n  def fruit = {\n    new {\n      val Banana = \"Banana\"\n      val Orange = \"Orange\"\n      val OtherFruit = \"Other Fruit\"\n      val NotLong = \"Not Long\"\n      val Long = \"Long\"\n      val NotSweet = \"Not Sweet\"\n      val Sweet = \"Sweet\"\n      val NotYellow = \"Not Yellow\"\n      val Yellow = \"Yellow\"\n\n      val labeledPoints = Seq(\n        LabeledPoint(Banana, Array(Long, Sweet, Yellow)),\n        LabeledPoint(Banana, Array(Long, Sweet, Yellow)),\n        LabeledPoint(Banana, Array(Long, Sweet, Yellow)),\n        LabeledPoint(Banana, Array(Long, Sweet, Yellow)),\n        LabeledPoint(Banana, Array(NotLong, NotSweet, NotYellow)),\n        LabeledPoint(Orange, Array(NotLong, Sweet, NotYellow)),\n        LabeledPoint(Orange, Array(NotLong, NotSweet, NotYellow)),\n        LabeledPoint(OtherFruit, Array(Long, Sweet, NotYellow)),\n        LabeledPoint(OtherFruit, Array(NotLong, Sweet, NotYellow)),\n        LabeledPoint(OtherFruit, Array(Long, Sweet, Yellow)),\n        LabeledPoint(OtherFruit, 
Array(NotLong, NotSweet, NotYellow))\n      )\n    }\n  }\n}\n"
  },
  {
    "path": "e2/src/test/scala/org/apache/predictionio/e2/fixture/SharedSparkContext.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.e2.fixture\n\nimport org.apache.spark.{SparkConf, SparkContext}\nimport org.scalatest.{BeforeAndAfterAll, Suite}\n\ntrait SharedSparkContext extends BeforeAndAfterAll {\n  self: Suite =>\n  @transient private var _sc: SparkContext = _\n\n  def sc: SparkContext = _sc\n\n  var conf = new SparkConf(false)\n\n  override def beforeAll() {\n    _sc = new SparkContext(\"local\", \"test\", conf)\n    super.beforeAll()\n  }\n\n  override def afterAll() {\n    LocalSparkContext.stop(_sc)\n\n    _sc = null\n    super.afterAll()\n  }\n}\n\nobject LocalSparkContext {\n  def stop(sc: SparkContext) {\n    if (sc != null) {\n      sc.stop()\n    }\n    // To avoid Akka rebinding to the same port, since it doesn't unbind\n    // immediately on shutdown\n    System.clearProperty(\"spark.driver.port\")\n  }\n}\n\n"
  },
  {
    "path": "examples/redeploy-script/local.sh.template",
    "content": "#!/bin/bash\n\n##########################\n# Rename this to local.sh\n##########################\n\n##################################\n# Settings common to all variants\n##################################\nPIO_HOME= #/path/to/PredictionIO\nLOG_DIR= #/path/to/log_dir\n\n# Email for the log file/error message\nFROM_EMAIL= #\nTARGET_EMAIL= #\n\n# Change the default binding IP if needed\nIP=0.0.0.0\n\nEVENT_SERVER_PORT=7070\n\n# Sanity check below\n\ncheck_non_empty() {\n  # $1 is the content of the variable in quotes e.g. \"$FROM_EMAIL\"\n  # $2 is the error message\n  if [[ $1 == \"\" ]]; then\n    echo \"ERROR: specify $2\"\n    exit -1\n  fi\n}\n\ncheck_non_empty \"$PIO_HOME\"     \"PIO_HOME in local.sh\"\ncheck_non_empty \"$LOG_DIR\"      \"LOG_DIR in local.sh\"\ncheck_non_empty \"$FROM_EMAIL\"   \"FROM_EMAIL in local.sh\"\ncheck_non_empty \"$TARGET_EMAIL\" \"TARGET_EMAIL in local.sh\"\n"
  },
  {
    "path": "examples/redeploy-script/redeploy.sh",
    "content": "#!/bin/bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n#set -x\n\nmy_dir=\"$(dirname \"$0\")\"\nfilename=\"$(basename \"$0\" .sh)\"\n\nsource \"${my_dir}/local.sh\"\n\n###############################################################################\n# Configuration - read and modify this section.\n###############################################################################\n\n# Rename this file to a more descriptive one, it will be used as email title\n# with underscore replaced by space. e.g. EngineX_Redeploy_(dev).sh\n# This will also make deploying the wrong script less likely.\n\nTIMESTAMP=`date +\"%Y%m%d-%H%M%S\"`\n\n# LOG_DIR is set in local.sh\nLOG_FILE=\"${LOG_DIR}/${filename}-${TIMESTAMP}.log\"\n\n# For accessing the engine server status page\nHOSTNAME= #\n\n# A port other than the default 8000 is recommend since it can be shut down\n# upon the command \"pio deploy\" without the \"--port\" parameter by mistake\nPORT= # 8001\n\n# Access key for feedback loop. 
It is a 64-char string of the form\n# abcdefghijklmnopqrstuvwxyz1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ12\nACCESSKEY=\n\nENGINE_JSON= # enginex_algo_dev.json, enginey_null_prod.json\n\n# Use empty string if you want a local cluster for train, i.e. TRAIN_MASTER=\nTRAIN_MASTER=\"spark://`hostname`:7077\"\n# Use empty string if you want a local cluster for deploy, i.e. DEPLOY_MASTER=\n# For LAlgorithm and P2LAlgorithm, leave this as empty string to avoid holding\n# up resources on the spark cluster.\nDEPLOY_MASTER=\n\n# Bump these up as needed\nTRAIN_EXECUTOR_MEMORY=16G\nTRAIN_DRIVER_MEMORY=8G\nTRAIN_CORES=4\n# \nDEPLOY_EXECUTOR_MEMORY=16G\nDEPLOY_DRIVER_MEMORY=8G\nDEPLOY_CORES=4\n\n###############################################################################\n# End of configuration\n###############################################################################\n\n\n###############\n# Sanity check\n############### \n\nNAME=${filename//_/ } # Title of the email\n\ncheck_non_empty \"${NAME/redeploy/}\" \\\n                \"a more descriptive script name, e.g. 
EngineX_Redeploy_(dev).sh\"\n\ncheck_non_empty \"$HOSTNAME\"    \"HOSTNAME\"\ncheck_non_empty \"$PORT\"        \"PORT\"\ncheck_non_empty \"$ACCESSKEY\"   \"ACCESSKEY\"\ncheck_non_empty \"$ENGINE_JSON\" \"ENGINE_JSON\"\n\n##################\n# Start of script\n##################\n\nif [[ \"$TRAIN_MASTER\" == \"\" ]]; then \n  TRAIN_MASTER_PARAM=\nelse\n  TRAIN_MASTER_PARAM=\"--master $TRAIN_MASTER\"\nfi\n\nTRAIN_COMMAND=\"${PIO_HOME}/bin/pio train --verbose -v $ENGINE_JSON \n    -- $TRAIN_MASTER_PARAM \n    --executor-memory $TRAIN_EXECUTOR_MEMORY \n    --driver-memory $TRAIN_DRIVER_MEMORY \n    --total-executor-cores $TRAIN_CORES\"\n\nif [[ \"$DEPLOY_MASTER\" == \"\" ]]; then \n  DEPLOY_MASTER_PARAM=\nelse\n  DEPLOY_MASTER_PARAM=\"--master $DEPLOY_MASTER\"\nfi\n\nDEPLOY_COMMAND=\"${PIO_HOME}/bin/pio deploy -v $ENGINE_JSON\n    --ip $IP\n    --port $PORT\n    --event-server-port $EVENT_SERVER_PORT\n    --feedback --accesskey $ACCESSKEY\n    -- $DEPLOY_MASTER_PARAM\n    --executor-memory $DEPLOY_EXECUTOR_MEMORY\n    --driver-memory $DEPLOY_DRIVER_MEMORY\n    --total-executor-cores $DEPLOY_CORES\"\n\n# Print the two commands then quit if we see any arguments (debug)\nif [[ \"$1\" != \"\" ]]; then\n  echo \"[DEBUG] Commands to be run (not actually running it):\"\n  echo \"$(echo $TRAIN_COMMAND | sed \"s/\\\\n//g\")\"\n  echo \"$(echo $DEPLOY_COMMAND | sed \"s/\\\\n//g\")\"\n  exit -1\nfi\n\n######################\n# Actually running it\n######################\n\necho \"Logging to $LOG_FILE\"\ntouch $LOG_FILE\n\necho -n \"Deploy script started at \" | tee -a $LOG_FILE\ndate | tee -a $LOG_FILE\n\n# Assume this script is in scripts/ directory of the project\ncd $my_dir/..\n\necho \"Running $(echo $TRAIN_COMMAND | sed \"s/\\\\n//g\")\" | tee -a $LOG_FILE\nPIO_TRAIN=$($TRAIN_COMMAND 2>&1)\nTRAIN_RESULT=$?\n\necho \"$PIO_TRAIN\" >> $LOG_FILE\necho -n \"Training ended with return value $TRAIN_RESULT at \" | tee -a $LOG_FILE\ndate | tee -a $LOG_FILE\n\nif [[ $TRAIN_RESULT -ne 
0 ]]; then\n  mail -s \"Error in train: $NAME $TIMESTAMP\" -a \"From: $FROM_EMAIL\" \\\n      $TARGET_EMAIL < $LOG_FILE\n  echo -n \"Deploy script aborted at \"\n  date\n  exit 1\nfi\n\n# Deploy\n# Get current running instance PID\nPIDBYPORT_COMMAND=\"lsof -t -i:$PORT\"\nDEPLOYEDPID=$($PIDBYPORT_COMMAND)\n\nDEPLOY_LOG=`mktemp $LOG_DIR/tmp.XXXXXXXXXX`\n$($DEPLOY_COMMAND 1>$DEPLOY_LOG 2>&1) &\n\n# Check if the engine is up\nsleep 60\ncurl $HOSTNAME:$PORT > /dev/null\nRETURN_VAL=$?\nCOUNTER=0\nwhile [[ $RETURN_VAL -ne 0 && $COUNTER -lt 20 ]]; do\n  sleep 30\n  curl $HOSTNAME:$PORT > /dev/null\n  let RETURN_VAL=$?\n  let COUNTER=COUNTER+1\ndone\n\n# Check if the previous engine instance is running\nKILLSD_COMMAND=\"kill $DEPLOYEDPID\"\nif [ -z \"$DEPLOYEDPID\" ]\nthen\n  printf \"\\nNo stale PIDs found for port $PORT\\n\"\nelse\n  $($KILLSD_COMMAND)\n  printf \"\\nStale PID found as $DEPLOYEDPID. Resources released.\\n\"\nfi\n\ncat $DEPLOY_LOG >> $LOG_FILE\nrm $DEPLOY_LOG\necho -n \"Deploy ended with return value $TRAIN_RESULT at \" | tee -a $LOG_FILE\ndate | tee -a $LOG_FILE\n\nif [[ $RETURN_VAL -ne 0 ]]; then\n  mail -s \"Error in deploy: $NAME $TIMESTAMP\" -a \"From: $FROM_EMAIL\" $TARGET_EMAIL < $LOG_FILE\nelse\n  mail -s \"Normal: $NAME $TIMESTAMP\" -a \"From: $FROM_EMAIL\" $TARGET_EMAIL < $LOG_FILE\nfi\n\necho -n \"Deploy script ended at \"\ndate\nexit $RETURN_VAL\n"
  },
  {
    "path": "examples/scala-parallel-classification/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis is based on Classification Engine Template v0.14.0.\n\nPlease refer to https://predictionio.apache.org/templates/classification/how-to/\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/.gitignore",
    "content": "manifest.json\npio.log\n/pio.sbt\ntarget/\n.idea\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-classification\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/data/data.txt",
    "content": "0,51 35 12\n0,49 30 12\n0,47 32 12\n0,46 31 12\n0,50 36 12\n0,54 39 14\n0,46 34 13\n0,50 34 12\n0,44 29 12\n0,49 31 11\n0,54 37 12\n0,48 34 12\n0,48 30 11\n0,43 30 11\n0,58 40 12\n0,57 44 14\n0,54 39 14\n0,51 35 13\n0,57 38 13\n0,51 38 13\n0,54 34 12\n0,51 37 14\n0,46 36 12\n0,51 33 15\n0,48 34 12\n0,50 30 12\n0,50 34 14\n0,52 35 12\n0,52 34 12\n0,47 32 12\n0,48 31 12\n0,54 34 14\n0,52 41 11\n0,55 42 12\n0,49 31 11\n0,50 32 12\n0,55 35 12\n0,49 31 11\n0,44 30 12\n0,51 34 12\n0,50 35 13\n0,45 23 13\n0,44 32 12\n0,50 35 16\n0,51 38 14\n0,48 30 13\n0,51 38 12\n0,46 32 12\n0,53 37 12\n0,50 33 12\n1,70 32 14\n1,64 32 15\n1,69 31 15\n1,55 23 13\n1,65 28 15\n1,57 28 13\n1,63 33 16\n1,49 24 10\n1,66 29 13\n1,52 27 14\n1,50 20 10\n1,59 30 15\n1,60 22 10\n1,61 29 14\n1,56 29 13\n1,67 31 14\n1,56 30 15\n1,58 27 10\n1,62 22 15\n1,56 25 11\n1,59 32 18\n1,61 28 13\n1,63 25 15\n1,61 28 12\n1,64 29 13\n1,66 30 14\n1,68 28 14\n1,67 30 17\n1,60 29 15\n1,57 26 10\n1,55 24 11\n1,55 24 10\n1,58 27 12\n1,60 27 16\n1,54 30 15\n1,60 34 16\n1,67 31 15\n1,63 23 13\n1,56 30 13\n1,55 25 13\n1,55 26 12\n1,61 30 14\n1,58 26 12\n1,50 23 10\n1,56 27 13\n1,57 30 12\n1,57 29 13\n1,62 29 13\n1,51 25 11\n1,57 28 13\n2,63 33 25\n2,58 27 19\n2,71 30 21\n2,63 29 18\n2,65 30 22\n2,76 30 21\n2,49 25 17\n2,73 29 18\n2,67 25 18\n2,72 36 25\n2,65 32 20\n2,64 27 19\n2,68 30 21\n2,57 25 20\n2,58 28 24\n2,64 32 23\n2,65 30 18\n2,77 38 22\n2,77 26 23\n2,60 22 15\n2,69 32 23\n2,56 28 20\n2,77 28 20\n2,63 27 18\n2,67 33 21\n2,72 32 18\n2,62 28 18\n2,61 30 18\n2,64 28 21\n2,72 30 16\n2,74 28 19\n2,79 38 20\n2,64 28 22\n2,63 28 15\n2,61 26 14\n2,77 30 23\n2,63 34 24\n2,64 31 18\n2,60 30 18\n2,69 31 21\n2,67 31 24\n2,69 31 23\n2,58 27 19\n2,68 32 23\n2,67 33 25\n2,67 30 23\n2,63 25 19\n2,65 30 20\n2,62 34 23\n2,59 30 18\n3,80 10 70\n3,82 20 71\n3,90 15 73\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for classification engine\n\"\"\"\n\nimport predictionio\nimport argparse\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(\",\")\n    plan = data[0]\n    attr = data[1].split(\" \")\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=str(count), # use the count num as user ID\n      properties= {\n        \"attr0\" : int(attr[0]),\n        \"attr1\" : int(attr[1]),\n        \"attr2\" : int(attr[2]),\n        \"plan\" : int(plan)\n      }\n    )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for classification engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    
url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.classification.ClassificationEngine\",\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"randomforest\",\n      \"params\": {\n        \"numClasses\": 4,\n        \"numTrees\": 5,\n        \"featureSubsetStrategy\": \"auto\",\n        \"impurity\": \"gini\",\n        \"maxDepth\": 4,\n        \"maxBins\": 100\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/project/build.properties",
    "content": "sbt.version=1.2.8"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/CompleteEvaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.MetricEvaluator\n\nobject CompleteEvaluation extends Evaluation {\n  engineEvaluator = (\n    ClassificationEngine(),\n    MetricEvaluator(\n      metric = Accuracy(),\n      otherMetrics = Seq(Precision(0.0), Precision(1.0), Precision(2.0)),\n      outputPath = \"best.json\"))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.regression.LabeledPoint\nimport org.apache.spark.mllib.linalg.Vectors\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(\n  appName: String,\n  evalK: Option[Int]  // define the k-fold parameter.\n) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\",\n      // only keep entities with these required properties defined\n      required = Some(List(\"plan\", \"attr0\", \"attr1\", \"attr2\")))(sc)\n      
// aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          LabeledPoint(properties.get[Double](\"plan\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"attr0\"),\n              properties.get[Double](\"attr1\"),\n              properties.get[Double](\"attr2\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n\n    new TrainingData(labeledPoints)\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(dsp.evalK.nonEmpty, \"DataSourceParams.evalK must not be None\")\n\n    // The following code reads the data from data store. It is equivalent to\n    // the readTraining method. 
We copy-and-paste the exact code here for\n    // illustration purpose, a recommended approach is to factor out this logic\n    // into a helper function and have both readTraining and readEval call the\n    // helper.\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\",\n      // only keep entities with these required properties defined\n      required = Some(List(\"plan\", \"attr0\", \"attr1\", \"attr2\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          LabeledPoint(properties.get[Double](\"plan\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"attr0\"),\n              properties.get[Double](\"attr1\"),\n              properties.get[Double](\"attr2\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n    // End of reading from data store\n\n    // K-fold splitting\n    val evalK = dsp.evalK.get\n    val indexedPoints: RDD[(LabeledPoint, Long)] = labeledPoints.zipWithIndex()\n\n    (0 until evalK).map { idx =>\n      val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1)\n      val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1)\n\n      (\n        new TrainingData(trainingPoints),\n        new EmptyEvaluationInfo(),\n        testingPoints.map {\n          p => (Query(p.features(0), p.features(1), p.features(2)), ActualResult(p.label))\n        }\n      )\n    }\n  }\n}\n\nclass TrainingData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  attr0 : Double,\n  attr1 : Double,\n  attr2 : Double\n)\n\ncase class PredictedResult(\n  label: Double\n)\n\ncase class ActualResult(\n  label: Double\n)\n\nobject ClassificationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"naive\" -> classOf[NaiveBayesAlgorithm],\n        \"randomforest\" -> classOf[RandomForestAlgorithm]), // ADDED\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.Evaluation\n\ncase class Accuracy()\n  extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Double = (if (predicted.label == actual.label) 1.0 else 0.0)\n}\n\nobject AccuracyEvaluation extends Evaluation {\n  // Define Engine and Metric used in Evaluation\n  engineMetric = (ClassificationEngine(), Accuracy())\n}\n\nobject EngineParamsList extends EngineParamsGenerator {\n  // Define list of EngineParams used in Evaluation\n\n  // First, we define the base engine params. 
It specifies the appId from which\n  // the data is read, and an evalK parameter is used to define the\n  // cross-validation.\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appName = \"INVALID_APP_NAME\", evalK = Some(5)))\n\n  // Second, we specify the engine params list by explicitly listing all\n  // algorithm parameters. In this case, we evaluate 3 engine params, each with\n  // a different algorithm params value.\n  engineParamsList = Seq(\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(10.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(100.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(1000.0)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\n\nimport org.apache.spark.mllib.classification.NaiveBayes\nimport org.apache.spark.mllib.classification.NaiveBayesModel\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\n\nimport grizzled.slf4j.Logger\n\ncase class AlgorithmParams(\n  lambda: Double\n) extends Params\n\n// extends P2LAlgorithm because the MLlib's NaiveBayesModel doesn't contain RDD.\nclass NaiveBayesAlgorithm(val ap: AlgorithmParams)\n  extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {\n    // MLLib NaiveBayes cannot handle empty training data.\n    require(data.labeledPoints.take(1).nonEmpty,\n      s\"RDD[labeledPoints] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n\n    
NaiveBayes.train(data.labeledPoints, ap.lambda)\n  }\n\n  override\n  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {\n    val label = model.predict(Vectors.dense(\n      Array(query.attr0, query.attr1, query.attr2)\n    ))\n    PredictedResult(label)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.Evaluation\n\ncase class Precision(label: Double)\n  extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header: String = s\"Precision(label = $label)\"\n\n  override\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Option[Double] = {\n    if (predicted.label == label) {\n      if (predicted.label == actual.label) {\n        Some(1.0)  // True positive\n      } else {\n        Some(0.0)  // False positive\n      }\n    } else {\n      None  // Unrelated case for calculating precision\n    }\n  }\n}\n\nobject PrecisionEvaluation extends Evaluation {\n  engineMetric = (ClassificationEngine(), Precision(label = 1.0))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.regression.LabeledPoint\n\nclass PreparedData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n\nclass Preparator extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(trainingData.labeledPoints)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\n\nimport org.apache.spark.mllib.tree.RandomForest // CHANGED\nimport org.apache.spark.mllib.tree.model.RandomForestModel // CHANGED\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\n\n// CHANGED\ncase class RandomForestAlgorithmParams(\n  numClasses: Int,\n  numTrees: Int,\n  featureSubsetStrategy: String,\n  impurity: String,\n  maxDepth: Int,\n  maxBins: Int\n) extends Params\n\n// extends P2LAlgorithm because the MLlib's RandomForestModel doesn't\n// contain RDD.\nclass RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED\n  extends P2LAlgorithm[PreparedData, RandomForestModel, // CHANGED\n  Query, PredictedResult] {\n\n  // CHANGED\n  override\n  def train(sc: SparkContext, data: PreparedData): RandomForestModel = {\n    // CHANGED\n    // Empty categoricalFeaturesInfo indicates all features are continuous.\n    val categoricalFeaturesInfo = Map[Int, Int]()\n    RandomForest.trainClassifier(\n      data.labeledPoints,\n      
ap.numClasses,\n      categoricalFeaturesInfo,\n      ap.numTrees,\n      ap.featureSubsetStrategy,\n      ap.impurity,\n      ap.maxDepth,\n      ap.maxBins)\n  }\n\n  override\n  def predict(\n    model: RandomForestModel, // CHANGED\n    query: Query): PredictedResult = {\n\n    val label = model.predict(Vectors.dense(\n      Array(query.attr0, query.attr1, query.attr2)\n    ))\n    PredictedResult(label)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/add-algorithm/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/.gitignore",
    "content": "manifest.json\npio.log\n/pio.sbt\ntarget/\n.idea\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-classification\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/data/data.txt",
    "content": "0,51 35 12\n0,49 30 12\n0,47 32 12\n0,46 31 12\n0,50 36 12\n0,54 39 14\n0,46 34 13\n0,50 34 12\n0,44 29 12\n0,49 31 11\n0,54 37 12\n0,48 34 12\n0,48 30 11\n0,43 30 11\n0,58 40 12\n0,57 44 14\n0,54 39 14\n0,51 35 13\n0,57 38 13\n0,51 38 13\n0,54 34 12\n0,51 37 14\n0,46 36 12\n0,51 33 15\n0,48 34 12\n0,50 30 12\n0,50 34 14\n0,52 35 12\n0,52 34 12\n0,47 32 12\n0,48 31 12\n0,54 34 14\n0,52 41 11\n0,55 42 12\n0,49 31 11\n0,50 32 12\n0,55 35 12\n0,49 31 11\n0,44 30 12\n0,51 34 12\n0,50 35 13\n0,45 23 13\n0,44 32 12\n0,50 35 16\n0,51 38 14\n0,48 30 13\n0,51 38 12\n0,46 32 12\n0,53 37 12\n0,50 33 12\n1,70 32 14\n1,64 32 15\n1,69 31 15\n1,55 23 13\n1,65 28 15\n1,57 28 13\n1,63 33 16\n1,49 24 10\n1,66 29 13\n1,52 27 14\n1,50 20 10\n1,59 30 15\n1,60 22 10\n1,61 29 14\n1,56 29 13\n1,67 31 14\n1,56 30 15\n1,58 27 10\n1,62 22 15\n1,56 25 11\n1,59 32 18\n1,61 28 13\n1,63 25 15\n1,61 28 12\n1,64 29 13\n1,66 30 14\n1,68 28 14\n1,67 30 17\n1,60 29 15\n1,57 26 10\n1,55 24 11\n1,55 24 10\n1,58 27 12\n1,60 27 16\n1,54 30 15\n1,60 34 16\n1,67 31 15\n1,63 23 13\n1,56 30 13\n1,55 25 13\n1,55 26 12\n1,61 30 14\n1,58 26 12\n1,50 23 10\n1,56 27 13\n1,57 30 12\n1,57 29 13\n1,62 29 13\n1,51 25 11\n1,57 28 13\n2,63 33 25\n2,58 27 19\n2,71 30 21\n2,63 29 18\n2,65 30 22\n2,76 30 21\n2,49 25 17\n2,73 29 18\n2,67 25 18\n2,72 36 25\n2,65 32 20\n2,64 27 19\n2,68 30 21\n2,57 25 20\n2,58 28 24\n2,64 32 23\n2,65 30 18\n2,77 38 22\n2,77 26 23\n2,60 22 15\n2,69 32 23\n2,56 28 20\n2,77 28 20\n2,63 27 18\n2,67 33 21\n2,72 32 18\n2,62 28 18\n2,61 30 18\n2,64 28 21\n2,72 30 16\n2,74 28 19\n2,79 38 20\n2,64 28 22\n2,63 28 15\n2,61 26 14\n2,77 30 23\n2,63 34 24\n2,64 31 18\n2,60 30 18\n2,69 31 21\n2,67 31 24\n2,69 31 23\n2,58 27 19\n2,68 32 23\n2,67 33 25\n2,67 30 23\n2,63 25 19\n2,65 30 20\n2,62 34 23\n2,59 30 18\n3,80 10 70\n3,82 20 71\n3,90 15 73\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for classification engine\n\"\"\"\n\nimport predictionio\nimport argparse\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(\",\")\n    label = data[0]\n    attr = data[1].split(\" \")\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=str(count),\n      properties= {\n        \"featureA\" : int(attr[0]),\n        \"featureB\" : int(attr[1]),\n        \"featureC\" : int(attr[2]),\n        \"featureD\" : 0,\n        \"label\" : int(label)\n      }\n    )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for classification engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    
url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.classification.ClassificationEngine\",\n  \"datasource\": {\n    \"params\": {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"naive\",\n      \"params\": {\n        \"lambda\": 1.0\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/project/build.properties",
    "content": "sbt.version=1.2.8"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/CompleteEvaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.MetricEvaluator\n\nobject CompleteEvaluation extends Evaluation {\n  engineEvaluator = (\n    ClassificationEngine(),\n    MetricEvaluator(\n      metric = Accuracy(),\n      otherMetrics = Seq(Precision(0.0), Precision(1.0), Precision(2.0)),\n      outputPath = \"best.json\"))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.regression.LabeledPoint\nimport org.apache.spark.mllib.linalg.Vectors\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(\n  appName: String,\n  evalK: Option[Int]  // define the k-fold parameter.\n) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\", // MODIFIED\n      // only keep entities with these required properties defined\n      required = Some(List( // MODIFIED\n        \"featureA\", 
\"featureB\", \"featureC\", \"featureD\", \"label\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          // MODIFIED\n          LabeledPoint(properties.get[Double](\"label\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"featureA\"),\n              properties.get[Double](\"featureB\"),\n              properties.get[Double](\"featureC\"),\n              properties.get[Double](\"featureD\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n\n    new TrainingData(labeledPoints)\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(dsp.evalK.nonEmpty, \"DataSourceParams.evalK must not be None\")\n\n    // The following code reads the data from data store. It is equivalent to\n    // the readTraining method. 
We copy-and-paste the exact code here for\n    // illustration purpose, a recommended approach is to factor out this logic\n    // into a helper function and have both readTraining and readEval call the\n    // helper.\n    val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\", // MODIFIED\n      // only keep entities with these required properties defined\n      required = Some(List( // MODIFIED\n        \"featureA\", \"featureB\", \"featureC\", \"featureD\", \"label\")))(sc)\n      // aggregateProperties() returns RDD pair of\n      // entity ID and its aggregated properties\n      .map { case (entityId, properties) =>\n        try {\n          // MODIFIED\n          LabeledPoint(properties.get[Double](\"label\"),\n            Vectors.dense(Array(\n              properties.get[Double](\"featureA\"),\n              properties.get[Double](\"featureB\"),\n              properties.get[Double](\"featureC\"),\n              properties.get[Double](\"featureD\")\n            ))\n          )\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Failed to get properties ${properties} of\" +\n              s\" ${entityId}. 
Exception: ${e}.\")\n            throw e\n          }\n        }\n      }.cache()\n    // End of reading from data store\n\n    // K-fold splitting\n    val evalK = dsp.evalK.get\n    val indexedPoints: RDD[(LabeledPoint, Long)] = labeledPoints.zipWithIndex()\n\n    (0 until evalK).map { idx =>\n      val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1)\n      val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1)\n\n      (\n        new TrainingData(trainingPoints),\n        new EmptyEvaluationInfo(),\n        testingPoints.map {\n          // MODIFIED\n          p => (Query(p.features(0), p.features(1), p.features(2), p.features(3)), ActualResult(p.label))\n        }\n      )\n    }\n  }\n}\n\nclass TrainingData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\n// MODIFIED\ncase class Query(\n  featureA : Double,\n  featureB : Double,\n  featureC : Double,\n  featureD : Double\n)\n\ncase class PredictedResult(\n  label: Double\n)\n\ncase class ActualResult(\n  label: Double\n)\n\nobject ClassificationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"naive\" -> classOf[NaiveBayesAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.Evaluation\n\ncase class Accuracy()\n  extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Double = (if (predicted.label == actual.label) 1.0 else 0.0)\n}\n\nobject AccuracyEvaluation extends Evaluation {\n  // Define Engine and Metric used in Evaluation\n  engineMetric = (ClassificationEngine(), Accuracy())\n}\n\nobject EngineParamsList extends EngineParamsGenerator {\n  // Define list of EngineParams used in Evaluation\n\n  // First, we define the base engine params. 
It specifies the appName from which\n  // the data is read, and an evalK parameter is used to define the\n  // k-fold cross-validation.\n  private[this] val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(appName = \"INVALID_APP_NAME\", evalK = Some(5)))\n\n  // Second, we specify the engine params list by explicitly listing all\n  // algorithm parameters. In this case, we evaluate 3 engine params, each with\n  // a different algorithm params value.\n  engineParamsList = Seq(\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(10.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(100.0)))),\n    baseEP.copy(algorithmParamsList = Seq((\"naive\", AlgorithmParams(1000.0)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/NaiveBayesAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\n\nimport org.apache.spark.mllib.classification.NaiveBayes\nimport org.apache.spark.mllib.classification.NaiveBayesModel\nimport org.apache.spark.mllib.linalg.Vectors\nimport org.apache.spark.SparkContext\n\nimport grizzled.slf4j.Logger\n\ncase class AlgorithmParams(\n  lambda: Double\n) extends Params\n\n// extends P2LAlgorithm because the MLlib's NaiveBayesModel doesn't contain RDD.\nclass NaiveBayesAlgorithm(val ap: AlgorithmParams)\n  extends P2LAlgorithm[PreparedData, NaiveBayesModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = {\n    // MLLib NaiveBayes cannot handle empty training data.\n    require(data.labeledPoints.take(1).nonEmpty,\n      s\"RDD[labeledPoints] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n\n    
NaiveBayes.train(data.labeledPoints, ap.lambda)\n  }\n\n  override\n  def predict(model: NaiveBayesModel, query: Query): PredictedResult = {\n    val label = model.predict(Vectors.dense(\n      // MODIFIED\n      Array(query.featureA, query.featureB, query.featureC, query.featureD)\n    ))\n    PredictedResult(label)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/PrecisionEvaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.Evaluation\n\ncase class Precision(label: Double)\n  extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header: String = s\"Precision(label = $label)\"\n\n  override\n  def calculate(query: Query, predicted: PredictedResult, actual: ActualResult)\n  : Option[Double] = {\n    if (predicted.label == label) {\n      if (predicted.label == actual.label) {\n        Some(1.0)  // True positive\n      } else {\n        Some(0.0)  // False positive\n      }\n    } else {\n      None  // Unrelated case for calculating precision\n    }\n  }\n}\n\nobject PrecisionEvaluation extends Evaluation {\n  engineMetric = (ClassificationEngine(), Precision(label = 1.0))\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.regression.LabeledPoint\n\nclass PreparedData(\n  val labeledPoints: RDD[LabeledPoint]\n) extends Serializable\n\nclass Preparator extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(trainingData.labeledPoints)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.classification\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-classification/reading-custom-properties/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis is based on E-Commerce Recommendation Template v0.14.0.\n\nPlease refer to https://predictionio.apache.org/templates/ecommercerecommendation/how-to/\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/.gitignore",
    "content": "manifest.json\ntarget/\npio.log\n/pio.sbt\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-ecommercerecommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for E-Commerce Recommendation Engine Template\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4))\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in 
random.sample(item_ids, 10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n      # randomly buy some of the viewed items\n      if random.choice([True, False]):\n        print(\"User\", user_id ,\"buys item\", viewed_item)\n        client.create_event(\n          event=\"buy\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item\n        )\n        count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for e-commerce recommendation engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"u1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.ecommercerecommendation.ECommerceRecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"ecomm\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        \"unseenOnly\": true,\n        \"seenEvents\": [\"buy\", \"view\"],\n        \"similarEvents\": [\"view\"],\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n       
 case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        Item(categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\", \"buy\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .cache()\n\n    val viewEventsRDD: RDD[ViewEvent] = eventsRDD\n      .filter { event => event.event == \"view\" }\n      .map { event =>\n        try {\n          ViewEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n    val buyEventsRDD: RDD[BuyEvent] = eventsRDD\n      .filter { event => event.event == \"buy\" }\n      .map { event =>\n        try {\n          BuyEvent(\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          
case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to BuyEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD,\n      buyEvents = buyEventsRDD\n    )\n  }\n}\n\ncase class User()\n\ncase class Item(categories: Option[List[String]])\n\ncase class ViewEvent(user: String, item: String, t: Long)\n\ncase class BuyEvent(user: String, item: String, t: Long)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\" +\n    s\"buyEvents: [${buyEvents.count()}] (${buyEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/ECommAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.LEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\nimport scala.concurrent.duration.Duration\nimport scala.concurrent.ExecutionContext.Implicits.global\n\ncase class ECommAlgorithmParams(\n  appName: String,\n  unseenOnly: Boolean,\n  seenEvents: List[String],\n  similarEvents: List[String],\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]\n) extends Params\n\n\ncase class ProductModel(\n  item: Item,\n  features: Option[Array[Double]], // features by ALS\n  count: Int // popular count for default score\n)\n\n// ADDED\ncase class 
WeightGroup(\n  items: Set[String],\n  weight: Double\n)\n\nclass ECommModel(\n  val rank: Int,\n  val userFeatures: Map[Int, Array[Double]],\n  val productModels: Map[Int, ProductModel],\n  val userStringIntMap: BiMap[String, Int],\n  val itemStringIntMap: BiMap[String, Int]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" rank: ${rank}\" +\n    s\" userFeatures: [${userFeatures.size}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productModels: [${productModels.size}]\" +\n    s\"(${productModels.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2).toString}...)]\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\"\n  }\n}\n\nclass ECommAlgorithm(val ap: ECommAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ECommModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    require(!data.viewEvents.take(1).isEmpty,\n      s\"viewEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val 
mllibRatings: RDD[MLlibRating] = genMLlibRating(\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // use ALS to train feature vectors\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    val userFeatures = m.userFeatures.collectAsMap.toMap\n\n    // convert ID to Int index\n    val items = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }\n\n    // join item with the trained productFeatures\n    val productFeatures: Map[Int, (Item, Option[Array[Double]])] =\n      items.leftOuterJoin(m.productFeatures).collectAsMap.toMap\n\n    val popularCount = trainDefault(\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n\n    val productModels: Map[Int, ProductModel] = productFeatures\n      .map { case (index, (item, features)) =>\n        val pm = ProductModel(\n          item = item,\n          features = features,\n          // NOTE: use getOrElse because popularCount may not contain all items.\n          count = popularCount.getOrElse(index, 0)\n        )\n        (index, pm)\n      }\n\n    new ECommModel(\n      rank = m.rank,\n      userFeatures = userFeatures,\n      productModels = productModels,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap\n    )\n  }\n\n  /** Generate MLlibRating from PreparedData.\n    * You may customize this function if use different events or different aggregation method\n    */\n  def 
genMLlibRating(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: BiMap[String, Int],\n    data: PreparedData): RDD[MLlibRating] = {\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }\n      .filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    mllibRatings\n  }\n\n  /** Train default model.\n    * You may customize this function if use different events or\n    * need different ways to count \"popular\" score or return default score for item.\n    */\n  def trainDefault(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: BiMap[String, Int],\n    data: PreparedData): Map[Int, Int] = {\n    // count number of buys\n    // (item index, count)\n    val buyCountsRDD: RDD[(Int, Int)] = data.buyEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID 
${r.item}\"\n            + \" to Int index.\")\n\n        (uindex, iindex, 1)\n      }\n      .filter { case (u, i, v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .map { case (u, i, v) => (i, 1) } // key is item\n      .reduceByKey{ case (a, b) => a + b } // count number of items occurrence\n\n    buyCountsRDD.collectAsMap.toMap\n  }\n\n  override\n  def predict(model: ECommModel, query: Query): PredictedResult = {\n\n    val userFeatures = model.userFeatures\n    val productModels = model.productModels\n\n    // convert whiteList's string ID to integer index\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.flatMap(model.itemStringIntMap.get(_))\n    )\n\n    val finalBlackList: Set[Int] = genBlackList(query = query)\n      // convert seen Items list from String ID to integer Index\n      .flatMap(x => model.itemStringIntMap.get(x))\n\n    // ADDED\n    val weights: Map[Int, Double] = (for {\n      group <- weightedItems\n      item <- group.items\n      index <- model.itemStringIntMap.get(item)\n    } yield (index, group.weight))\n      .toMap\n      .withDefaultValue(1.0)\n\n    val userFeature: Option[Array[Double]] =\n      model.userStringIntMap.get(query.user).flatMap { userIndex =>\n        userFeatures.get(userIndex)\n      }\n\n    val topScores: Array[(Int, Double)] = if (userFeature.isDefined) {\n      // the user has feature vector\n      predictKnownUser(\n        userFeature = userFeature.get,\n        productModels = productModels,\n        query = query,\n        whiteList = whiteList,\n        blackList = finalBlackList,\n        weights = weights // ADDED\n      )\n    } else {\n      // the user doesn't have feature vector.\n      // For example, new user is created after model is trained.\n      logger.info(s\"No userFeature found for user ${query.user}.\")\n\n      // check if the user has recent events on some items\n      val recentItems: Set[String] = 
getRecentItems(query)\n      val recentList: Set[Int] = recentItems.flatMap (x =>\n        model.itemStringIntMap.get(x))\n\n      val recentFeatures: Vector[Array[Double]] = recentList.toVector\n        // productModels may not contain the requested item\n        .map { i =>\n          productModels.get(i).flatMap { pm => pm.features }\n        }.flatten\n\n      if (recentFeatures.isEmpty) {\n        logger.info(s\"No features vector for recent items ${recentItems}.\")\n        predictDefault(\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList,\n          weights = weights // ADDED\n        )\n      } else {\n        predictSimilar(\n          recentFeatures = recentFeatures,\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList,\n          weights = weights // ADDED\n        )\n      }\n    }\n\n    val itemScores = topScores.map { case (i, s) =>\n      new ItemScore(\n        // convert item int index back to string ID\n        item = model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    new PredictedResult(itemScores)\n  }\n\n  /** Generate final blackList based on other constraints */\n  def genBlackList(query: Query): Set[String] = {\n    // if unseenOnly is True, get all seen items\n    val seenItems: Set[String] = if (ap.unseenOnly) {\n\n      // get all user item events which are considered as \"seen\" events\n      val seenEvents: Iterator[Event] = try {\n        LEventStore.findByEntity(\n          appName = ap.appName,\n          entityType = \"user\",\n          entityId = query.user,\n          eventNames = Some(ap.seenEvents),\n          targetEntityType = Some(Some(\"item\")),\n          // set time limit to avoid super long DB access\n          timeout = Duration(200, \"millis\")\n        )\n      } catch {\n        case e: scala.concurrent.TimeoutException 
=>\n          logger.error(s\"Timeout when read seen events.\" +\n            s\" Empty list is used. ${e}\")\n          Iterator[Event]()\n        case e: Exception =>\n          logger.error(s\"Error when read seen events: ${e}\")\n          throw e\n      }\n\n      seenEvents.map { event =>\n        try {\n          event.targetEntityId.get\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Can't get targetEntityId of event ${event}.\")\n            throw e\n          }\n        }\n      }.toSet\n    } else {\n      Set[String]()\n    }\n\n    // get the latest constraint unavailableItems $set event\n    val unavailableItems: Set[String] = try {\n      val constr = LEventStore.findByEntity(\n        appName = ap.appName,\n        entityType = \"constraint\",\n        entityId = \"unavailableItems\",\n        eventNames = Some(Seq(\"$set\")),\n        limit = Some(1),\n        latest = true,\n        timeout = Duration(200, \"millis\")\n      )\n      if (constr.hasNext) {\n        constr.next.properties.get[Set[String]](\"items\")\n      } else {\n        Set[String]()\n      }\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read set unavailableItems event.\" +\n          s\" Empty list is used. 
${e}\")\n        Set[String]()\n      case e: Exception =>\n        logger.error(s\"Error when read set unavailableItems event: ${e}\")\n        throw e\n    }\n\n    // combine query's blackList,seenItems and unavailableItems\n    // into final blackList.\n    query.blackList.getOrElse(Set[String]()) ++ seenItems ++ unavailableItems\n  }\n\n  // ADDED\n  /** Get the latest constraint weightedItems */\n  def weightedItems: Seq[WeightGroup] = {\n    try {\n      val constr = LEventStore.findByEntity(\n        appName = ap.appName,\n        entityType = \"constraint\",\n        entityId = \"weightedItems\",\n        eventNames = Some(Seq(\"$set\")),\n        limit = Some(1),\n        latest = true,\n        timeout = Duration(200, \"millis\")\n      )\n      if (constr.hasNext) {\n        constr.next.properties.get[Seq[WeightGroup]](\"weights\")\n      } else {\n        Nil\n      }\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read set weightedItems event.\" +\n          s\" Empty list is used. 
${e}\")\n        Nil\n      case e: Exception =>\n        logger.error(s\"Error when read set weightedItems event: ${e}\")\n        throw e\n    }\n  }\n\n  /** Get recent events of the user on items for recommending similar items */\n  def getRecentItems(query: Query): Set[String] = {\n    // get latest 10 user view item events\n    val recentEvents = try {\n      LEventStore.findByEntity(\n        appName = ap.appName,\n        // entityType and entityId is specified for fast lookup\n        entityType = \"user\",\n        entityId = query.user,\n        eventNames = Some(ap.similarEvents),\n        targetEntityType = Some(Some(\"item\")),\n        limit = Some(10),\n        latest = true,\n        // set time limit to avoid super long DB access\n        timeout = Duration(200, \"millis\")\n      )\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read recent events.\" +\n          s\" Empty list is used. ${e}\")\n        Iterator[Event]()\n      case e: Exception =>\n        logger.error(s\"Error when read recent events: ${e}\")\n        throw e\n    }\n\n    val recentItems: Set[String] = recentEvents.map { event =>\n      try {\n        event.targetEntityId.get\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Can't get targetEntityId of event ${event}.\")\n          throw e\n        }\n      }\n    }.toSet\n\n    recentItems\n  }\n\n  /** Prediction for user with known feature vector */\n  def predictKnownUser(\n    userFeature: Array[Double],\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int],\n    weights: Map[Int, Double] // ADDED\n  ): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        pm.features.isDefined &&\n        isCandidateItem(\n          i = i,\n          item = pm.item,\n          
categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        // NOTE: features must be defined, so can call .get\n        val s = dotProduct(userFeature, pm.features.get)\n        // may customize here to further adjust score\n        // ADDED\n        val adjustedScore = s * weights(i)\n        (i, adjustedScore)\n      }\n      .filter(_._2 > 0) // only keep items with score > 0\n      .seq // convert back to sequential collection\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  /** Default prediction when know nothing about the user */\n  def predictDefault(\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int],\n    weights: Map[Int, Double] // ADDED\n  ): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        isCandidateItem(\n          i = i,\n          item = pm.item,\n          categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        // may customize here to further adjust score\n        // ADDED\n        val s = pm.count.toDouble\n        val adjustedScore = s * weights(i)\n        (i, adjustedScore)\n      }\n      .seq\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  /** Return top similar items based on items user recently has action on */\n  def predictSimilar(\n    recentFeatures: Vector[Array[Double]],\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int],\n    weights: Map[Int, Double] // ADDED\n  
): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        pm.features.isDefined &&\n        isCandidateItem(\n          i = i,\n          item = pm.item,\n          categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        val s = recentFeatures.map{ rf =>\n          // pm.features must be defined because of filter logic above\n          cosine(rf, pm.features.get)\n        }.reduce(_ + _)\n        // may customize here to further adjust score\n        // ADDED\n        val adjustedScore = s * weights(i)\n        (i, adjustedScore)\n      }\n      .filter(_._2 > 0) // keep items with score > 0\n      .seq // convert back to sequential collection\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  private\n  def getTopN[T](s: Iterable[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def dotProduct(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var d: Double = 0\n    while (i < size) {\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    d\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 
0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    item: Item,\n    categories: Option[Set[String]],\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int]\n  ): Boolean = {\n    // can add other custom filtering here\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    !blackList.contains(i) &&\n    // filter categories\n    categories.map { cat =>\n      item.categories.map { itemCat =>\n        // keep this item if it has overlap categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int,\n  categories: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n) extends Serializable\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n) extends Serializable\n\ncase class ItemScore(\n  item: String,\n  score: Double\n) extends Serializable\n\nobject ECommerceRecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"ecomm\" -> classOf[ECommAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents,\n      buyEvents = trainingData.buyEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/adjust-score/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-ecommercerecommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for E-Commerce Recommendation Engine Template\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4))\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in 
random.sample(item_ids, 10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n      # randomly rate some of the viewed items\n      if random.choice([True, False]):\n        rating = random.choice(range(1,6))\n        print(\"User\", user_id ,\"rates item\", viewed_item, \"rating\", rating)\n        client.create_event(\n          event=\"rate\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item,\n          properties={\n            \"rating\": rating\n          }\n        )\n        count += 1\n      # randomly buy some of the viewed items\n      if random.choice([True, False]):\n        print(\"User\", user_id ,\"buys item\", viewed_item)\n        client.create_event(\n          event=\"buy\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item\n        )\n        count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for e-commerce recommendation engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"u1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.ecommercerecommendation.ECommerceRecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"ecomm\",\n      \"params\": {\n        \"appName\": \"MyApp1\",\n        \"unseenOnly\": true,\n        \"seenEvents\": [\"buy\", \"view\"],\n        \"similarEvents\": [\"view\"],\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n       
 case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        Item(categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      .cache()\n\n    val rateEventsRDD: RDD[RateEvent] = eventsRDD // MODIFIED\n      .filter { event => event.event == \"rate\" } // MODIFIED\n      .map { event =>\n        try {\n          RateEvent( // MODIFIED\n            user = event.entityId,\n            item = event.targetEntityId.get,\n            rating = event.properties.get[Double](\"rating\"), // ADDED\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to RateEvent.\" + // MODIFIED\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n    val buyEventsRDD: RDD[BuyEvent] = eventsRDD\n      .filter { event => event.event == \"buy\" }\n      .map { event =>\n        try {\n          BuyEvent(\n            user = 
event.entityId,\n            item = event.targetEntityId.get,\n            t = event.eventTime.getMillis\n          )\n        } catch {\n          case e: Exception =>\n            logger.error(s\"Cannot convert ${event} to BuyEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n        }\n      }\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      rateEvents = rateEventsRDD, // MODIFIED\n      buyEvents = buyEventsRDD\n    )\n  }\n}\n\ncase class User()\n\ncase class Item(categories: Option[List[String]])\n\n// MODIFIED\ncase class RateEvent(user: String, item: String, rating: Double, t: Long)\n\ncase class BuyEvent(user: String, item: String, t: Long)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent], // MODIFIED\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    // MODIFIED\n    s\"rateEvents: [${rateEvents.count()}] (${rateEvents.take(2).toList}...)\" +\n    s\"buyEvents: [${buyEvents.count()}] (${buyEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/src/main/scala/ECommAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.LEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\nimport scala.concurrent.duration.Duration\nimport scala.concurrent.ExecutionContext.Implicits.global\n\ncase class ECommAlgorithmParams(\n  appName: String,\n  unseenOnly: Boolean,\n  seenEvents: List[String],\n  similarEvents: List[String],\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]\n) extends Params\n\n\ncase class ProductModel(\n  item: Item,\n  features: Option[Array[Double]], // features by ALS\n  count: Int // popular count for default score\n)\n\nclass ECommModel(\n  
val rank: Int,\n  val userFeatures: Map[Int, Array[Double]],\n  val productModels: Map[Int, ProductModel],\n  val userStringIntMap: BiMap[String, Int],\n  val itemStringIntMap: BiMap[String, Int]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" rank: ${rank}\" +\n    s\" userFeatures: [${userFeatures.size}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productModels: [${productModels.size}]\" +\n    s\"(${productModels.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2).toString}...)]\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\"\n  }\n}\n\nclass ECommAlgorithm(val ap: ECommAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ECommModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def train(sc: SparkContext, data: PreparedData): ECommModel = {\n    require(!data.rateEvents.take(1).isEmpty, // MODIFIED\n      s\"rateEvents in PreparedData cannot be empty.\" + // MODIFIED\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val mllibRatings: RDD[MLlibRating] = genMLlibRating(\n      userStringIntMap = 
userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // use ALS to train feature vectors\n    val m = ALS.train( // MODIFIED\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n\n    val userFeatures = m.userFeatures.collectAsMap.toMap\n\n    // convert ID to Int index\n    val items = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }\n\n    // join item with the trained productFeatures\n    val productFeatures: Map[Int, (Item, Option[Array[Double]])] =\n      items.leftOuterJoin(m.productFeatures).collectAsMap.toMap\n\n    val popularCount = trainDefault(\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap,\n      data = data\n    )\n\n    val productModels: Map[Int, ProductModel] = productFeatures\n      .map { case (index, (item, features)) =>\n        val pm = ProductModel(\n          item = item,\n          features = features,\n          // NOTE: use getOrElse because popularCount may not contain all items.\n          count = popularCount.getOrElse(index, 0)\n        )\n        (index, pm)\n      }\n\n    new ECommModel(\n      rank = m.rank,\n      userFeatures = userFeatures,\n      productModels = productModels,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap\n    )\n  }\n\n  /** Generate MLlibRating from PreparedData.\n    * You may customize this function if use different events or different aggregation method\n    */\n  def genMLlibRating(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: 
BiMap[String, Int],\n    data: PreparedData): RDD[MLlibRating] = {\n\n    val mllibRatings = data.rateEvents // MODIFIED\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), (r.rating, r.t)) // MODIFIED\n      }\n      .filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey { case (v1, v2) => // MODIFIED\n        // if a user may rate same item with different value at different times,\n        // use the latest value for this case.\n        // Can remove this reduceByKey() if no need to support this case.\n        val (rating1, t1) = v1\n        val (rating2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }\n      .map { case ((u, i), (rating, t)) => // MODIFIED\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, rating) // MODIFIED\n      }\n      .cache()\n\n    mllibRatings\n  }\n\n  /** Train default model.\n    * You may customize this function if use different events or\n    * need different ways to count \"popular\" score or return default score for item.\n    */\n  def trainDefault(\n    userStringIntMap: BiMap[String, Int],\n    itemStringIntMap: BiMap[String, Int],\n    data: PreparedData): Map[Int, Int] = {\n    // count number of buys\n    // (item index, count)\n    val buyCountsRDD: RDD[(Int, Int)] = data.buyEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index\n        val uindex = 
userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        (uindex, iindex, 1)\n      }\n      .filter { case (u, i, v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .map { case (u, i, v) => (i, 1) } // key is item\n      .reduceByKey{ case (a, b) => a + b } // count number of items occurrence\n\n    buyCountsRDD.collectAsMap.toMap\n  }\n\n  def predict(model: ECommModel, query: Query): PredictedResult = {\n\n    val userFeatures = model.userFeatures\n    val productModels = model.productModels\n\n    // convert whiteList's string ID to integer index\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.flatMap(model.itemStringIntMap.get(_))\n    )\n\n    val finalBlackList: Set[Int] = genBlackList(query = query)\n      // convert seen Items list from String ID to integer Index\n      .flatMap(x => model.itemStringIntMap.get(x))\n\n    val userFeature: Option[Array[Double]] =\n      model.userStringIntMap.get(query.user).flatMap { userIndex =>\n        userFeatures.get(userIndex)\n      }\n\n    val topScores: Array[(Int, Double)] = if (userFeature.isDefined) {\n      // the user has feature vector\n      predictKnownUser(\n        userFeature = userFeature.get,\n        productModels = productModels,\n        query = query,\n        whiteList = whiteList,\n        blackList = finalBlackList\n      )\n    } else {\n      // the user doesn't have feature vector.\n      // For example, new user is created after model is trained.\n      logger.info(s\"No userFeature found for user ${query.user}.\")\n\n      // check if the user has recent events on some items\n  
    val recentItems: Set[String] = getRecentItems(query)\n      val recentList: Set[Int] = recentItems.flatMap (x =>\n        model.itemStringIntMap.get(x))\n\n      val recentFeatures: Vector[Array[Double]] = recentList.toVector\n        // productModels may not contain the requested item\n        .map { i =>\n          productModels.get(i).flatMap { pm => pm.features }\n        }.flatten\n\n      if (recentFeatures.isEmpty) {\n        logger.info(s\"No features vector for recent items ${recentItems}.\")\n        predictDefault(\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList\n        )\n      } else {\n        predictSimilar(\n          recentFeatures = recentFeatures,\n          productModels = productModels,\n          query = query,\n          whiteList = whiteList,\n          blackList = finalBlackList\n        )\n      }\n    }\n\n    val itemScores = topScores.map { case (i, s) =>\n      new ItemScore(\n        // convert item int index back to string ID\n        item = model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    new PredictedResult(itemScores)\n  }\n\n  /** Generate final blackList based on other constraints */\n  def genBlackList(query: Query): Set[String] = {\n    // if unseenOnly is True, get all seen items\n    val seenItems: Set[String] = if (ap.unseenOnly) {\n\n      // get all user item events which are considered as \"seen\" events\n      val seenEvents: Iterator[Event] = try {\n        LEventStore.findByEntity(\n          appName = ap.appName,\n          entityType = \"user\",\n          entityId = query.user,\n          eventNames = Some(ap.seenEvents),\n          targetEntityType = Some(Some(\"item\")),\n          // set time limit to avoid super long DB access\n          timeout = Duration(200, \"millis\")\n        )\n      } catch {\n        case e: scala.concurrent.TimeoutException =>\n          logger.error(s\"Timeout when 
read seen events.\" +\n            s\" Empty list is used. ${e}\")\n          Iterator[Event]()\n        case e: Exception =>\n          logger.error(s\"Error when read seen events: ${e}\")\n          throw e\n      }\n\n      seenEvents.map { event =>\n        try {\n          event.targetEntityId.get\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Can't get targetEntityId of event ${event}.\")\n            throw e\n          }\n        }\n      }.toSet\n    } else {\n      Set[String]()\n    }\n\n    // get the latest constraint unavailableItems $set event\n    val unavailableItems: Set[String] = try {\n      val constr = LEventStore.findByEntity(\n        appName = ap.appName,\n        entityType = \"constraint\",\n        entityId = \"unavailableItems\",\n        eventNames = Some(Seq(\"$set\")),\n        limit = Some(1),\n        latest = true,\n        timeout = Duration(200, \"millis\")\n      )\n      if (constr.hasNext) {\n        constr.next.properties.get[Set[String]](\"items\")\n      } else {\n        Set[String]()\n      }\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read set unavailableItems event.\" +\n          s\" Empty list is used. 
${e}\")\n        Set[String]()\n      case e: Exception =>\n        logger.error(s\"Error when read set unavailableItems event: ${e}\")\n        throw e\n    }\n\n    // combine query's blackList,seenItems and unavailableItems\n    // into final blackList.\n    query.blackList.getOrElse(Set[String]()) ++ seenItems ++ unavailableItems\n  }\n\n  /** Get recent events of the user on items for recommending similar items */\n  def getRecentItems(query: Query): Set[String] = {\n    // get latest 10 user view item events\n    val recentEvents = try {\n      LEventStore.findByEntity(\n        appName = ap.appName,\n        // entityType and entityId is specified for fast lookup\n        entityType = \"user\",\n        entityId = query.user,\n        eventNames = Some(ap.similarEvents),\n        targetEntityType = Some(Some(\"item\")),\n        limit = Some(10),\n        latest = true,\n        // set time limit to avoid super long DB access\n        timeout = Duration(200, \"millis\")\n      )\n    } catch {\n      case e: scala.concurrent.TimeoutException =>\n        logger.error(s\"Timeout when read recent events.\" +\n          s\" Empty list is used. 
${e}\")\n        Iterator[Event]()\n      case e: Exception =>\n        logger.error(s\"Error when read recent events: ${e}\")\n        throw e\n    }\n\n    val recentItems: Set[String] = recentEvents.map { event =>\n      try {\n        event.targetEntityId.get\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Can't get targetEntityId of event ${event}.\")\n          throw e\n        }\n      }\n    }.toSet\n\n    recentItems\n  }\n\n  /** Prediction for user with known feature vector */\n  def predictKnownUser(\n    userFeature: Array[Double],\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int]\n  ): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        pm.features.isDefined &&\n        isCandidateItem(\n          i = i,\n          item = pm.item,\n          categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        // NOTE: features must be defined, so can call .get\n        val s = dotProduct(userFeature, pm.features.get)\n        // may customize here to further adjust score\n        (i, s)\n      }\n      .filter(_._2 > 0) // only keep items with score > 0\n      .seq // convert back to sequential collection\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  /** Default prediction when know nothing about the user */\n  def predictDefault(\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int]\n  ): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        isCandidateItem(\n          i = 
i,\n          item = pm.item,\n          categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        // may customize here to further adjust score\n        (i, pm.count.toDouble)\n      }\n      .seq\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  /** Return top similar items based on items user recently has action on */\n  def predictSimilar(\n    recentFeatures: Vector[Array[Double]],\n    productModels: Map[Int, ProductModel],\n    query: Query,\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int]\n  ): Array[(Int, Double)] = {\n    val indexScores: Map[Int, Double] = productModels.par // convert to parallel collection\n      .filter { case (i, pm) =>\n        pm.features.isDefined &&\n        isCandidateItem(\n          i = i,\n          item = pm.item,\n          categories = query.categories,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .map { case (i, pm) =>\n        val s = recentFeatures.map{ rf =>\n          // pm.features must be defined because of filter logic above\n          cosine(rf, pm.features.get)\n        }.reduce(_ + _)\n        // may customize here to further adjust score\n        (i, s)\n      }\n      .filter(_._2 > 0) // keep items with score > 0\n      .seq // convert back to sequential collection\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n    val topScores = getTopN(indexScores, query.num)(ord).toArray\n\n    topScores\n  }\n\n  private\n  def getTopN[T](s: Iterable[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      
}\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def dotProduct(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var d: Double = 0\n    while (i < size) {\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    d\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    item: Item,\n    categories: Option[Set[String]],\n    whiteList: Option[Set[Int]],\n    blackList: Set[Int]\n  ): Boolean = {\n    // can add other custom filtering here\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    !blackList.contains(i) &&\n    // filter categories\n    categories.map { cat =>\n      item.categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int,\n  categories: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n) extends Serializable\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n) extends Serializable\n\ncase class ItemScore(\n  item: String,\n  score: Double\n) extends Serializable\n\nobject ECommerceRecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"ecomm\" -> classOf[ECommAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      rateEvents = trainingData.rateEvents, // MODIFIED\n      buyEvents = trainingData.buyEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent], // MODIFIED\n  val buyEvents: RDD[BuyEvent]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.ecommercerecommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-ecommercerecommendation/train-with-rate-event/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis is based on Recommendation Template v0.14.0.\n\nPlease refer to https://predictionio.apache.org/templates/recommendation/how-to/\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommendation engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nRATE_ACTIONS_DELIMITER = \"::\"\nSEED = 3\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  random.seed(SEED)\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n    # For demonstration purpose, randomly mix in some buy events\n    if (random.randint(0, 1) == 1):\n      client.create_event(\n        event=\"rate\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1],\n        properties= { \"rating\" : float(data[2]) }\n      )\n    else:\n      client.create_event(\n        event=\"buy\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1]\n      )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommendation engine\")\n  parser.add_argument('--access_key', 
default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/sample_movielens_data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. 
\" +\n      s\"There is a chance of running to StackOverflowException.\" +\n      s\"To remedy it, set lower numIterations or checkpoint parameters.\")\n  }\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // Set checkpoint directory\n    // sc.setCheckpointDir(\"checkpoint\")\n\n    // If you only have one type of implicit event (Eg. 
\"view\" event only),\n    // set implicitPrefs to true\n    val implicitPrefs = false\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProductsWithFilter() returns Array[MLlibRating], which uses item Int\n      // index. 
Convert it to String ID for returning PredictedResult\n      val blackList = query.blackList.flatMap(model.itemStringIntMap.get) // ADDED\n      val itemScores = model\n        .recommendProductsWithFilter(userInt, query.num, blackList) // MODIFIED\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purpose.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code construct predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport com.github.fommil.netlib.BLAS.{getInstance => blas} // ADDED\nimport org.apache.predictionio.examples.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.PersistentModel\nimport org.apache.predictionio.controller.PersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with PersistentModel[ALSAlgorithmParams] {\n\n  // ADDED\n  def recommendProductsWithFilter(user: Int, num: Int, productIdFilter: 
Set[Int]) = {\n    val filteredProductFeatures = productFeatures\n      .filter { case (id, _) => !productIdFilter.contains(id) } // (*)\n    recommend(userFeatures.lookup(user).head, filteredProductFeatures, num)\n      .map(t => Rating(user, t._1, t._2))\n  }\n\n  // ADDED\n  private def recommend(\n      recommendToFeatures: Array[Double],\n      recommendableFeatures: RDD[(Int, Array[Double])],\n      num: Int): Array[(Int, Double)] = {\n    val scored = recommendableFeatures.map { case (id, features) =>\n      (id, blas.ddot(features.length, recommendToFeatures, 1, features, 1))\n    }\n    scored.top(num)(Ordering.by(_._2))\n  }\n\n  override\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = 
sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n 
     // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum, Set.empty), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends 
Serializable {\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int,\n  blackList: Set[String] // ADDED\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject RecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.example.recommendation.RecommendationEvaluation \\\n//   org.example.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there is no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      None\n    } else {\n      val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n      Some(tpCount.toDouble / math.min(k, positives.size))\n    }\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 
10, 20);\n    numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/blacklist-items/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/.gitignore",
    "content": "data/sample_movielens_data.txt\nmanifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommendation engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nRATE_ACTIONS_DELIMITER = \"::\"\nSEED = 3\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  random.seed(SEED)\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n    # For demonstration purpose, randomly mix in some buy events\n    if (random.randint(0, 1) == 1):\n      client.create_event(\n        event=\"rate\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1],\n        properties= { \"rating\" : float(data[2]) }\n      )\n    else:\n      client.create_event(\n        event=\"buy\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1]\n      )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommendation engine\")\n  parser.add_argument('--access_key', 
default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/sample_movielens_data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/data/sample_not_train_data.txt",
    "content": "3\n4\n10\n22\n34\n54\n65\n89\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"preparator\": {\n    \"params\": {\n      \"filepath\": \"./data/sample_not_train_data.txt\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. 
\" +\n      s\"There is a chance of running to StackOverflowException.\" +\n      s\"To remedy it, set lower numIterations or checkpoint parameters.\")\n  }\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // Set checkpoint directory\n    // sc.setCheckpointDir(\"checkpoint\")\n\n    // If you only have one type of implicit event (Eg. 
\"view\" event only),\n    // set implicitPrefs to true\n    val implicitPrefs = false\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // index. 
Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purposes.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code constructs predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport org.apache.predictionio.examples.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.PersistentModel\nimport org.apache.predictionio.controller.PersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with PersistentModel[ALSAlgorithmParams] {\n\n  override\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    
sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n 
     // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable 
{\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject RecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.apache.predictionio.examples.recommendation.RecommendationEvaluation \\\n//   org.apache.predictionio.examples.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there are no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      None\n    } else {\n      val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n      Some(tpCount.toDouble / math.min(k, positives.size))\n    }\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 
10, 20);\n    numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport scala.io.Source // ADDED\nimport org.apache.predictionio.controller.Params // ADDED\n\n// ADDED CustomPreparatorParams case class\ncase class CustomPreparatorParams(\n  filepath: String\n) extends Params\n\nclass Preparator(pp: CustomPreparatorParams) // ADDED CustomPreparatorParams\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    val noTrainItems = Source.fromFile(pp.filepath).getLines.toSet // CHANGED\n    val ratings = trainingData.ratings.filter( r =>\n      !noTrainItems.contains(r.item)\n    )\n    new PreparedData(ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-data-prep/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/.gitignore",
    "content": "data/sample_movielens_data.txt\nmanifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommendation engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nRATE_ACTIONS_DELIMITER = \"::\"\nSEED = 3\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  random.seed(SEED)\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n    # For demonstration purpose, randomly mix in some buy events\n    if (random.randint(0, 1) == 1):\n      client.create_event(\n        event=\"rate\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1],\n        properties= { \"rating\" : float(data[2]) }\n      )\n    else:\n      client.create_event(\n        event=\"buy\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1]\n      )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommendation engine\")\n  parser.add_argument('--access_key', 
default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/sample_movielens_data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/data/sample_disabled_items.txt",
    "content": ""
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ],\n  \"serving\": {\n    \"params\": {\n      \"filepath\": \"./data/sample_disabled_items.txt\"\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. 
\" +\n      s\"There is a chance of running to StackOverflowException.\" +\n      s\"To remedy it, set lower numIterations or checkpoint parameters.\")\n  }\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // Set checkpoint directory\n    // sc.setCheckpointDir(\"checkpoint\")\n\n    // If you only have one type of implicit event (Eg. 
\"view\" event only),\n    // set implicitPrefs to true\n    val implicitPrefs = false\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // index. 
Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purpose.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code construct predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport org.apache.predictionio.examples.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.PersistentModel\nimport org.apache.predictionio.controller.PersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with PersistentModel[ALSAlgorithmParams] {\n\n  override\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    
sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n 
     // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable 
{\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject RecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.example.recommendation.RecommendationEvaluation \\\n//   org.example.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there is no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      None\n    } else {\n      val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n      Some(tpCount.toDouble / math.min(k, positives.size))\n    }\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 
10, 20);\n    numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nimport scala.io.Source\n\nimport org.apache.predictionio.controller.Params  // ADDED\n\n// ADDED ServingParams to specify the blacklisting file location.\ncase class ServingParams(filepath: String) extends Params\n\nclass Serving(val params: ServingParams)\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query, predictedResults: Seq[PredictedResult])\n  : PredictedResult = {\n    val disabledProducts: Set[String] = Source\n      .fromFile(params.filepath)\n      .getLines\n      .toSet\n\n    val itemScores = predictedResults.head.itemScores\n    PredictedResult(itemScores.filter(ps => !disabledProducts(ps.item)))\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/customize-serving/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/.gitignore",
    "content": "data/sample_movielens_data.txt\nmanifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommendation engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nRATE_ACTIONS_DELIMITER = \"::\"\nSEED = 3\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  random.seed(SEED)\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n    # For demonstration purpose, randomly mix in some dislike events\n    if (random.randint(0, 1) == 1):\n      client.create_event(\n        event=\"like\",\n        entity_type=\"customer\",\n        entity_id=data[0],\n        target_entity_type=\"product\",\n        target_entity_id=data[1]\n      )\n    else:\n      client.create_event(\n        event=\"dislike\",\n        entity_type=\"customer\",\n        entity_id=data[0],\n        target_entity_type=\"product\",\n        target_entity_id=data[1]\n      )\n    count += 1\n  f.close()\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommendation engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  
parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/sample_movielens_data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. 
\" +\n      s\"There is a chance of running to StackOverflowException.\" +\n      s\"To remedy it, set lower numIterations or checkpoint parameters.\")\n  }\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // Set checkpoint directory\n    // sc.setCheckpointDir(\"checkpoint\")\n\n    // If you only have one type of implicit event (Eg. 
\"view\" event only),\n    // set implicitPrefs to true\n    val implicitPrefs = false\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // index. 
Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purpose.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code construct predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport org.apache.predictionio.examples.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.PersistentModel\nimport org.apache.predictionio.controller.PersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with PersistentModel[ALSAlgorithmParams] {\n\n  override\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    
sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"customer\"), // MODIFIED\n      eventNames = Some(List(\"like\", \"dislike\")), // MODIFIED\n    
  // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"product\")))(sc) // MODIFIED\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          // MODIFIED\n          case \"like\" => 4.0 // map a like event to a rating of 4.0\n          case \"dislike\" => 1.0  // map a dislike event to a rating of 1.0\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n\nclass TrainingData(\n  val 
ratings: RDD[Rating]\n) extends Serializable {\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject RecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.example.recommendation.RecommendationEvaluation \\\n//   org.example.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there is no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      None\n    } else {\n      val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n      Some(tpCount.toDouble / math.min(k, positives.size))\n    }\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 
10, 20);\n    numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/reading-custom-events/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/.gitignore",
    "content": "data/sample_movielens_data.txt\nmanifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommendation engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nRATE_ACTIONS_DELIMITER = \"::\"\nSEED = 3\n\ndef import_events(client, file):\n  f = open(file, 'r')\n  random.seed(SEED)\n  count = 0\n  print(\"Importing data...\")\n  for line in f:\n    data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n    client.create_event(\n      event=\"view\",\n      entity_type=\"user\",\n      entity_id=data[0],\n      target_entity_type=\"item\",\n      target_entity_id=data[1]\n    )\n    count += 1\n    # For demonstration purpose, randomly mix in some buy events\n    if (random.randint(0, 1) == 1):\n      client.create_event(\n        event=\"rate\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1],\n        properties= { \"rating\" : float(data[2]) }\n      )\n    else:\n      client.create_event(\n        event=\"buy\",\n        entity_type=\"user\",\n        entity_id=data[0],\n        target_entity_type=\"item\",\n        target_entity_id=data[1]\n      )\n    count += 1\n  f.close()\n  print(\"%s events are 
imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommendation engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n  parser.add_argument('--file', default=\"./data/sample_movielens_data.txt\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client, args.file)\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"user\": \"1\", \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. 
\" +\n      s\"There is a chance of running to StackOverflowException.\" +\n      s\"To remedy it, set lower numIterations or checkpoint parameters.\")\n  }\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preparator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // Set checkpoint directory\n    // sc.setCheckpointDir(\"checkpoint\")\n\n    // If you only have one type of implicit event (Eg. 
\"view\" event only),\n    // set implicitPrefs to true\n    // MODIFIED\n    val implicitPrefs = true\n    val als = new ALS()\n    als.setUserBlocks(-1)\n    als.setProductBlocks(-1)\n    als.setRank(ap.rank)\n    als.setIterations(ap.numIterations)\n    als.setLambda(ap.lambda)\n    als.setImplicitPrefs(implicitPrefs)\n    als.setAlpha(1.0)\n    als.setSeed(seed)\n    als.setCheckpointInterval(10)\n    val m = als.run(mllibRatings)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // index. 
Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purpose.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code construct predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport org.apache.predictionio.examples.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.PersistentModel\nimport org.apache.predictionio.controller.PersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with PersistentModel[ALSAlgorithmParams] {\n\n  override\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    
sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends PersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")), // MODIFIED\n      // targetEntityType is 
optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      try {\n        val ratingValue: Double = event.event match {\n          case \"view\" => 1.0 // MODIFIED\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // MODIFIED\n        // key is (user id, item id)\n        // value is the rating value, which is 1.\n        ((event.entityId, event.targetEntityId.get), ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n    }\n    // MODIFIED\n    // sum all values for the same user id and item id key\n    .reduceByKey { case (a, b) => a + b }\n    .map { case ((uid, iid), r) =>\n      Rating(uid, iid, r)\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: 
Double\n)\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable {\n  override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n)\n\ncase class ActualResult(\n  ratings: Array[Rating]\n)\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject RecommendationEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.apache.predictionio.examples.recommendation.RecommendationEvaluation \\\n//   org.apache.predictionio.examples.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there are no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      None\n    } else {\n      val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n      Some(tpCount.toDouble / math.min(k, positives.size))\n    }\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  override\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"MyApp1\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 
10, 20);\n    numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-recommendation/train-with-view-event/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\nThis is based on Similar Product Template v0.14.0.\n\nPlease refer to https://predictionio.apache.org/templates/similarproduct/how-to/\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/.gitignore",
    "content": "manifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-similarproduct\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for similar product engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4))\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in random.sample(item_ids, 
10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n\n  # each user randomly liked/disliked 10 items\n  for user_id in user_ids:\n    for viewed_item in random.sample(item_ids, 10):\n      if random.choice((False, True)) :\n        print(\"User\", user_id ,\"likes item\", viewed_item)\n        client.create_event(\n          event=\"like\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item\n        )\n      else:\n        print(\"User\", user_id ,\"dislikes item\", viewed_item)\n        client.create_event(\n          event=\"dislike\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item\n        )\n      count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for similar product engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"items\": [\"i1\", \"i3\"], \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/engine-cooccurrence.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"cooccurrence\",\n      \"params\": {\n        \"n\": 20\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    },\n    {\n      \"name\": \"likealgo\",\n      \"params\": {\n        \"rank\": 8,\n        \"numIterations\" : 15,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSModel(\n  val productFeatures: Map[Int, Array[Double]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" productFeatures: [${productFeatures.size}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\" +\n    s\" items: [${items.size}]\" +\n    
s\"(${items.take(2).toString}...)]\"\n  }\n}\n\n/**\n  * Use ALS to build item x feature matrix\n  */\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(!data.viewEvents.take(1).isEmpty,\n      s\"viewEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) 
=>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val productFeatures = model.productFeatures\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items.map(model.itemStringIntMap.get(_))\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // productFeatures may not contain the requested item\n      .map { item => productFeatures.get(item) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No productFeatures vector for query items ${query.items}.\")\n      Array[(Int, Double)]()\n    } else {\n      
productFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map{ qf =>\n            cosine(qf, f)\n          }.reduce(_ + _)\n        }\n        .filter(_._2 > 0) // keep items with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateItem(\n        i = i,\n        items = model.items,\n        categories = query.categories,\n        categoryBlackList = query.categoryBlackList,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val itemScores = topScores.map { case (i, s) =>\n      ItemScore(\n        item = model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    PredictedResult(itemScores)\n  }\n\n  private\n  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: Option[Set[String]],\n    categoryBlackList: Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    
whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true) &&\n    categoryBlackList.map { cat =>\n      items(i).categories.map { itemCat =>\n        // discard this item if it has overlapping categories with the query\n        (itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(true) // keep this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/CooccurrenceAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\ncase class CooccurrenceAlgorithmParams(\n  n: Int // top co-occurrence\n) extends Params\n\nclass CooccurrenceModel(\n  val topCooccurrences: Map[Int, Array[(Int, Int)]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString(): String = {\n    val s = topCooccurrences.mapValues { v => v.mkString(\",\") }\n    s.toString\n  }\n}\n\nclass CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] {\n\n  override\n  def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = {\n\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val topCooccurrences = trainCooccurrence(\n      events = data.viewEvents,\n      n = 
ap.n,\n      itemStringIntMap = itemStringIntMap\n    )\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    new CooccurrenceModel(\n      topCooccurrences = topCooccurrences,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n\n  }\n\n  /* given the user-item events, find out top n co-occurrence pair for each item */\n  def trainCooccurrence(\n    events: RDD[ViewEvent],\n    n: Int,\n    itemStringIntMap: BiMap[String, Int]): Map[Int, Array[(Int, Int)]] = {\n\n    val userItem = events\n      // map item from string to integer index\n      .flatMap {\n        case ViewEvent(user, item, _) if itemStringIntMap.contains(item) =>\n          Some(user, itemStringIntMap(item))\n        case _ => None\n      }\n      // if user view same item multiple times, only count as once\n      .distinct()\n      .cache()\n\n    val cooccurrences: RDD[((Int, Int), Int)] = userItem.join(userItem)\n      // remove duplicate pair in reversed order for each user. eg. (a,b) vs. 
(b,a)\n      .filter { case (user, (item1, item2)) => item1 < item2 }\n      .map { case (user, (item1, item2)) => ((item1, item2), 1) }\n      .reduceByKey{ (a: Int, b: Int) => a + b }\n\n    val topCooccurrences = cooccurrences\n      .flatMap{ case (pair, count) =>\n        Seq((pair._1, (pair._2, count)), (pair._2, (pair._1, count)))\n      }\n      .groupByKey\n      .map { case (item, itemCounts) =>\n        (item, itemCounts.toArray.sortBy(_._2)(Ordering.Int.reverse).take(n))\n      }\n      .collectAsMap.toMap\n\n    topCooccurrences\n  }\n\n  override\n  def predict(model: CooccurrenceModel, query: Query): PredictedResult = {\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items\n      .flatMap(model.itemStringIntMap.get(_))\n      .toSet\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val counts: Array[(Int, Int)] = queryList.toVector\n      .flatMap { q =>\n        model.topCooccurrences.getOrElse(q, Array())\n      }\n      .groupBy { case (index, count) => index }\n      .map { case (index, indexCounts) => (index, indexCounts.map(_._2).sum) }\n      .toArray\n\n    val itemScores = counts\n      .filter { case (i, v) =>\n        isCandidateItem(\n          i = i,\n          items = model.items,\n          categories = query.categories,\n          queryList = queryList,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .sortBy(_._2)(Ordering.Int.reverse)\n      .take(query.num)\n      .map { case (index, count) =>\n        ItemScore(\n          item = model.itemIntStringMap(index),\n          score = count\n        )\n      }\n\n    PredictedResult(itemScores)\n\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: 
Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n        case e: 
Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        Item(categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val viewEvent = try {\n          event.event match {\n            case \"view\" => ViewEvent(\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        viewEvent\n      }.cache()\n\n    // ADDED\n    // get all \"user\" \"like\" and \"dislike\" \"item\" events\n    val likeEventsRDD: RDD[LikeEvent] = PEventStore.find(\n      appName = 
dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"like\", \"dislike\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val likeEvent = try {\n          event.event match {\n            case \"like\" | \"dislike\" => LikeEvent(\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis,\n              like = (event.event == \"like\"))\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to LikeEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        likeEvent\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD,\n      likeEvents = likeEventsRDD // ADDED\n    )\n  }\n}\n\ncase class User()\n\ncase class Item(categories: Option[List[String]])\n\ncase class ViewEvent(user: String, item: String, t: Long)\n\ncase class LikeEvent( // ADDED\n  user: String,\n  item: String,\n  t: Long,\n  like: Boolean // true: like. false: dislike\n)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val likeEvents: RDD[LikeEvent] // ADDED\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\" +\n    // ADDED\n    s\"likeEvents: [${likeEvents.count()}] (${likeEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  items: List[String],\n  num: Int,\n  categories: Option[Set[String]],\n  categoryBlackList: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n){\n  override def toString: String = itemScores.mkString(\",\")\n}\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject SimilarProductEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"als\" -> classOf[ALSAlgorithm],\n        \"cooccurrence\" -> classOf[CooccurrenceAlgorithm],\n        \"likealgo\" -> classOf[LikeAlgorithm]), // ADDED\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/LikeAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\n// ADDED\n// Extend original ALSAlgorithm and override train() function to handle\n// like and dislike events\nclass LikeAlgorithm(ap: ALSAlgorithmParams) extends ALSAlgorithm(ap) {\n\n  @transient lazy override val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(!data.likeEvents.take(1).isEmpty,\n      s\"likeEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n  
    s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.likeEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        // key is (uindex, iindex) tuple, value is (like, t) tuple\n        ((uindex, iindex), (r.like, r.t))\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey { case (v1, v2) => // MODIFIED\n        // An user may like an item and change to dislike it later,\n        // or vice versa. Use the latest value for this case.\n        val (like1, t1) = v1\n        val (like2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }.map { case ((u, i), (like, t)) => // MODIFIED\n        // With ALS.trainImplicit(), we can use negative value to indicate\n        // nagative siginal (ie. 
dislike)\n        val r = if (like) 1 else -1\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, r)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents,\n      likeEvents = trainingData.likeEvents) // ADDED\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent],\n  val likeEvents: RDD[LikeEvent] // ADDED\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.LServing\n\nimport breeze.stats.meanAndVariance\nimport breeze.stats.MeanAndVariance\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n\n    // MODFIED\n    val standard: Seq[Array[ItemScore]] = if (query.num == 1) {\n      // if query 1 item, don't standardize\n      predictedResults.map(_.itemScores)\n    } else {\n      // Standardize the score before combine\n      val mvList: Seq[MeanAndVariance] = predictedResults.map { pr =>\n        meanAndVariance(pr.itemScores.map(_.score))\n      }\n\n      predictedResults.zipWithIndex\n        .map { case (pr, i) =>\n          pr.itemScores.map { is =>\n            // standardize score (z-score)\n            // if standard deviation is 0 (when all items have the same score,\n            // meaning all items are ranked equally), return 0.\n            val score = if (mvList(i).stdDev == 0) {\n              0\n            } else {\n              (is.score - mvList(i).mean) / 
mvList(i).stdDev\n            }\n\n            ItemScore(is.item, score)\n          }\n        }\n    }\n\n    // sum the standardized score if same item\n    val combined = standard.flatten // Array of ItemScore\n      .groupBy(_.item) // groupBy item id\n      .mapValues(itemScores => itemScores.map(_.score).reduce(_ + _))\n      .toArray // array of (item id, score)\n      .sortBy(_._2)(Ordering.Double.reverse)\n      .take(query.num)\n      .map { case (k,v) => ItemScore(k, v) }\n\n    PredictedResult(combined)\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/multi-events-multi-algos/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/.gitignore",
    "content": "manifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-recommendeduser\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for recommended user engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 50 users, with user ids u1,u2,....,u50\n  user_ids = [\"u%s\" % i for i in range(1, 51)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # each user randomly follows 10 users\n  for user_id in user_ids:\n    for followed_user in random.sample(user_ids, 10):\n      print(\"User\", user_id ,\"follows User\", followed_user)\n      client.create_event(\n        event=\"follow\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"user\",\n        target_entity_id=followed_user\n      )\n      count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for recommended user engine\")\n  
parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"users\": [\"u1\", \"u3\"], \"num\": 10}))\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.RecommendedUserEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport grizzled.slf4j.Logger\nimport org.apache.predictionio.controller.{P2LAlgorithm, Params}\nimport org.apache.predictionio.data.storage.BiMap\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.{ALS, Rating => MLlibRating}\n\nimport scala.collection.mutable\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSModel(\n  val similarUserFeatures: Map[Int, Array[Double]],\n  val similarUserStringIntMap: BiMap[String, Int],\n  val similarUsers: Map[Int, User]\n) extends Serializable {\n\n  @transient lazy val similarUserIntStringMap = similarUserStringIntMap.inverse\n\n  override def toString = {\n    s\" similarUserFeatures: [${similarUserFeatures.size}]\" +\n    s\"(${similarUserFeatures.take(2).toList}...)\" +\n    s\" similarUserStringIntMap: [${similarUserStringIntMap.size}]\" +\n    s\"(${similarUserStringIntMap.take(2).toString()}...)]\" +\n    s\" users: [${similarUsers.size}]\" +\n    
s\"(${similarUsers.take(2).toString()}...)]\"\n  }\n}\n\n/**\n  * Use ALS to build user x feature matrix\n  */\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(data.followEvents.take(1).nonEmpty,\n      s\"followEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(data.users.take(1).nonEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val similarUserStringIntMap = userStringIntMap\n\n    // collect SimilarUser as Map and convert ID to Int index\n    val similarUsers: Map[Int, User] = data.users.map { case (id, similarUser) =>\n      (similarUserStringIntMap(id), similarUser)\n    }.collectAsMap().toMap\n\n    val mllibRatings = data.followEvents\n      .map { r =>\n        // Convert user and user String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = similarUserStringIntMap.getOrElse(r.followedUser, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent followedUser ID ${r.followedUser}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and user index\n        (u != -1) && (i != -1)\n      }\n      .map { case ((u, i), v) =>\n        // MLlibRating 
requires integer index for user and user\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(mllibRatings.take(1).nonEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and followedUser ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      similarUserFeatures = m.productFeatures.collectAsMap().toMap,\n      similarUserStringIntMap = similarUserStringIntMap,\n      similarUsers = similarUsers\n    )\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val similarUserFeatures = model.similarUserFeatures\n\n    // convert similarUsers to Int index\n    val queryList: Set[Int] = query.users.map(model.similarUserStringIntMap.get)\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // similarUserFeatures may not contain the requested user\n      .map { similarUser => similarUserFeatures.get(similarUser) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.similarUserStringIntMap.get).flatten\n    )\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.similarUserStringIntMap.get).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No similarUserFeatures vector for query users ${query.users}.\")\n      Array[(Int, Double)]()\n    } else {\n      similarUserFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map { qf =>\n            
cosine(qf, f)\n          }.sum\n        }\n        .filter(_._2 > 0) // keep similarUsers with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateSimilarUser(\n        i = i,\n        similarUsers = model.similarUsers,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val similarUserScores = topScores.map { case (i, s) =>\n      SimilarUserScore(\n        user = model.similarUserIntStringMap(i),\n        score = s\n      )\n    }\n\n    PredictedResult(similarUserScores)\n  }\n\n  private\n  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n    val q = mutable.PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.length\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = math.sqrt(n1) * math.sqrt(n2)\n    if (n1n2 == 0) 0 else d / n1n2\n  }\n\n  private\n  def isCandidateSimilarUser(\n    i: Int,\n    similarUsers: Map[Int, User],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard similarUsers in query as well\n    (!queryList.contains(i))\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.{EmptyActualResult, EmptyEvaluationInfo, PDataSource, Params}\nimport org.apache.predictionio.data.store.PEventStore\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties $properties of\" +\n            s\" user $entityId. 
Exception: $e.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // get all \"user\" \"follow\" \"followedUser\" events\n    val followEventsRDD: RDD[FollowEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"follow\")),\n      // targetEntityType is an optional field of an event.\n      targetEntityType = Some(Some(\"user\")))(sc)\n      // PEventStore.find() returns RDD[Event]\n      .map { event =>\n        val followEvent = try {\n          event.event match {\n            case \"follow\" => FollowEvent(\n              user = event.entityId,\n              followedUser = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event $event is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert $event to FollowEvent.\" +\n              s\" Exception: $e.\")\n            throw e\n          }\n        }\n        followEvent\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      followEvents = followEventsRDD\n    )\n  }\n}\n\ncase class User()\n\ncase class FollowEvent(user: String, followedUser: String, t: Long)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val followEvents: RDD[FollowEvent]\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"followEvents: [${followEvents.count()}] (${followEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  users: List[String],\n  num: Int,\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  similarUserScores: Array[SimilarUserScore]\n){\n  override def toString: String = similarUserScores.mkString(\",\")\n}\n\ncase class SimilarUserScore(\n  user: String,\n  score: Double\n)\n\nobject RecommendedUserEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PPreparator\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      followEvents = trainingData.followEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val followEvents: RDD[FollowEvent]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/recommended-user/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/.gitignore",
    "content": "manifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-similarproduct\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for similar product engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4)),\n        \"title\": \"title for movie \" + item_id,\n        \"date\": 1935 + random.randint(1, 25),\n        \"imdbUrl\": \"http://imdb.com/fake-url/\" + 
item_id\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in random.sample(item_ids, 10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for similar product engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"items\": [\"i1\", \"i3\"], \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/engine-cooccurrence.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"cooccurrence\",\n      \"params\": {\n        \"n\": 20\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSModel(\n  val productFeatures: Map[Int, Array[Double]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" productFeatures: [${productFeatures.size}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\" +\n    s\" items: [${items.size}]\" +\n    
s\"(${items.take(2).toString}...)]\"\n  }\n}\n\n/**\n  * Use ALS to build item x feature matrix\n  */\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(!data.viewEvents.take(1).isEmpty,\n      s\"viewEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) 
=>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val productFeatures = model.productFeatures\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items.map(model.itemStringIntMap.get(_))\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // productFeatures may not contain the requested item\n      .map { item => productFeatures.get(item) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No productFeatures vector for query items ${query.items}.\")\n      Array[(Int, Double)]()\n    } else {\n      
productFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map{ qf =>\n            cosine(qf, f)\n          }.reduce(_ + _)\n        }\n        .filter(_._2 > 0) // keep items with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateItem(\n        i = i,\n        items = model.items,\n        categories = query.categories,\n        categoryBlackList = query.categoryBlackList,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val itemScores = topScores.map { case (i, s) =>\n      // MODIFIED\n      val it = model.items(i)\n      ItemScore(\n        item = model.itemIntStringMap(i),\n        title = it.title,\n        date = it.date,\n        imdbUrl = it.imdbUrl,\n        score = s\n      )\n    }\n\n    PredictedResult(itemScores)\n  }\n\n  private\n  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: Option[Set[String]],\n    categoryBlackList: Option[Set[String]],\n 
   queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true) &&\n    categoryBlackList.map { cat =>\n      items(i).categories.map { itemCat =>\n        // discard this item if it has overlapping categories with the query\n        (itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(true) // keep this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/CooccurrenceAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\ncase class CooccurrenceAlgorithmParams(\n  n: Int // top co-occurrence\n) extends Params\n\nclass CooccurrenceModel(\n  val topCooccurrences: Map[Int, Array[(Int, Int)]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString(): String = {\n    val s = topCooccurrences.mapValues { v => v.mkString(\",\") }\n    s.toString\n  }\n}\n\nclass CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] {\n\n  override\n  def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = {\n\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val topCooccurrences = trainCooccurrence(\n      events = data.viewEvents,\n      n = 
ap.n,\n      itemStringIntMap = itemStringIntMap\n    )\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    new CooccurrenceModel(\n      topCooccurrences = topCooccurrences,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n\n  }\n\n  /* given the user-item events, find out top n co-occurrence pair for each item */\n  def trainCooccurrence(\n    events: RDD[ViewEvent],\n    n: Int,\n    itemStringIntMap: BiMap[String, Int]): Map[Int, Array[(Int, Int)]] = {\n\n    val userItem = events\n      // map item from string to integer index\n      .flatMap {\n        case ViewEvent(user, item, _) if itemStringIntMap.contains(item) =>\n          Some(user, itemStringIntMap(item))\n        case _ => None\n      }\n      // if user view same item multiple times, only count as once\n      .distinct()\n      .cache()\n\n    val cooccurrences: RDD[((Int, Int), Int)] = userItem.join(userItem)\n      // remove duplicate pair in reversed order for each user. eg. (a,b) vs. 
(b,a)\n      .filter { case (user, (item1, item2)) => item1 < item2 }\n      .map { case (user, (item1, item2)) => ((item1, item2), 1) }\n      .reduceByKey{ (a: Int, b: Int) => a + b }\n\n    val topCooccurrences = cooccurrences\n      .flatMap{ case (pair, count) =>\n        Seq((pair._1, (pair._2, count)), (pair._2, (pair._1, count)))\n      }\n      .groupByKey\n      .map { case (item, itemCounts) =>\n        (item, itemCounts.toArray.sortBy(_._2)(Ordering.Int.reverse).take(n))\n      }\n      .collectAsMap.toMap\n\n    topCooccurrences\n  }\n\n  override\n  def predict(model: CooccurrenceModel, query: Query): PredictedResult = {\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items\n      .flatMap(model.itemStringIntMap.get(_))\n      .toSet\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val counts: Array[(Int, Int)] = queryList.toVector\n      .flatMap { q =>\n        model.topCooccurrences.getOrElse(q, Array())\n      }\n      .groupBy { case (index, count) => index }\n      .map { case (index, indexCounts) => (index, indexCounts.map(_._2).sum) }\n      .toArray\n\n    val itemScores = counts\n      .filter { case (i, v) =>\n        isCandidateItem(\n          i = i,\n          items = model.items,\n          categories = query.categories,\n          queryList = queryList,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .sortBy(_._2)(Ordering.Int.reverse)\n      .take(query.num)\n      .map { case (index, count) =>\n        // MODIFIED\n        val it = model.items(index)\n        ItemScore(\n          item = model.itemIntStringMap(index),\n          title = it.title,\n          date = it.date,\n          imdbUrl = it.imdbUrl,\n          score = count\n        )\n      
}\n\n    PredictedResult(itemScores)\n\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n        case e: 
Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        // MODIFIED\n        Item(\n          title = properties.get[String](\"title\"),\n          date = properties.get[String](\"date\"),\n          imdbUrl = properties.get[String](\"imdbUrl\"),\n          categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val viewEvent = try {\n          event.event match {\n            case \"view\" => ViewEvent(\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n     
   viewEvent\n      }.cache()\n\n    new TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      viewEvents = viewEventsRDD\n    )\n  }\n}\n\ncase class User()\n\n// MODIFIED\ncase class Item(\n     title: String,\n     date: String,\n     imdbUrl: String,\n     categories: Option[List[String]])\n\ncase class ViewEvent(user: String, item: String, t: Long)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  items: List[String],\n  num: Int,\n  categories: Option[Set[String]],\n  categoryBlackList: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n){\n  override def toString: String = itemScores.mkString(\",\")\n}\n\n// MODIFIED\ncase class ItemScore(\n  item: String,\n  title: String,\n  date: String,\n  imdbUrl: String,\n  score: Double\n)\n\nobject SimilarProductEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"als\" -> classOf[ALSAlgorithm],\n        \"cooccurrence\" -> classOf[CooccurrenceAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents)\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/return-item-properties/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/.gitignore",
    "content": "manifest.json\ntarget/\npio.log\n/pio.sbt"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-similarproduct\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for similar product engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4))\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in random.sample(item_ids, 10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        
target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for similar product engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"items\": [\"i1\", \"i3\"], \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/engine-cooccurrence.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"cooccurrence\",\n      \"params\": {\n        \"n\": 20\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSModel(\n  val productFeatures: Map[Int, Array[Double]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" productFeatures: [${productFeatures.size}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\" +\n    s\" items: [${items.size}]\" +\n    
s\"(${items.take(2).toString}...)]\"\n  }\n}\n\n/**\n  * Use ALS to build item x feature matrix\n  */\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    require(!data.viewEvents.take(1).isEmpty,\n      s\"viewEvents in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.viewEvents.map(_.user)) // MODIFIED\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.viewEvents\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 1)\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }.reduceByKey(_ + _) // aggregate all view events of same user-item pair\n      .map { case ((u, i), 
v) =>\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, v)\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.trainImplicit(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      alpha = 1.0,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val productFeatures = model.productFeatures\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items.map(model.itemStringIntMap.get(_))\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // productFeatures may not contain the requested item\n      .map { item => productFeatures.get(item) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No productFeatures vector for query items ${query.items}.\")\n      Array[(Int, Double)]()\n    } else {\n      productFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map{ qf =>\n            cosine(qf, f)\n          }.reduce(_ + _)\n        }\n        .filter(_._2 
> 0) // keep items with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateItem(\n        i = i,\n        items = model.items,\n        categories = query.categories,\n        categoryBlackList = query.categoryBlackList,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val itemScores = topScores.map { case (i, s) =>\n      ItemScore(\n        item = model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    PredictedResult(itemScores)\n  }\n\n  private\n  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: Option[Set[String]],\n    categoryBlackList: Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { 
cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true) &&\n    categoryBlackList.map { cat =>\n      items(i).categories.map { itemCat =>\n        // discard this item if it has overlapping categories with the query\n        (itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(true) // keep this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/CooccurrenceAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\ncase class CooccurrenceAlgorithmParams(\n  n: Int // top co-occurrence\n) extends Params\n\nclass CooccurrenceModel(\n  val topCooccurrences: Map[Int, Array[(Int, Int)]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString(): String = {\n    val s = topCooccurrences.mapValues { v => v.mkString(\",\") }\n    s.toString\n  }\n}\n\nclass CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] {\n\n  override\n  def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = {\n\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val topCooccurrences = trainCooccurrence(\n      events = data.viewEvents,\n      n = 
ap.n,\n      itemStringIntMap = itemStringIntMap\n    )\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    new CooccurrenceModel(\n      topCooccurrences = topCooccurrences,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n\n  }\n\n  /* given the user-item events, find out top n co-occurrence pair for each item */\n  def trainCooccurrence(\n    events: RDD[ViewEvent],\n    n: Int,\n    itemStringIntMap: BiMap[String, Int]): Map[Int, Array[(Int, Int)]] = {\n\n    val userItem = events\n      // map item from string to integer index\n      .flatMap {\n        case ViewEvent(user, item, _) if itemStringIntMap.contains(item) =>\n          Some(user, itemStringIntMap(item))\n        case _ => None\n      }\n      // if user view same item multiple times, only count as once\n      .distinct()\n      .cache()\n\n    val cooccurrences: RDD[((Int, Int), Int)] = userItem.join(userItem)\n      // remove duplicate pair in reversed order for each user. eg. (a,b) vs. 
(b,a)\n      .filter { case (user, (item1, item2)) => item1 < item2 }\n      .map { case (user, (item1, item2)) => ((item1, item2), 1) }\n      .reduceByKey{ (a: Int, b: Int) => a + b }\n\n    val topCooccurrences = cooccurrences\n      .flatMap{ case (pair, count) =>\n        Seq((pair._1, (pair._2, count)), (pair._2, (pair._1, count)))\n      }\n      .groupByKey\n      .map { case (item, itemCounts) =>\n        (item, itemCounts.toArray.sortBy(_._2)(Ordering.Int.reverse).take(n))\n      }\n      .collectAsMap.toMap\n\n    topCooccurrences\n  }\n\n  override\n  def predict(model: CooccurrenceModel, query: Query): PredictedResult = {\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items\n      .flatMap(model.itemStringIntMap.get(_))\n      .toSet\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val counts: Array[(Int, Int)] = queryList.toVector\n      .flatMap { q =>\n        model.topCooccurrences.getOrElse(q, Array())\n      }\n      .groupBy { case (index, count) => index }\n      .map { case (index, indexCounts) => (index, indexCounts.map(_._2).sum) }\n      .toArray\n\n    val itemScores = counts\n      .filter { case (i, v) =>\n        isCandidateItem(\n          i = i,\n          items = model.items,\n          categories = query.categories,\n          queryList = queryList,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .sortBy(_._2)(Ordering.Int.reverse)\n      .take(query.num)\n      .map { case (index, count) =>\n        ItemScore(\n          item = model.itemIntStringMap(index),\n          score = count\n        )\n      }\n\n    PredictedResult(itemScores)\n\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: 
Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional 
property of item.\n        Item(categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    // get all \"user\" \"view\" \"item\" events\n    val viewEventsRDD: RDD[ViewEvent] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"view\")),\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val viewEvent = try {\n          event.event match {\n            case \"view\" => ViewEvent(\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to ViewEvent.\" +\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        viewEvent\n      }.cache()\n\n    new TrainingData(\n      items = itemsRDD,\n      viewEvents = viewEventsRDD\n    )\n  }\n}\n\ncase class Item(categories: Option[List[String]])\n\ncase class ViewEvent(user: String, item: String, t: Long)\n\nclass TrainingData(\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable {\n  override def toString = {\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    s\"viewEvents: [${viewEvents.count()}] (${viewEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  items: List[String],\n  num: Int,\n  categories: Option[Set[String]],\n  categoryBlackList: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n){\n  override def toString: String = itemScores.mkString(\",\")\n}\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject SimilarProductEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"als\" -> classOf[ALSAlgorithm],\n        \"cooccurrence\" -> classOf[CooccurrenceAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      items = trainingData.items,\n      viewEvents = trainingData.viewEvents)\n  }\n}\n\nclass PreparedData(\n  val items: RDD[(String, Item)],\n  val viewEvents: RDD[ViewEvent]\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/rid-user-set-event/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nname := \"template-scala-parallel-similarproduct\"\n\norganization := \"org.apache.predictionio\"\nscalaVersion := \"2.11.12\"\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.14.0\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"              % \"2.4.0\" % \"provided\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/data/import_eventserver.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nImport sample data for similar product engine\n\"\"\"\n\nimport predictionio\nimport argparse\nimport random\n\nSEED = 3\n\ndef import_events(client):\n  random.seed(SEED)\n  count = 0\n  print(client.get_status())\n  print(\"Importing data...\")\n\n  # generate 10 users, with user ids u1,u2,....,u10\n  user_ids = [\"u%s\" % i for i in range(1, 11)]\n  for user_id in user_ids:\n    print(\"Set user\", user_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"user\",\n      entity_id=user_id\n    )\n    count += 1\n\n  # generate 50 items, with item ids i1,i2,....,i50\n  # random assign 1 to 4 categories among c1-c6 to items\n  categories = [\"c%s\" % i for i in range(1, 7)]\n  item_ids = [\"i%s\" % i for i in range(1, 51)]\n  for item_id in item_ids:\n    print(\"Set item\", item_id)\n    client.create_event(\n      event=\"$set\",\n      entity_type=\"item\",\n      entity_id=item_id,\n      properties={\n        \"categories\" : random.sample(categories, random.randint(1, 4))\n      }\n    )\n    count += 1\n\n  # each user randomly viewed 10 items\n  for user_id in user_ids:\n    for viewed_item in random.sample(item_ids, 
10):\n      print(\"User\", user_id ,\"views item\", viewed_item)\n      client.create_event(\n        event=\"view\",\n        entity_type=\"user\",\n        entity_id=user_id,\n        target_entity_type=\"item\",\n        target_entity_id=viewed_item\n      )\n      count += 1\n      # randomly rate some of the viewed items\n      if random.choice([True, False]):\n        rating = random.choice(range(1,6))\n        print(\"User\", user_id ,\"rates item\", viewed_item, \"rating\", rating)\n        client.create_event(\n          event=\"rate\",\n          entity_type=\"user\",\n          entity_id=user_id,\n          target_entity_type=\"item\",\n          target_entity_id=viewed_item,\n          properties={\n            \"rating\": rating\n          }\n        )\n        count += 1\n\n  print(\"%s events are imported.\" % count)\n\nif __name__ == '__main__':\n  parser = argparse.ArgumentParser(\n    description=\"Import sample data for similar product engine\")\n  parser.add_argument('--access_key', default='invald_access_key')\n  parser.add_argument('--url', default=\"http://localhost:7070\")\n\n  args = parser.parse_args()\n  print(args)\n\n  client = predictionio.EventClient(\n    access_key=args.access_key,\n    url=args.url,\n    threads=5,\n    qsize=500)\n  import_events(client)\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/data/send_query.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nSend sample query to prediction engine\n\"\"\"\n\nimport predictionio\nengine_client = predictionio.EngineClient(url=\"http://localhost:8000\")\nprint(engine_client.send_query({\"items\": [\"i1\", \"i3\"], \"num\": 4}))\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/engine-cooccurrence.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"cooccurrence\",\n      \"params\": {\n        \"n\": 20\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.apache.predictionio.examples.similarproduct.SimilarProductEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\" : 20,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/project/build.properties",
    "content": "sbt.version=1.2.8\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\n\nimport grizzled.slf4j.Logger\n\nimport scala.collection.mutable.PriorityQueue\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSModel(\n  val productFeatures: Map[Int, Array[Double]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString = {\n    s\" productFeatures: [${productFeatures.size}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2).toString}...)]\" +\n    s\" items: [${items.size}]\" +\n    
s\"(${items.take(2).toString}...)]\"\n  }\n}\n\n/**\n  * Use ALS to build item x feature matrix\n  */\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def train(sc:SparkContext ,data: PreparedData): ALSModel = {\n    require(!data.rateEvents.take(1).isEmpty, // MODIFIED\n      s\"rateEvents in PreparedData cannot be empty.\" + // MODIFIED\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.users.take(1).isEmpty,\n      s\"users in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    require(!data.items.take(1).isEmpty,\n      s\"items in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // create User and item's String ID to integer index BiMap\n    val userStringIntMap = BiMap.stringInt(data.users.keys)\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    val mllibRatings = data.rateEvents // MODIFIED\n      .map { r =>\n        // Convert user and item String IDs to Int index for MLlib\n        val uindex = userStringIntMap.getOrElse(r.user, -1)\n        val iindex = itemStringIntMap.getOrElse(r.item, -1)\n\n        if (uindex == -1)\n          logger.info(s\"Couldn't convert nonexistent user ID ${r.user}\"\n            + \" to Int index.\")\n\n        if (iindex == -1)\n          logger.info(s\"Couldn't convert nonexistent item ID ${r.item}\"\n            + \" to Int index.\")\n\n        ((uindex, iindex), 
(r.rating,r.t)) //MODIFIED\n      }.filter { case ((u, i), v) =>\n        // keep events with valid user and item index\n        (u != -1) && (i != -1)\n      }\n      .reduceByKey { case (v1, v2) => // MODIFIED\n        // if a user may rate same item with different value at different times,\n        // use the latest value for this case.\n        // Can remove this reduceByKey() if no need to support this case.\n        val (rating1, t1) = v1\n        val (rating2, t2) = v2\n        // keep the latest value\n        if (t1 > t2) v1 else v2\n      }\n      .map { case ((u, i), (rating, t)) => // MODIFIED\n        // MLlibRating requires integer index for user and item\n        MLlibRating(u, i, rating) // MODIFIED\n      }\n      .cache()\n\n    // MLLib ALS cannot handle empty training data.\n    require(!mllibRatings.take(1).isEmpty,\n      s\"mllibRatings cannot be empty.\" +\n      \" Please check if your events contain valid user and item ID.\")\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    val m = ALS.train( // MODIFIED\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n\n    new ALSModel(\n      productFeatures = m.productFeatures.collectAsMap.toMap,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n  }\n\n  override\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n\n    val productFeatures = model.productFeatures\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items.map(model.itemStringIntMap.get(_))\n      .flatten.toSet\n\n    val queryFeatures: Vector[Array[Double]] = queryList.toVector\n      // productFeatures may not contain the requested item\n      .map { item => productFeatures.get(item) }\n      .flatten\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    
)\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val ord = Ordering.by[(Int, Double), Double](_._2).reverse\n\n    val indexScores: Array[(Int, Double)] = if (queryFeatures.isEmpty) {\n      logger.info(s\"No productFeatures vector for query items ${query.items}.\")\n      Array[(Int, Double)]()\n    } else {\n      productFeatures.par // convert to parallel collection\n        .mapValues { f =>\n          queryFeatures.map{ qf =>\n            cosine(qf, f)\n          }.reduce(_ + _)\n        }\n        .filter(_._2 > 0) // keep items with score > 0\n        .seq // convert back to sequential collection\n        .toArray\n    }\n\n    val filteredScore = indexScores.view.filter { case (i, v) =>\n      isCandidateItem(\n        i = i,\n        items = model.items,\n        categories = query.categories,\n        categoryBlackList = query.categoryBlackList,\n        queryList = queryList,\n        whiteList = whiteList,\n        blackList = blackList\n      )\n    }\n\n    val topScores = getTopN(filteredScore, query.num)(ord).toArray\n\n    val itemScores = topScores.map { case (i, s) =>\n      ItemScore(\n        item = model.itemIntStringMap(i),\n        score = s\n      )\n    }\n\n    PredictedResult(itemScores)\n  }\n\n  private\n  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {\n\n    val q = PriorityQueue()\n\n    for (x <- s) {\n      if (q.size < n)\n        q.enqueue(x)\n      else {\n        // q is full\n        if (ord.compare(x, q.head) < 0) {\n          q.dequeue()\n          q.enqueue(x)\n        }\n      }\n    }\n\n    q.dequeueAll.toSeq.reverse\n  }\n\n  private\n  def cosine(v1: Array[Double], v2: Array[Double]): Double = {\n    val size = v1.size\n    var i = 0\n    var n1: Double = 0\n    var n2: Double = 0\n    var d: Double = 0\n    while (i < size) {\n      n1 += v1(i) * v1(i)\n      n2 += v2(i) * v2(i)\n      d += v1(i) * 
v2(i)\n      i += 1\n    }\n    val n1n2 = (math.sqrt(n1) * math.sqrt(n2))\n    if (n1n2 == 0) 0 else (d / n1n2)\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: Option[Set[String]],\n    categoryBlackList: Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true) &&\n    categoryBlackList.map { cat =>\n      items(i).categories.map { itemCat =>\n        // discard this item if it has overlapping categories with the query\n        (itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(true) // keep this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/CooccurrenceAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.P2LAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\n\ncase class CooccurrenceAlgorithmParams(\n  n: Int // top co-occurrence\n) extends Params\n\nclass CooccurrenceModel(\n  val topCooccurrences: Map[Int, Array[(Int, Int)]],\n  val itemStringIntMap: BiMap[String, Int],\n  val items: Map[Int, Item]\n) extends Serializable {\n  @transient lazy val itemIntStringMap = itemStringIntMap.inverse\n\n  override def toString(): String = {\n    val s = topCooccurrences.mapValues { v => v.mkString(\",\") }\n    s.toString\n  }\n}\n\nclass CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams)\n  extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] {\n\n  override\n  def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = {\n\n    val itemStringIntMap = BiMap.stringInt(data.items.keys)\n\n    val topCooccurrences = trainCooccurrence(\n      events = data.rateEvents, // MODIFIED\n   
   n = ap.n,\n      itemStringIntMap = itemStringIntMap\n    )\n\n    // collect Item as Map and convert ID to Int index\n    val items: Map[Int, Item] = data.items.map { case (id, item) =>\n      (itemStringIntMap(id), item)\n    }.collectAsMap.toMap\n\n    new CooccurrenceModel(\n      topCooccurrences = topCooccurrences,\n      itemStringIntMap = itemStringIntMap,\n      items = items\n    )\n\n  }\n\n  /* given the user-item events, find out top n co-occurrence pair for each item */\n  def trainCooccurrence(\n    events: RDD[RateEvent], // MODIFIED\n    n: Int,\n    itemStringIntMap: BiMap[String, Int]): Map[Int, Array[(Int, Int)]] = {\n\n    val userItem = events\n      // map item from string to integer index\n      .flatMap {\n        // MODIFIED\n        case RateEvent(user, item, _, _) if itemStringIntMap.contains(item) =>\n          Some(user, itemStringIntMap(item))\n        case _ => None\n      }\n      // if user view same item multiple times, only count as once\n      .distinct()\n      .cache()\n\n    val cooccurrences: RDD[((Int, Int), Int)] = userItem.join(userItem)\n      // remove duplicate pair in reversed order for each user. eg. (a,b) vs. 
(b,a)\n      .filter { case (user, (item1, item2)) => item1 < item2 }\n      .map { case (user, (item1, item2)) => ((item1, item2), 1) }\n      .reduceByKey{ (a: Int, b: Int) => a + b }\n\n    val topCooccurrences = cooccurrences\n      .flatMap{ case (pair, count) =>\n        Seq((pair._1, (pair._2, count)), (pair._2, (pair._1, count)))\n      }\n      .groupByKey\n      .map { case (item, itemCounts) =>\n        (item, itemCounts.toArray.sortBy(_._2)(Ordering.Int.reverse).take(n))\n      }\n      .collectAsMap.toMap\n\n    topCooccurrences\n  }\n\n  override\n  def predict(model: CooccurrenceModel, query: Query): PredictedResult = {\n\n    // convert items to Int index\n    val queryList: Set[Int] = query.items\n      .flatMap(model.itemStringIntMap.get(_))\n      .toSet\n\n    val whiteList: Option[Set[Int]] = query.whiteList.map( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val blackList: Option[Set[Int]] = query.blackList.map ( set =>\n      set.map(model.itemStringIntMap.get(_)).flatten\n    )\n\n    val counts: Array[(Int, Int)] = queryList.toVector\n      .flatMap { q =>\n        model.topCooccurrences.getOrElse(q, Array())\n      }\n      .groupBy { case (index, count) => index }\n      .map { case (index, indexCounts) => (index, indexCounts.map(_._2).sum) }\n      .toArray\n\n    val itemScores = counts\n      .filter { case (i, v) =>\n        isCandidateItem(\n          i = i,\n          items = model.items,\n          categories = query.categories,\n          queryList = queryList,\n          whiteList = whiteList,\n          blackList = blackList\n        )\n      }\n      .sortBy(_._2)(Ordering.Int.reverse)\n      .take(query.num)\n      .map { case (index, count) =>\n        ItemScore(\n          item = model.itemIntStringMap(index),\n          score = count\n        )\n      }\n\n    PredictedResult(itemScores)\n\n  }\n\n  private\n  def isCandidateItem(\n    i: Int,\n    items: Map[Int, Item],\n    categories: 
Option[Set[String]],\n    queryList: Set[Int],\n    whiteList: Option[Set[Int]],\n    blackList: Option[Set[Int]]\n  ): Boolean = {\n    whiteList.map(_.contains(i)).getOrElse(true) &&\n    blackList.map(!_.contains(i)).getOrElse(true) &&\n    // discard items in query as well\n    (!queryList.contains(i)) &&\n    // filter categories\n    categories.map { cat =>\n      items(i).categories.map { itemCat =>\n        // keep this item if it has overlapping categories with the query\n        !(itemCat.toSet.intersect(cat).isEmpty)\n      }.getOrElse(false) // discard this item if it has no categories\n    }.getOrElse(true)\n  }\n\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceParams(appName: String) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, EmptyActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n\n    // create a RDD of (entityID, User)\n    val usersRDD: RDD[(String, User)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"user\"\n    )(sc).map { case (entityId, properties) =>\n      val user = try {\n        User()\n      } catch {\n        case e: 
Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" user ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, user)\n    }.cache()\n\n    // create a RDD of (entityID, Item)\n    val itemsRDD: RDD[(String, Item)] = PEventStore.aggregateProperties(\n      appName = dsp.appName,\n      entityType = \"item\"\n    )(sc).map { case (entityId, properties) =>\n      val item = try {\n        // Assume categories is optional property of item.\n        Item(categories = properties.getOpt[List[String]](\"categories\"))\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Failed to get properties ${properties} of\" +\n            s\" item ${entityId}. Exception: ${e}.\")\n          throw e\n        }\n      }\n      (entityId, item)\n    }.cache()\n\n    // get all \"user\" \"rate\" \"item\" events\n    val rateEventsRDD: RDD[RateEvent] = PEventStore.find( // MODIFIED\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\")), // MODIFIED\n      // targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n      // eventsDb.find() returns RDD[Event]\n      .map { event =>\n        val rateEvent = try { // MODIFIED\n          event.event match {\n            case \"rate\" => RateEvent( // MODIFIED\n              user = event.entityId,\n              item = event.targetEntityId.get,\n              rating = event.properties.get[Double](\"rating\"), // ADDED\n              t = event.eventTime.getMillis)\n            case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n          }\n        } catch {\n          case e: Exception => {\n            logger.error(s\"Cannot convert ${event} to RateEvent.\" + // MODIFIED\n              s\" Exception: ${e}.\")\n            throw e\n          }\n        }\n        rateEvent // MODIFIED\n      }.cache()\n\n    new 
TrainingData(\n      users = usersRDD,\n      items = itemsRDD,\n      rateEvents = rateEventsRDD // MODIFIED\n    )\n  }\n}\n\ncase class User()\n\ncase class Item(categories: Option[List[String]])\n\n// MODIFIED\ncase class RateEvent(user: String, item: String, rating: Double, t: Long)\n\nclass TrainingData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent] // MODIFIED\n) extends Serializable {\n  override def toString = {\n    s\"users: [${users.count()} (${users.take(2).toList}...)]\" +\n    s\"items: [${items.count()} (${items.take(2).toList}...)]\" +\n    // MODIFIED\n    s\"rateEvents: [${rateEvents.count()}] (${rateEvents.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.EngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  items: List[String],\n  num: Int,\n  categories: Option[Set[String]],\n  categoryBlackList: Option[Set[String]],\n  whiteList: Option[Set[String]],\n  blackList: Option[Set[String]]\n)\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n){\n  override def toString: String = itemScores.mkString(\",\")\n}\n\ncase class ItemScore(\n  item: String,\n  score: Double\n)\n\nobject SimilarProductEngine extends EngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\n        \"als\" -> classOf[ALSAlgorithm],\n        \"cooccurrence\" -> classOf[CooccurrenceAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  override\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(\n      users = trainingData.users,\n      items = trainingData.items,\n      rateEvents = trainingData.rateEvents) // MODIFIED\n  }\n}\n\nclass PreparedData(\n  val users: RDD[(String, User)],\n  val items: RDD[(String, Item)],\n  val rateEvents: RDD[RateEvent] // MODIFIED\n) extends Serializable\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.examples.similarproduct\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "examples/scala-parallel-similarproduct/train-with-rate-event/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.10.0-incubating\" }}}\n"
  },
  {
    "path": "make-distribution.sh",
    "content": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nset -e\n\nusage ()\n{\n    echo \"Usage: $0 [-h|--help]\"\n    echo \"\"\n    echo \"  -h|--help    Show usage\"\n    echo \"\"\n    echo \"  --with-rpm   Build distribution for RPM package\"\n    echo \"  --with-deb   Build distribution for DEB package\"\n}\n\nJAVA_PROPS=()\n\nfor i in \"$@\"\ndo\ncase $i in\n    -h|--help)\n    usage\n    shift\n    exit\n    ;;\n    -D*)\n    JAVA_PROPS+=(\"$i\")\n    shift\n    ;;\n    --with-rpm)\n    RPM_BUILD=true\n    shift\n    ;;\n    --with-deb)\n    DEB_BUILD=true\n    shift\n    ;;\n    *)\n    usage\n    exit 1\n    ;;\nesac\ndone\n\nFWDIR=\"$(cd `dirname $0`; pwd)\"\nDISTDIR=\"${FWDIR}/dist\"\n\nVERSION=$(grep ^version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '\".*\"' | sed 's/\"//g')\n\necho \"Building binary distribution for PredictionIO $VERSION...\"\n\ncd ${FWDIR}\nset -x\nsbt/sbt \"${JAVA_PROPS[@]}\" clean\nsbt/sbt \"${JAVA_PROPS[@]}\" printBuildInfo\nsbt/sbt \"${JAVA_PROPS[@]}\" publishLocal assembly storage/publishLocal storage/assembly\nsbt/sbt \"${JAVA_PROPS[@]}\" assembly/clean assembly/universal:packageBin assembly/universal:packageZipTarball\nif [ x$RPM_BUILD = \"xtrue\" 
] ; then\n    sbt/sbt \"${JAVA_PROPS[@]}\" assembly/rpm:packageBin\nfi\nif [ x$DEB_BUILD = \"xtrue\" ] ; then\n    sbt/sbt \"${JAVA_PROPS[@]}\" assembly/debian:packageBin\nfi\nset +x\n\ncd ${FWDIR}\nrm -rf ${DISTDIR}\nmkdir -p ${DISTDIR}/bin\nmkdir -p ${DISTDIR}/conf\nmkdir -p ${DISTDIR}/python\nmkdir -p ${DISTDIR}/lib\nmkdir -p ${DISTDIR}/lib/spark\nmkdir -p ${DISTDIR}/project\n\nmkdir -p ${DISTDIR}/sbt\n\ncp ${FWDIR}/bin/* ${DISTDIR}/bin || :\ncp ${FWDIR}/conf/* ${DISTDIR}/conf\ncp -r ${FWDIR}/python/* ${DISTDIR}/python\ncp ${FWDIR}/project/build.properties ${DISTDIR}/project\ncp ${FWDIR}/sbt/sbt ${DISTDIR}/sbt\ncp ${FWDIR}/assembly/src/universal/lib/*assembly*jar ${DISTDIR}/lib\ncp ${FWDIR}/assembly/src/universal/lib/spark/*jar ${DISTDIR}/lib/spark\n\nrm -f ${DISTDIR}/lib/*javadoc.jar\nrm -f ${DISTDIR}/lib/*sources.jar\nrm -f ${DISTDIR}/conf/pio-env.sh\nmv ${DISTDIR}/conf/pio-env.sh.template ${DISTDIR}/conf/pio-env.sh\n\ntouch ${DISTDIR}/RELEASE\n\nTARNAME=\"PredictionIO-$VERSION.tar.gz\"\nTARDIR=\"PredictionIO-$VERSION\"\ncp -r ${DISTDIR} ${TARDIR}\n\ncp LICENSE.txt ${TARDIR}\ncp NOTICE.txt ${TARDIR}\n\n# Allows override for `tar` command\n# This enables using GNU tar on systems such as macOS\nif [ -z \"$TAR\" ] ; then\n  TAR=tar\nfi\n$TAR zcvf ${TARNAME} ${TARDIR}\nrm -rf ${TARDIR}\n\necho -e \"\\033[0;32mPredictionIO binary distribution created at $TARNAME\\033[0m\"\n"
  },
  {
    "path": "project/PIOBuild.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport sbt._\n\nobject PIOBuild {\n  val elasticsearchVersion = settingKey[String](\"The version of Elasticsearch used for building\")\n  val hbaseVersion = settingKey[String](\"The version of Hbase used for building\")\n  val json4sVersion = settingKey[String](\"The version of JSON4S used for building\")\n  val sparkVersion = settingKey[String](\"The version of Apache Spark used for building\")\n  val sparkBinaryVersion = settingKey[String](\"The binary version of Apache Spark used for building\")\n  val hadoopVersion = settingKey[String](\"The version of Apache Hadoop used for building\")\n  val akkaVersion = settingKey[String](\"The version of Akka used for building\")\n\n  val childrenPomExtra = settingKey[scala.xml.NodeSeq](\"Extra POM data for children projects\")\n\n  def binaryVersion(versionString: String): String = versionString.split('.').take(2).mkString(\".\")\n  def majorVersion(versionString: String): Int = versionString.split('.')(0).toInt\n  def minorVersion(versionString: String): Int = versionString.split('.')(1).toInt\n\n  lazy val printBuildInfo = taskKey[Unit](\"Print build information\")\n}\n"
  },
  {
    "path": "project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "project/build.properties",
    "content": "sbt.version=1.2.8"
  },
  {
    "path": "project/plugins.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-buildinfo\" % \"0.9.0\")\n\naddSbtPlugin(\"com.jsuereth\" % \"sbt-pgp\" % \"1.1.2\")\n\naddSbtPlugin(\"com.typesafe.sbt\" % \"sbt-twirl\" % \"1.4.1\")\n\naddSbtPlugin(\"org.xerial.sbt\" % \"sbt-sonatype\" % \"2.5\")\n\naddSbtPlugin(\"org.scalastyle\" %% \"scalastyle-sbt-plugin\" % \"1.0.0\")\n\nresolvers += \"sonatype-releases\" at \"https://oss.sonatype.org/content/repositories/releases/\"\n\naddSbtPlugin(\"org.scoverage\" % \"sbt-scoverage\" % \"1.5.1\")\n\naddSbtPlugin(\"com.typesafe.sbt\" % \"sbt-native-packager\" % \"1.3.22\")\n\naddSbtPlugin(\"com.typesafe.sbt\" % \"sbt-license-report\" % \"1.2.0\")"
  },
  {
    "path": "project/unidoc.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-unidoc\" % \"0.4.2\")\n"
  },
  {
    "path": "python/pypio/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\"\"\"\nPyPIO is the Python API for PredictionIO.\n\"\"\"\n\nfrom __future__ import absolute_import\n\nfrom pypio.pypio import *\n\n\n__all__ = [\n    'pypio'\n]\n"
  },
  {
    "path": "python/pypio/data/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nfrom __future__ import absolute_import\n\nfrom pypio.data.eventstore import PEventStore\n\n\n__all__ = [\n    'PEventStore'\n]\n"
  },
  {
    "path": "python/pypio/data/eventstore.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nfrom __future__ import absolute_import\n\nfrom pyspark.sql.dataframe import DataFrame\nfrom pyspark.sql import utils\n\n__all__ = [\"PEventStore\"]\n\n\nclass PEventStore(object):\n\n    def __init__(self, jss, sql_ctx):\n        self._jss = jss\n        self.sql_ctx = sql_ctx\n        self._sc = sql_ctx and sql_ctx._sc\n\n    def find(self, app_name, channel_name=None, start_time=None, until_time=None,\n             entity_type=None, entity_id=None, event_names=None, target_entity_type=None,\n             target_entity_id=None):\n        pes = self._sc._jvm.org.apache.predictionio.data.store.python.PPythonEventStore\n        jdf = pes.find(app_name, channel_name, start_time, until_time, entity_type, entity_id,\n                       event_names, target_entity_type, target_entity_id, self._jss)\n        return DataFrame(jdf, self.sql_ctx)\n\n    def aggregate_properties(self, app_name, entity_type, channel_name=None,\n                             start_time=None, until_time=None, required=None):\n        pes = self._sc._jvm.org.apache.predictionio.data.store.python.PPythonEventStore\n        jdf = pes.aggregateProperties(app_name, entity_type, 
channel_name,\n                                      start_time, until_time,\n                                      utils.toJArray(self._sc._gateway, self._sc._gateway.jvm.String, required),\n                                      self._jss)\n        return DataFrame(jdf, self.sql_ctx)\n\n\n"
  },
  {
    "path": "python/pypio/pypio.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nfrom __future__ import absolute_import\n\nimport atexit\nimport json\nimport os\nimport sys\n\nfrom pypio.data import PEventStore\nfrom pypio.utils import dict_to_scalamap, list_to_dict\nfrom pypio.workflow import CleanupFunctions\nfrom pyspark.sql import SparkSession\n\n\ndef init():\n    global spark\n    spark = SparkSession.builder.getOrCreate()\n    global sc\n    sc = spark.sparkContext\n    global sqlContext\n    sqlContext = spark._wrapped\n    global p_event_store\n    p_event_store = PEventStore(spark._jsparkSession, sqlContext)\n\n    cleanup_functions = CleanupFunctions(sqlContext)\n    atexit.register(lambda: cleanup_functions.run())\n    atexit.register(lambda: sc.stop())\n    print(\"Initialized pypio\")\n\n\ndef find_events(app_name):\n    \"\"\"\n    Returns a dataset of the specified app.\n\n    :param app_name: app name\n    :return: :py:class:`pyspark.sql.DataFrame`\n    \"\"\"\n    return p_event_store.find(app_name)\n\n\ndef save_model(model, predict_columns):\n    \"\"\"\n    Save a PipelineModel object to storage.\n\n    :param model: :py:class:`pyspark.ml.pipeline.PipelineModel`\n    :param predict_columns: prediction columns\n    
:return: identifier for the trained model to use for predict\n    \"\"\"\n    if not predict_columns:\n        raise ValueError(\"predict_columns should have more than one value\")\n    if os.environ.get('PYSPARK_PYTHON') is None:\n        # spark-submit\n        d = list_to_dict(sys.argv[1:])\n        pio_env = list_to_dict([v for e in d['--env'].split(',') for v in e.split('=')])\n    else:\n        # pyspark\n        pio_env = {k: v for k, v in os.environ.items() if k.startswith('PIO_')}\n\n    meta_storage = sc._jvm.org.apache.predictionio.data.storage.Storage.getMetaDataEngineInstances()\n\n    meta = sc._jvm.org.apache.predictionio.data.storage.EngineInstance.apply(\n        \"\",\n        \"INIT\", # status\n        sc._jvm.org.joda.time.DateTime.now(), # startTime\n        sc._jvm.org.joda.time.DateTime.now(), # endTime\n        \"org.apache.predictionio.e2.engine.PythonEngine\", # engineId\n        \"1\", # engineVersion\n        \"default\", # engineVariant\n        \"org.apache.predictionio.e2.engine.PythonEngine\", # engineFactory\n        \"\", # batch\n        dict_to_scalamap(sc._jvm, pio_env), # env\n        sc._jvm.scala.Predef.Map().empty(), # sparkConf\n        \"{\\\"\\\":{}}\", # dataSourceParams\n        \"{\\\"\\\":{}}\", # preparatorParams\n        \"[{\\\"default\\\":{}}]\", # algorithmsParams\n        json.dumps({\"\":{\"columns\":[v for v in predict_columns]}}) # servingParams\n    )\n    id = meta_storage.insert(meta)\n\n    engine = sc._jvm.org.apache.predictionio.e2.engine.PythonEngine\n    data = sc._jvm.org.apache.predictionio.data.storage.Model(id, engine.models(model._to_java()))\n    model_storage = sc._jvm.org.apache.predictionio.data.storage.Storage.getModelDataModels()\n    model_storage.insert(data)\n\n    meta_storage.update(\n        sc._jvm.org.apache.predictionio.data.storage.EngineInstance.apply(\n            id, \"COMPLETED\", meta.startTime(), sc._jvm.org.joda.time.DateTime.now(),\n            meta.engineId(), 
meta.engineVersion(), meta.engineVariant(),\n            meta.engineFactory(), meta.batch(), meta.env(), meta.sparkConf(),\n            meta.dataSourceParams(), meta.preparatorParams(), meta.algorithmsParams(), meta.servingParams()\n        )\n    )\n\n    return id\n\n"
  },
  {
    "path": "python/pypio/utils.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n\ndef dict_to_scalamap(jvm, d):\n    \"\"\"\n    Convert python dictionary to scala type map\n\n    :param jvm: sc._jvm\n    :param d: python type dictionary\n    \"\"\"\n    if d is None:\n        return None\n    sm = jvm.scala.Predef.Map().empty()\n    for k, v in d.items():\n        sm = sm.updated(k, v)\n    return sm\n\ndef list_to_dict(l):\n    \"\"\"\n    Convert python list to python dictionary\n\n    :param l: python type list\n\n    >>> list = [\"key1\", 1, \"key2\", 2, \"key3\", 3]\n    >>> list_to_dict(list) == {'key1': 1, 'key2': 2, 'key3': 3}\n    True\n    \"\"\"\n    if l is None:\n        return None\n    return dict(zip(l[0::2], l[1::2]))\n\n\nif __name__ == \"__main__\":\n    import doctest\n    import sys\n    (failure_count, test_count) = doctest.testmod()\n    if failure_count:\n        sys.exit(-1)"
  },
  {
    "path": "python/pypio/workflow/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nfrom __future__ import absolute_import\n\nfrom pypio.workflow.cleanup_functions import CleanupFunctions\n\n\n__all__ = [\n    'CleanupFunctions'\n]\n"
  },
  {
    "path": "python/pypio/workflow/cleanup_functions.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nfrom __future__ import absolute_import\n\n__all__ = [\"CleanupFunctions\"]\n\n\nclass CleanupFunctions(object):\n\n    def __init__(self, sql_ctx):\n        self.sql_ctx = sql_ctx\n        self._sc = sql_ctx and sql_ctx._sc\n\n    def run(self):\n        cf = self._sc._jvm.org.apache.predictionio.workflow.CleanupFunctions\n        cf.run()\n\n"
  },
  {
    "path": "sbt/sbt",
    "content": "#!/usr/bin/env bash\n#\n# A more capable sbt runner, coincidentally also called sbt.\n# Author: Paul Phillips <paulp@improving.org>\n\nset -o pipefail\n\ndeclare -r sbt_release_version=\"0.13.13\"\ndeclare -r sbt_unreleased_version=\"0.13.13\"\n\ndeclare -r latest_212=\"2.12.1\"\ndeclare -r latest_211=\"2.11.8\"\ndeclare -r latest_210=\"2.10.6\"\ndeclare -r latest_29=\"2.9.3\"\ndeclare -r latest_28=\"2.8.2\"\n\ndeclare -r buildProps=\"project/build.properties\"\n\ndeclare -r sbt_launch_ivy_release_repo=\"http://repo.typesafe.com/typesafe/ivy-releases\"\ndeclare -r sbt_launch_ivy_snapshot_repo=\"https://repo.scala-sbt.org/scalasbt/ivy-snapshots\"\ndeclare -r sbt_launch_mvn_release_repo=\"http://repo.scala-sbt.org/scalasbt/maven-releases\"\ndeclare -r sbt_launch_mvn_snapshot_repo=\"http://repo.scala-sbt.org/scalasbt/maven-snapshots\"\n\ndeclare -r default_jvm_opts_common=\"-Xms512m -Xmx1536m -Xss2m\"\ndeclare -r noshare_opts=\"-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy\"\n\ndeclare sbt_jar sbt_dir sbt_create sbt_version sbt_script sbt_new\ndeclare sbt_explicit_version\ndeclare verbose noshare batch trace_level\ndeclare sbt_saved_stty debugUs\n\ndeclare java_cmd=\"java\"\ndeclare sbt_launch_dir=\"$HOME/.sbt/launchers\"\ndeclare sbt_launch_repo\n\n# pull -J and -D options to give to java.\ndeclare -a java_args scalac_args sbt_commands residual_args\n\n# args to jvm/sbt via files or environment variables\ndeclare -a extra_jvm_opts extra_sbt_opts\n\nechoerr () { echo >&2 \"$@\"; }\nvlog ()    { [[ -n \"$verbose\" ]] && echoerr \"$@\"; }\ndie ()     { echo \"Aborting: $@\" ; exit 1; }\n\n# restore stty settings (echo in particular)\nonSbtRunnerExit() {\n  [[ -n \"$sbt_saved_stty\" ]] || return\n  vlog \"\"\n  vlog \"restoring stty: $sbt_saved_stty\"\n  stty \"$sbt_saved_stty\"\n  unset sbt_saved_stty\n}\n\n# save stty and trap exit, to ensure echo is re-enabled if we are interrupted.\ntrap 
onSbtRunnerExit EXIT\nsbt_saved_stty=\"$(stty -g 2>/dev/null)\"\nvlog \"Saved stty: $sbt_saved_stty\"\n\n# this seems to cover the bases on OSX, and someone will\n# have to tell me about the others.\nget_script_path () {\n  local path=\"$1\"\n  [[ -L \"$path\" ]] || { echo \"$path\" ; return; }\n\n  local target=\"$(readlink \"$path\")\"\n  if [[ \"${target:0:1}\" == \"/\" ]]; then\n    echo \"$target\"\n  else\n    echo \"${path%/*}/$target\"\n  fi\n}\n\ndeclare -r script_path=\"$(get_script_path \"$BASH_SOURCE\")\"\ndeclare -r script_name=\"${script_path##*/}\"\n\ninit_default_option_file () {\n  local overriding_var=\"${!1}\"\n  local default_file=\"$2\"\n  if [[ ! -r \"$default_file\" && \"$overriding_var\" =~ ^@(.*)$ ]]; then\n    local envvar_file=\"${BASH_REMATCH[1]}\"\n    if [[ -r \"$envvar_file\" ]]; then\n      default_file=\"$envvar_file\"\n    fi\n  fi\n  echo \"$default_file\"\n}\n\ndeclare sbt_opts_file=\"$(init_default_option_file SBT_OPTS .sbtopts)\"\ndeclare jvm_opts_file=\"$(init_default_option_file JVM_OPTS .jvmopts)\"\n\nbuild_props_sbt () {\n  [[ -r \"$buildProps\" ]] && \\\n    grep '^sbt\\.version' \"$buildProps\" | tr '=\\r' ' ' | awk '{ print $2; }'\n}\n\nupdate_build_props_sbt () {\n  local ver=\"$1\"\n  local old=\"$(build_props_sbt)\"\n\n  [[ -r \"$buildProps\" ]] && [[ \"$ver\" != \"$old\" ]] && {\n    perl -pi -e \"s/^sbt\\.version\\b.*\\$/sbt.version=${ver}/\" \"$buildProps\"\n    grep -q '^sbt.version[ =]' \"$buildProps\" || printf \"\\nsbt.version=%s\\n\" \"$ver\" >> \"$buildProps\"\n\n    vlog \"!!!\"\n    vlog \"!!! Updated file $buildProps setting sbt.version to: $ver\"\n    vlog \"!!! 
Previous value was: $old\"\n    vlog \"!!!\"\n  }\n}\n\nset_sbt_version () {\n  sbt_version=\"${sbt_explicit_version:-$(build_props_sbt)}\"\n  [[ -n \"$sbt_version\" ]] || sbt_version=$sbt_release_version\n  export sbt_version\n}\n\nurl_base () {\n  local version=\"$1\"\n\n  case \"$version\" in\n        0.7.*) echo \"http://simple-build-tool.googlecode.com\" ;;\n      0.10.* ) echo \"$sbt_launch_ivy_release_repo\" ;;\n    0.11.[12]) echo \"$sbt_launch_ivy_release_repo\" ;;\n    0.*-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9]) # ie \"*-yyyymmdd-hhMMss\"\n               echo \"$sbt_launch_ivy_snapshot_repo\" ;;\n          0.*) echo \"$sbt_launch_ivy_release_repo\" ;;\n    *-[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9]) # ie \"*-yyyymmdd-hhMMss\"\n               echo \"$sbt_launch_mvn_snapshot_repo\" ;;\n            *) echo \"$sbt_launch_mvn_release_repo\" ;;\n  esac\n}\n\nmake_url () {\n  local version=\"$1\"\n\n  local base=\"${sbt_launch_repo:-$(url_base \"$version\")}\"\n\n  case \"$version\" in\n        0.7.*) echo \"$base/files/sbt-launch-0.7.7.jar\" ;;\n      0.10.* ) echo \"$base/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar\" ;;\n    0.11.[12]) echo \"$base/org.scala-tools.sbt/sbt-launch/$version/sbt-launch.jar\" ;;\n          0.*) echo \"$base/org.scala-sbt/sbt-launch/$version/sbt-launch.jar\" ;;\n            *) echo \"$base/org/scala-sbt/sbt-launch/$version/sbt-launch.jar\" ;;\n  esac\n}\n\naddJava ()     { vlog \"[addJava] arg = '$1'\"   ;     java_args+=(\"$1\"); }\naddSbt ()      { vlog \"[addSbt] arg = '$1'\"    ;  sbt_commands+=(\"$1\"); }\naddScalac ()   { vlog \"[addScalac] arg = '$1'\" ;   scalac_args+=(\"$1\"); }\naddResidual () { vlog \"[residual] arg = '$1'\"  ; residual_args+=(\"$1\"); }\n\naddResolver () { addSbt \"set resolvers += $1\"; }\naddDebugger () { addJava \"-Xdebug\" ; addJava \"-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1\"; }\nsetThisBuild () {\n  vlog 
\"[addBuild] args = '$@'\"\n  local key=\"$1\" && shift\n  addSbt \"set $key in ThisBuild := $@\"\n}\nsetScalaVersion () {\n  [[ \"$1\" == *\"-SNAPSHOT\" ]] && addResolver 'Resolver.sonatypeRepo(\"snapshots\")'\n  addSbt \"++ $1\"\n}\nsetJavaHome () {\n  java_cmd=\"$1/bin/java\"\n  setThisBuild javaHome \"_root_.scala.Some(file(\\\"$1\\\"))\"\n  export JAVA_HOME=\"$1\"\n  export JDK_HOME=\"$1\"\n  export PATH=\"$JAVA_HOME/bin:$PATH\"\n}\n\ngetJavaVersion() { \"$1\" -version 2>&1 | grep -E -e '(java|openjdk) version' | awk '{ print $3 }' | tr -d \\\"; }\n\ncheckJava() {\n  # Warn if there is a Java version mismatch between PATH and JAVA_HOME/JDK_HOME\n\n  [[ -n \"$JAVA_HOME\" && -e \"$JAVA_HOME/bin/java\"     ]] && java=\"$JAVA_HOME/bin/java\"\n  [[ -n \"$JDK_HOME\"  && -e \"$JDK_HOME/lib/tools.jar\" ]] && java=\"$JDK_HOME/bin/java\"\n\n  if [[ -n \"$java\" ]]; then\n    pathJavaVersion=$(getJavaVersion java)\n    homeJavaVersion=$(getJavaVersion \"$java\")\n    if [[ \"$pathJavaVersion\" != \"$homeJavaVersion\" ]]; then\n      echoerr \"Warning: Java version mismatch between PATH and JAVA_HOME/JDK_HOME, sbt will use the one in PATH\"\n      echoerr \"  Either: fix your PATH, remove JAVA_HOME/JDK_HOME or use -java-home\"\n      echoerr \"  java version from PATH:               $pathJavaVersion\"\n      echoerr \"  java version from JAVA_HOME/JDK_HOME: $homeJavaVersion\"\n    fi\n  fi\n}\n\njava_version () {\n  local version=$(getJavaVersion \"$java_cmd\")\n  vlog \"Detected Java version: $version\"\n  echo \"${version:2:1}\"\n}\n\n# MaxPermSize critical on pre-8 JVMs but incurs noisy warning on 8+\ndefault_jvm_opts () {\n  local v=\"$(java_version)\"\n  if [[ $v -ge 8 ]]; then\n    echo \"$default_jvm_opts_common\"\n  else\n    echo \"-XX:MaxPermSize=384m $default_jvm_opts_common\"\n  fi\n}\n\nbuild_props_scala () {\n  if [[ -r \"$buildProps\" ]]; then\n    versionLine=\"$(grep '^build.scala.versions' \"$buildProps\")\"\n    
versionString=\"${versionLine##build.scala.versions=}\"\n    echo \"${versionString%% .*}\"\n  fi\n}\n\nexecRunner () {\n  # print the arguments one to a line, quoting any containing spaces\n  vlog \"# Executing command line:\" && {\n    for arg; do\n      if [[ -n \"$arg\" ]]; then\n        if printf \"%s\\n\" \"$arg\" | grep -q ' '; then\n          printf >&2 \"\\\"%s\\\"\\n\" \"$arg\"\n        else\n          printf >&2 \"%s\\n\" \"$arg\"\n        fi\n      fi\n    done\n    vlog \"\"\n  }\n\n  [[ -n \"$batch\" ]] && exec </dev/null\n  exec \"$@\"\n}\n\njar_url ()  { make_url \"$1\"; }\n\nis_cygwin () [[ \"$(uname -a)\" == \"CYGWIN\"* ]]\n\njar_file () {\n  is_cygwin \\\n  && echo \"$(cygpath -w $sbt_launch_dir/\"$1\"/sbt-launch.jar)\" \\\n  || echo \"$sbt_launch_dir/$1/sbt-launch.jar\"\n}\n\ndownload_url () {\n  local url=\"$1\"\n  local jar=\"$2\"\n\n  echoerr \"Downloading sbt launcher for $sbt_version:\"\n  echoerr \"  From  $url\"\n  echoerr \"    To  $jar\"\n\n  mkdir -p \"${jar%/*}\" && {\n    if which curl >/dev/null; then\n      curl --fail --silent --location \"$url\" --output \"$jar\"\n    elif which wget >/dev/null; then\n      wget -q -O \"$jar\" \"$url\"\n    fi\n  } && [[ -r \"$jar\" ]]\n}\n\nacquire_sbt_jar () {\n  {\n    sbt_jar=\"$(jar_file \"$sbt_version\")\"\n    [[ -r \"$sbt_jar\" ]]\n  } || {\n    sbt_jar=\"$HOME/.ivy2/local/org.scala-sbt/sbt-launch/$sbt_version/jars/sbt-launch.jar\"\n    [[ -r \"$sbt_jar\" ]]\n  } || {\n    sbt_jar=\"$(jar_file \"$sbt_version\")\"\n    download_url \"$(make_url \"$sbt_version\")\" \"$sbt_jar\"\n  }\n}\n\nusage () {\n  set_sbt_version\n  cat <<EOM\nUsage: $script_name [options]\n\nNote that options which are passed along to sbt begin with -- whereas\noptions to this runner use a single dash. 
Any sbt command can be scheduled\nto run first by prefixing the command with --, so --warn, --error and so on\nare not special.\n\nOutput filtering: if there is a file in the home directory called .sbtignore\nand this is not an interactive sbt session, the file is treated as a list of\nbash regular expressions. Output lines which match any regex are not echoed.\nOne can see exactly which lines would have been suppressed by starting this\nrunner with the -x option.\n\n  -h | -help         print this message\n  -v                 verbose operation (this runner is chattier)\n  -d, -w, -q         aliases for --debug, --warn, --error (q means quiet)\n  -x                 debug this script\n  -trace <level>     display stack traces with a max of <level> frames (default: -1, traces suppressed)\n  -debug-inc         enable debugging log for the incremental compiler\n  -no-colors         disable ANSI color codes\n  -sbt-create        start sbt even if current directory contains no sbt project\n  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt/<version>)\n  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11+)\n  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)\n  -no-share          use all local caches; no sharing\n  -offline           put sbt in offline mode\n  -jvm-debug <port>  Turn on JVM debugging, open at the given port.\n  -batch             Disable interactive mode\n  -prompt <expr>     Set the sbt prompt; in expr, 's' is the State and 'e' is Extracted\n  -script <file>     Run the specified file as a scala script\n\n  # sbt version (default: sbt.version from $buildProps if present, otherwise $sbt_release_version)\n  -sbt-force-latest         force the use of the latest release of sbt: $sbt_release_version\n  -sbt-version  <version>   use the specified version of sbt (default: $sbt_release_version)\n  -sbt-dev                  use the latest pre-release version of sbt: 
$sbt_unreleased_version\n  -sbt-jar      <path>      use the specified jar as the sbt launcher\n  -sbt-launch-dir <path>    directory to hold sbt launchers (default: $sbt_launch_dir)\n  -sbt-launch-repo <url>    repo url for downloading sbt launcher jar (default: $(url_base \"$sbt_version\"))\n\n  # scala version (default: as chosen by sbt)\n  -28                       use $latest_28\n  -29                       use $latest_29\n  -210                      use $latest_210\n  -211                      use $latest_211\n  -212                      use $latest_212\n  -scala-home <path>        use the scala build at the specified directory\n  -scala-version <version>  use the specified version of scala\n  -binary-version <version> use the specified scala version when searching for dependencies\n\n  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))\n  -java-home <path>         alternate JAVA_HOME\n\n  # passing options to the jvm - note it does NOT use JAVA_OPTS due to pollution\n  # The default set is used if JVM_OPTS is unset and no -jvm-opts file is found\n  <default>        $(default_jvm_opts)\n  JVM_OPTS         environment variable holding either the jvm args directly, or\n                   the reference to a file containing jvm args if given path is prepended by '@' (e.g. '@/etc/jvmopts')\n                   Note: \"@\"-file is overridden by local '.jvmopts' or '-jvm-opts' argument.\n  -jvm-opts <path> file containing jvm args (if not given, .jvmopts in project root is used if present)\n  -Dkey=val        pass -Dkey=val directly to the jvm\n  -J-X             pass option -X directly to the jvm (-J is stripped)\n\n  # passing options to sbt, OR to this runner\n  SBT_OPTS         environment variable holding either the sbt args directly, or\n                   the reference to a file containing sbt args if given path is prepended by '@' (e.g. 
'@/etc/sbtopts')\n                   Note: \"@\"-file is overridden by local '.sbtopts' or '-sbt-opts' argument.\n  -sbt-opts <path> file containing sbt args (if not given, .sbtopts in project root is used if present)\n  -S-X             add -X to sbt's scalacOptions (-S is stripped)\nEOM\n}\n\nprocess_args () {\n  require_arg () {\n    local type=\"$1\"\n    local opt=\"$2\"\n    local arg=\"$3\"\n\n    if [[ -z \"$arg\" ]] || [[ \"${arg:0:1}\" == \"-\" ]]; then\n      die \"$opt requires <$type> argument\"\n    fi\n  }\n  while [[ $# -gt 0 ]]; do\n    case \"$1\" in\n          -h|-help) usage; exit 1 ;;\n                -v) verbose=true && shift ;;\n                -d) addSbt \"--debug\" && shift ;;\n                -w) addSbt \"--warn\"  && shift ;;\n                -q) addSbt \"--error\" && shift ;;\n                -x) debugUs=true && shift ;;\n            -trace) require_arg integer \"$1\" \"$2\" && trace_level=\"$2\" && shift 2 ;;\n              -ivy) require_arg path \"$1\" \"$2\" && addJava \"-Dsbt.ivy.home=$2\" && shift 2 ;;\n        -no-colors) addJava \"-Dsbt.log.noformat=true\" && shift ;;\n         -no-share) noshare=true && shift ;;\n         -sbt-boot) require_arg path \"$1\" \"$2\" && addJava \"-Dsbt.boot.directory=$2\" && shift 2 ;;\n          -sbt-dir) require_arg path \"$1\" \"$2\" && sbt_dir=\"$2\" && shift 2 ;;\n        -debug-inc) addJava \"-Dxsbt.inc.debug=true\" && shift ;;\n          -offline) addSbt \"set offline in Global := true\" && shift ;;\n        -jvm-debug) require_arg port \"$1\" \"$2\" && addDebugger \"$2\" && shift 2 ;;\n            -batch) batch=true && shift ;;\n           -prompt) require_arg \"expr\" \"$1\" \"$2\" && setThisBuild shellPrompt \"(s => { val e = Project.extract(s) ; $2 })\" && shift 2 ;;\n           -script) require_arg file \"$1\" \"$2\" && sbt_script=\"$2\" && addJava \"-Dsbt.main.class=sbt.ScriptMain\" && shift 2 ;;\n\n       -sbt-create) sbt_create=true && shift ;;\n          -sbt-jar) require_arg path 
\"$1\" \"$2\" && sbt_jar=\"$2\" && shift 2 ;;\n      -sbt-version) require_arg version \"$1\" \"$2\" && sbt_explicit_version=\"$2\" && shift 2 ;;\n -sbt-force-latest) sbt_explicit_version=\"$sbt_release_version\" && shift ;;\n          -sbt-dev) sbt_explicit_version=\"$sbt_unreleased_version\" && shift ;;\n   -sbt-launch-dir) require_arg path \"$1\" \"$2\" && sbt_launch_dir=\"$2\" && shift 2 ;;\n  -sbt-launch-repo) require_arg path \"$1\" \"$2\" && sbt_launch_repo=\"$2\" && shift 2 ;;\n    -scala-version) require_arg version \"$1\" \"$2\" && setScalaVersion \"$2\" && shift 2 ;;\n   -binary-version) require_arg version \"$1\" \"$2\" && setThisBuild scalaBinaryVersion \"\\\"$2\\\"\" && shift 2 ;;\n       -scala-home) require_arg path \"$1\" \"$2\" && setThisBuild scalaHome \"_root_.scala.Some(file(\\\"$2\\\"))\" && shift 2 ;;\n        -java-home) require_arg path \"$1\" \"$2\" && setJavaHome \"$2\" && shift 2 ;;\n         -sbt-opts) require_arg path \"$1\" \"$2\" && sbt_opts_file=\"$2\" && shift 2 ;;\n         -jvm-opts) require_arg path \"$1\" \"$2\" && jvm_opts_file=\"$2\" && shift 2 ;;\n\n               -D*) addJava \"$1\" && shift ;;\n               -J*) addJava \"${1:2}\" && shift ;;\n               -S*) addScalac \"${1:2}\" && shift ;;\n               -28) setScalaVersion \"$latest_28\" && shift ;;\n               -29) setScalaVersion \"$latest_29\" && shift ;;\n              -210) setScalaVersion \"$latest_210\" && shift ;;\n              -211) setScalaVersion \"$latest_211\" && shift ;;\n              -212) setScalaVersion \"$latest_212\" && shift ;;\n               new) sbt_new=true && sbt_explicit_version=\"$sbt_release_version\"  && addResidual \"$1\" && shift ;;\n                 *) addResidual \"$1\" && shift ;;\n    esac\n  done\n}\n\n# process the direct command line arguments\nprocess_args \"$@\"\n\n# skip #-styled comments and blank lines\nreadConfigFile() {\n  local end=false\n  until $end; do\n    read || end=true\n    [[ $REPLY =~ ^# ]] || [[ -z 
$REPLY ]] || echo \"$REPLY\"\n  done < \"$1\"\n}\n\n# if there are file/environment sbt_opts, process again so we\n# can supply args to this runner\nif [[ -r \"$sbt_opts_file\" ]]; then\n  vlog \"Using sbt options defined in file $sbt_opts_file\"\n  while read opt; do extra_sbt_opts+=(\"$opt\"); done < <(readConfigFile \"$sbt_opts_file\")\nelif [[ -n \"$SBT_OPTS\" && ! (\"$SBT_OPTS\" =~ ^@.*) ]]; then\n  vlog \"Using sbt options defined in variable \\$SBT_OPTS\"\n  extra_sbt_opts=( $SBT_OPTS )\nelse\n  vlog \"No extra sbt options have been defined\"\nfi\n\n[[ -n \"${extra_sbt_opts[*]}\" ]] && process_args \"${extra_sbt_opts[@]}\"\n\n# reset \"$@\" to the residual args\nset -- \"${residual_args[@]}\"\nargumentCount=$#\n\n# set sbt version\nset_sbt_version\n\ncheckJava\n\n# only exists in 0.12+\nsetTraceLevel() {\n  case \"$sbt_version\" in\n    \"0.7.\"* | \"0.10.\"* | \"0.11.\"* ) echoerr \"Cannot set trace level in sbt version $sbt_version\" ;;\n                                 *) setThisBuild traceLevel $trace_level ;;\n  esac\n}\n\n# set scalacOptions if we were given any -S opts\n[[ ${#scalac_args[@]} -eq 0 ]] || addSbt \"set scalacOptions in ThisBuild += \\\"${scalac_args[@]}\\\"\"\n\n# Update build.properties on disk to set explicit version - sbt gives us no choice\n[[ -n \"$sbt_explicit_version\" && -z \"$sbt_new\" ]] && update_build_props_sbt \"$sbt_explicit_version\"\nvlog \"Detected sbt version $sbt_version\"\n\nif [[ -n \"$sbt_script\" ]]; then\n  residual_args=( $sbt_script ${residual_args[@]} )\nelse\n  # no args - alert them there's stuff in here\n  (( argumentCount > 0 )) || {\n    vlog \"Starting $script_name: invoke with -help for other options\"\n    residual_args=( shell )\n  }\nfi\n\n# verify this is an sbt dir, -create was given or user attempts to run a scala script\n[[ -r ./build.sbt || -d ./project || -n \"$sbt_create\" || -n \"$sbt_script\" || -n \"$sbt_new\" ]] || {\n  cat <<EOM\n$(pwd) doesn't appear to be an sbt project.\nIf you want to 
start sbt anyway, run:\n  $0 -sbt-create\n\nEOM\n  exit 1\n}\n\n# pick up completion if present; todo\n[[ -r .sbt_completion.sh ]] && source .sbt_completion.sh\n\n# directory to store sbt launchers\n[[ -d \"$sbt_launch_dir\" ]] || mkdir -p \"$sbt_launch_dir\"\n[[ -w \"$sbt_launch_dir\" ]] || sbt_launch_dir=\"$(mktemp -d -t sbt_extras_launchers.XXXXXX)\"\n\n# no jar? download it.\n[[ -r \"$sbt_jar\" ]] || acquire_sbt_jar || {\n  # still no jar? uh-oh.\n  echo \"Download failed. Obtain the jar manually and place it at $sbt_jar\"\n  exit 1\n}\n\nif [[ -n \"$noshare\" ]]; then\n  for opt in ${noshare_opts}; do\n    addJava \"$opt\"\n  done\nelse\n  case \"$sbt_version\" in\n    \"0.7.\"* | \"0.10.\"* | \"0.11.\"* | \"0.12.\"* )\n      [[ -n \"$sbt_dir\" ]] || {\n        sbt_dir=\"$HOME/.sbt/$sbt_version\"\n        vlog \"Using $sbt_dir as sbt dir, -sbt-dir to override.\"\n      }\n    ;;\n  esac\n\n  if [[ -n \"$sbt_dir\" ]]; then\n    addJava \"-Dsbt.global.base=$sbt_dir\"\n  fi\nfi\n\nif [[ -r \"$jvm_opts_file\" ]]; then\n  vlog \"Using jvm options defined in file $jvm_opts_file\"\n  while read opt; do extra_jvm_opts+=(\"$opt\"); done < <(readConfigFile \"$jvm_opts_file\")\nelif [[ -n \"$JVM_OPTS\" && ! 
(\"$JVM_OPTS\" =~ ^@.*) ]]; then\n  vlog \"Using jvm options defined in \\$JVM_OPTS variable\"\n  extra_jvm_opts=( $JVM_OPTS )\nelse\n  vlog \"Using default jvm options\"\n  extra_jvm_opts=( $(default_jvm_opts) )\nfi\n\n# traceLevel is 0.12+\n[[ -n \"$trace_level\" ]] && setTraceLevel\n\nmain () {\n  execRunner \"$java_cmd\" \\\n    \"${extra_jvm_opts[@]}\" \\\n    \"${java_args[@]}\" \\\n    -jar \"$sbt_jar\" \\\n    \"${sbt_commands[@]}\" \\\n    \"${residual_args[@]}\"\n}\n\n# sbt inserts this string on certain lines when formatting is enabled:\n#   val OverwriteLine = \"\\r\\u001BM\\u001B[2K\"\n# ...in order not to spam the console with a million \"Resolving\" lines.\n# Unfortunately that makes it that much harder to work with when\n# we're not going to print those lines anyway. We strip that bit of\n# line noise, but leave the other codes to preserve color.\nmainFiltered () {\n  local ansiOverwrite='\\r\\x1BM\\x1B[2K'\n  local excludeRegex=$(egrep -v '^#|^$' ~/.sbtignore | paste -sd'|' -)\n\n  echoLine () {\n    local line=\"$1\"\n    local line1=\"$(echo \"$line\" | sed 's/\\r\\x1BM\\x1B\\[2K//g')\"       # This strips the OverwriteLine code.\n    local line2=\"$(echo \"$line1\" | sed 's/\\x1B\\[[0-9;]*[JKmsu]//g')\" # This strips all codes - we test regexes against this.\n\n    if [[ $line2 =~ $excludeRegex ]]; then\n      [[ -n $debugUs ]] && echo \"[X] $line1\"\n    else\n      [[ -n $debugUs ]] && echo \"    $line1\" || echo \"$line1\"\n    fi\n  }\n\n  echoLine \"Starting sbt with output filtering enabled.\"\n  main | while read -r line; do echoLine \"$line\"; done\n}\n\n# Only filter if there's a filter file and we don't see a known interactive command.\n# Obviously this is super ad hoc but I don't know how to improve on it. Testing whether\n# stdin is a terminal is useless because most of my use cases for this filtering are\n# exactly when I'm at a terminal, running sbt non-interactively.\nshouldFilter () { [[ -f ~/.sbtignore ]] && ! 
egrep -q '\\b(shell|console|consoleProject)\\b' <<<\"${residual_args[@]}\"; }\n\n# run sbt\nif shouldFilter; then mainFiltered; else main; fi\n"
  },
  {
    "path": "scalastyle-config.xml",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n   http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n<scalastyle>\n    <name>Scalastyle standard configuration</name>\n    <check level=\"error\" class=\"org.scalastyle.file.FileTabChecker\"\n           enabled=\"true\"/>\n\n    <check level=\"error\" class=\"org.scalastyle.file.HeaderMatchesChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"header\"><![CDATA[/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  
You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.SpacesAfterPlusChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.SpacesBeforePlusChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\" class=\"org.scalastyle.file.FileLineLengthChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"maxLineLength\"><![CDATA[100]]></parameter>\n            <parameter name=\"tabSize\"><![CDATA[2]]></parameter>\n            <parameter name=\"ignoreImports\">true</parameter>\n        </parameters>\n    </check>\n    <check level=\"error\" class=\"org.scalastyle.scalariform.ClassNamesChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"regex\"><![CDATA[[A-Z][A-Za-z]*]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\" class=\"org.scalastyle.scalariform.ObjectNamesChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"regex\"><![CDATA[[A-Z][A-Za-z]*]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.PackageObjectNamesChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"regex\"><![CDATA[^[a-z_][a-z0-9]*$]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.ParameterNumberChecker\"\n    
       enabled=\"true\">\n        <parameters>\n            <parameter name=\"maxParameters\"><![CDATA[15]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\" class=\"org.scalastyle.scalariform.UppercaseLChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\" class=\"org.scalastyle.scalariform.IfBraceChecker\"\n           enabled=\"true\">\n        <parameters>\n            <parameter name=\"singleLineAllowed\"><![CDATA[true]]></parameter>\n            <parameter name=\"doubleLineAllowed\"><![CDATA[true]]></parameter>\n        </parameters>\n    </check>\n    <check level=\"error\" class=\"org.scalastyle.file.NewLineAtEofChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.SpaceAfterCommentStartChecker\"\n           enabled=\"true\"/>\n    <check enabled=\"true\"\n           class=\"org.scalastyle.scalariform.PublicMethodsHaveTypeChecker\"\n           level=\"error\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.file.WhitespaceEndOfLineChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.file.FileTabChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.NonASCIICharacterChecker\"\n           enabled=\"true\"/>\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.SpaceAfterCommentStartChecker\"\n           enabled=\"true\"/>\n    <check customId=\"NoJavaDoc\" class=\"org.scalastyle.file.RegexChecker\"\n           enabled=\"true\"\n           level=\"error\">\n        <parameters>\n            <parameter name=\"regex\">(?m)^(\\s*)/[*][*].*$(\\r|)\\n^\\1 [*]</parameter>\n        </parameters>\n        <customMessage>Use Scaladoc style indentation for multiline comments</customMessage>\n    </check>\n\n    <!-- Do not enable ScalaDocChecker below. 
This forces all the classes,-->\n    <!-- traits, methods, types and properties to have documentation which-->\n    <!-- is overwhelming.-->\n    <check level=\"error\"\n           class=\"org.scalastyle.scalariform.ScalaDocChecker\"\n           enabled=\"false\"/>\n</scalastyle>\n"
  },
  {
    "path": "storage/elasticsearch/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/elasticsearch/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-elasticsearch\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\"  % version.value % \"provided\",\n  \"org.apache.spark\"        %% \"spark-core\"                % sparkVersion.value % \"provided\",\n  \"org.elasticsearch.client\" % \"elasticsearch-rest-client\" % elasticsearchVersion.value,\n  \"org.elasticsearch\"       %% \"elasticsearch-spark-20\"    % elasticsearchVersion.value\n    exclude(\"org.apache.spark\", \"*\"),\n  \"org.specs2\"              %% \"specs2\"                    % \"2.3.13\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  s\"pio-data-elasticsearch-assembly-${version.value}.jar\"\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters.mapAsJavaMapConverter\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.AccessKey\nimport org.apache.predictionio.data.storage.AccessKeys\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.{ResponseException, RestClient}\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\n/** Elasticsearch implementation of AccessKeys. 
*/\nclass ESAccessKeys(client: RestClient, config: StorageClientConfig, metadataName: String)\n    extends AccessKeys with Logging {\n  implicit val formats = DefaultFormats.lossless\n  private val metadataKey = \"accesskeys\"\n  private val index = metadataName + \"_\" + metadataKey\n  private val estype = {\n    val mappingJson =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"key\" -> (\"type\" -> \"keyword\")) ~\n          (\"events\" -> (\"type\" -> \"keyword\"))))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def insert(accessKey: AccessKey): Option[String] = {\n    val key = if (accessKey.key.isEmpty) generateKey else accessKey.key\n    update(accessKey.copy(key = key))\n    Some(key)\n  }\n\n  def get(id: String): Option[AccessKey] = {\n    if (id.isEmpty) {\n      return None\n    }\n    try {\n      val response = client.performRequest(\n        \"GET\",\n        s\"/$index/$estype/$id\",\n        Map.empty[String, String].asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      (jsonResponse \\ \"found\").extract[Boolean] match {\n        case true =>\n          Some((jsonResponse \\ \"_source\").extract[AccessKey])\n        case _ =>\n          None\n      }\n    } catch {\n      case e: ResponseException =>\n        e.getResponse.getStatusLine.getStatusCode match {\n          case 404 => None\n          case _ =>\n            error(s\"Failed to access to /$index/$estype/$id\", e)\n            None\n        }\n      case e: IOException =>\n        error(s\"Failed to access to /$index/$estype/$id\", e)\n        None\n    }\n  }\n\n  def getAll(): Seq[AccessKey] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"match_all\" -> List.empty))\n      ESUtils.getAll[AccessKey](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def 
getByAppid(appid: Int): Seq[AccessKey] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"term\" ->\n            (\"appid\" -> appid)))\n      ESUtils.getAll[AccessKey](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def update(accessKey: AccessKey): Unit = {\n    val id = accessKey.key\n    try {\n      val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n        case \"updated\" =>\n        case _ =>\n          error(s\"[$result] Failed to update $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to update $index/$estype/$id\", e)\n    }\n  }\n\n  def delete(id: String): Unit = {\n    try {\n      val response = client.performRequest(\n        \"DELETE\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ \"result\").extract[String]\n      result match {\n        case \"deleted\" =>\n        case _ =>\n          error(s\"[$result] Failed to delete $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to delete $index/$estype/$id\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters.mapAsJavaMapConverter\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.App\nimport org.apache.predictionio.data.storage.Apps\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.{ResponseException, RestClient}\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\n/** Elasticsearch implementation of Apps. 
*/\nclass ESApps(client: RestClient, config: StorageClientConfig, metadataName: String)\n    extends Apps with Logging {\n  implicit val formats = DefaultFormats.lossless\n  private val seq = new ESSequences(client, config, metadataName)\n  private val metadataKey = \"apps\"\n  private val index = metadataName + \"_\" + metadataKey\n  private val estype = {\n    val mappingJson =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"id\" -> (\"type\" -> \"keyword\")) ~\n          (\"name\" -> (\"type\" -> \"keyword\"))))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def insert(app: App): Option[Int] = {\n    val id = app.id match {\n      case v if v == 0 =>\n        @scala.annotation.tailrec\n        def generateId: Int = {\n          seq.genNext(metadataKey).toInt match {\n            case x if !get(x).isEmpty => generateId\n            case x => x\n          }\n        }\n        generateId\n      case v => v\n    }\n    update(app.copy(id = id))\n    Some(id)\n  }\n\n  def get(id: Int): Option[App] = {\n    try {\n      val response = client.performRequest(\n        \"GET\",\n        s\"/$index/$estype/$id\",\n        Map.empty[String, String].asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      (jsonResponse \\ \"found\").extract[Boolean] match {\n        case true =>\n          Some((jsonResponse \\ \"_source\").extract[App])\n        case _ =>\n          None\n      }\n    } catch {\n      case e: ResponseException =>\n        e.getResponse.getStatusLine.getStatusCode match {\n          case 404 => None\n          case _ =>\n            error(s\"Failed to access to /$index/$estype/$id\", e)\n            None\n        }\n      case e: IOException =>\n        error(s\"Failed to access to /$index/$estype/$id\", e)\n        None\n    }\n  }\n\n  def getByName(name: String): Option[App] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"term\" ->\n            
(\"name\" -> name)))\n      val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"POST\",\n        s\"/$index/_search\",\n        Map.empty[String, String].asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val results = (jsonResponse \\ \"hits\" \\ \"hits\").extract[Seq[JValue]]\n      results.headOption.map { jv =>\n        (jv \\ \"_source\").extract[App]\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        None\n    }\n  }\n\n  def getAll(): Seq[App] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"match_all\" -> Nil))\n      ESUtils.getAll[App](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def update(app: App): Unit = {\n    val id = app.id.toString\n    try {\n      val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n        case \"updated\" =>\n        case _ =>\n          error(s\"[$result] Failed to update $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to update $index/$estype/$id\", e)\n    }\n  }\n\n  def delete(id: Int): Unit = {\n    try {\n      val response = client.performRequest(\n        \"DELETE\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ 
\"result\").extract[String]\n      result match {\n        case \"deleted\" =>\n        case _ =>\n          error(s\"[$result] Failed to delete $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to delete $index/$estype/$id\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters.mapAsJavaMapConverter\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.Channel\nimport org.apache.predictionio.data.storage.Channels\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.{ResponseException, RestClient}\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\nclass ESChannels(client: RestClient, config: StorageClientConfig, metadataName: String)\n    extends Channels with Logging {\n  implicit val formats = DefaultFormats.lossless\n  private val seq = new ESSequences(client, config, metadataName)\n  private val metadataKey = \"channels\"\n  private val index = metadataName + \"_\" + metadataKey\n  private val estype = {\n    val mappingJson =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"name\" 
-> (\"type\" -> \"keyword\"))))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def insert(channel: Channel): Option[Int] = {\n    val id = channel.id match {\n      case v if v == 0 =>\n        @scala.annotation.tailrec\n        def generateId: Int = {\n          seq.genNext(metadataKey).toInt match {\n            case x if !get(x).isEmpty => generateId\n            case x => x\n          }\n        }\n        generateId\n      case v => v\n    }\n\n    if (update(channel.copy(id = id))) Some(id) else None\n  }\n\n  def get(id: Int): Option[Channel] = {\n    try {\n      val response = client.performRequest(\n        \"GET\",\n        s\"/$index/$estype/$id\",\n        Map.empty[String, String].asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      (jsonResponse \\ \"found\").extract[Boolean] match {\n        case true =>\n          Some((jsonResponse \\ \"_source\").extract[Channel])\n        case _ =>\n          None\n      }\n    } catch {\n      case e: ResponseException =>\n        e.getResponse.getStatusLine.getStatusCode match {\n          case 404 => None\n          case _ =>\n            error(s\"Failed to access to /$index/$estype/$id\", e)\n            None\n        }\n      case e: IOException =>\n        error(s\"Failed to access to /$index/$estype/$id\", e)\n        None\n    }\n  }\n\n  def getByAppid(appid: Int): Seq[Channel] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"term\" ->\n            (\"appid\" -> appid)))\n      ESUtils.getAll[Channel](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def update(channel: Channel): Boolean = {\n    val id = channel.id.toString\n    try {\n      val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        
s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ \"result\").extract[String]\n      result match {\n        case \"created\" => true\n        case \"updated\" => true\n        case _ =>\n          error(s\"[$result] Failed to update $index/$estype/$id\")\n          false\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to update $index/$estype/$id\", e)\n        false\n    }\n  }\n\n  def delete(id: Int): Unit = {\n    try {\n      val response = client.performRequest(\n        \"DELETE\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"deleted\" =>\n        case _ =>\n          error(s\"[$result] Failed to delete $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to delete $index/$estype/$id\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters.mapAsJavaMapConverter\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.data.storage.EngineInstanceSerializer\nimport org.apache.predictionio.data.storage.EngineInstances\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.{ResponseException, RestClient}\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\nclass ESEngineInstances(client: RestClient, config: StorageClientConfig, metadataName: String)\n    extends EngineInstances with Logging {\n  implicit val formats = DefaultFormats + new EngineInstanceSerializer\n  private val metadataKey = \"engine_instances\"\n  private val index = metadataName + \"_\" + metadataKey\n  private val estype = {\n    val mappingJson =\n      
(\"mappings\" ->\n        (\"properties\" ->\n          (\"status\" -> (\"type\" -> \"keyword\")) ~\n          (\"startTime\" -> (\"type\" -> \"date\")) ~\n          (\"endTime\" -> (\"type\" -> \"date\")) ~\n          (\"engineId\" -> (\"type\" -> \"keyword\")) ~\n          (\"engineVersion\" -> (\"type\" -> \"keyword\")) ~\n          (\"engineVariant\" -> (\"type\" -> \"keyword\")) ~\n          (\"engineFactory\" -> (\"type\" -> \"keyword\")) ~\n          (\"batch\" -> (\"type\" -> \"keyword\")) ~\n          (\"dataSourceParams\" -> (\"type\" -> \"keyword\")) ~\n          (\"preparatorParams\" -> (\"type\" -> \"keyword\")) ~\n          (\"algorithmsParams\" -> (\"type\" -> \"keyword\")) ~\n          (\"servingParams\" -> (\"type\" -> \"keyword\"))\n        ))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def insert(i: EngineInstance): String = {\n    val id = i.id match {\n      case x if x.isEmpty =>\n        @scala.annotation.tailrec\n        def generateId(newId: Option[String]): String = {\n          newId match {\n            case Some(x) => x\n            case _ => generateId(preInsert())\n          }\n        }\n        generateId(preInsert())\n      case x => x\n    }\n\n    update(i.copy(id = id))\n    id\n  }\n\n  def preInsert(): Option[String] = {\n    try {\n      val entity = new NStringEntity(\"{}\", ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"POST\",\n        s\"/$index/$estype\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n          Some((jsonResponse \\ \"_id\").extract[String])\n        case _ =>\n          error(s\"[$result] Failed to create $index/$estype\")\n          None\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to 
create $index/$estype\", e)\n        None\n    }\n  }\n\n  def get(id: String): Option[EngineInstance] = {\n    try {\n      val response = client.performRequest(\n        \"GET\",\n        s\"/$index/$estype/$id\",\n        Map.empty[String, String].asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      (jsonResponse \\ \"found\").extract[Boolean] match {\n        case true =>\n          Some((jsonResponse \\ \"_source\").extract[EngineInstance])\n        case _ =>\n          None\n      }\n    } catch {\n      case e: ResponseException =>\n        e.getResponse.getStatusLine.getStatusCode match {\n          case 404 => None\n          case _ =>\n            error(s\"Failed to access to /$index/$estype/$id\", e)\n            None\n        }\n      case e: IOException =>\n        error(s\"Failed to access to /$index/$estype/$id\", e)\n        None\n    }\n  }\n\n  def getAll(): Seq[EngineInstance] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"match_all\" -> List.empty))\n      ESUtils.getAll[EngineInstance](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def getCompleted(\n    engineId: String,\n    engineVersion: String,\n    engineVariant: String): Seq[EngineInstance] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"bool\" ->\n            (\"must\" -> List(\n              (\"term\" ->\n                (\"status\" -> \"COMPLETED\")),\n              (\"term\" ->\n                (\"engineId\" -> engineId)),\n              (\"term\" ->\n                (\"engineVersion\" -> engineVersion)),\n              (\"term\" ->\n                (\"engineVariant\" -> engineVariant)))))) ~\n              (\"sort\" -> List(\n                (\"startTime\" ->\n                  (\"order\" -> \"desc\"))))\n      ESUtils.getAll[EngineInstance](client, index, 
compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def getLatestCompleted(\n    engineId: String,\n    engineVersion: String,\n    engineVariant: String): Option[EngineInstance] =\n    getCompleted(\n      engineId,\n      engineVersion,\n      engineVariant).headOption\n\n  def update(i: EngineInstance): Unit = {\n    val id = i.id\n    try {\n      val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n        case \"updated\" =>\n        case _ =>\n          error(s\"[$result] Failed to update $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to update $index/$estype/$id\", e)\n    }\n  }\n\n  def delete(id: String): Unit = {\n    try {\n      val response = client.performRequest(\n        \"DELETE\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ \"result\").extract[String]\n      result match {\n        case \"deleted\" =>\n        case _ =>\n          error(s\"[$result] Failed to delete $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to delete $index/$estype/$id\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters._\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.EvaluationInstance\nimport org.apache.predictionio.data.storage.EvaluationInstanceSerializer\nimport org.apache.predictionio.data.storage.EvaluationInstances\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.{ResponseException, RestClient}\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\nclass ESEvaluationInstances(client: RestClient, config: StorageClientConfig, metadataName: String)\n    extends EvaluationInstances with Logging {\n  implicit val formats = DefaultFormats + new EvaluationInstanceSerializer\n  private val seq = new ESSequences(client, config, metadataName)\n  private val metadataKey = \"evaluation_instances\"\n  private val index = metadataName + \"_\" 
+ metadataKey\n  private val estype = {\n    val mappingJson =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"status\" -> (\"type\" -> \"keyword\")) ~\n          (\"startTime\" -> (\"type\" -> \"date\")) ~\n          (\"endTime\" -> (\"type\" -> \"date\")) ~\n          (\"evaluationClass\" -> (\"type\" -> \"keyword\")) ~\n          (\"engineParamsGeneratorClass\" -> (\"type\" -> \"keyword\")) ~\n          (\"batch\" -> (\"type\" -> \"keyword\")) ~\n          (\"evaluatorResults\" -> (\"type\" -> \"text\")) ~\n          (\"evaluatorResultsHTML\" -> (\"enabled\" -> false)) ~\n          (\"evaluatorResultsJSON\" -> (\"enabled\" -> false))))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def insert(i: EvaluationInstance): String = {\n    val id = i.id match {\n      case v if v.isEmpty =>\n        @scala.annotation.tailrec\n        def generateId: String = {\n          seq.genNext(metadataKey).toString match {\n            case x if !get(x).isEmpty => generateId\n            case x => x\n          }\n        }\n        generateId\n      case v => v\n    }\n    update(i.copy(id = id))\n    id\n  }\n\n  def get(id: String): Option[EvaluationInstance] = {\n    try {\n      val response = client.performRequest(\n        \"GET\",\n        s\"/$index/$estype/$id\",\n        Map.empty[String, String].asJava)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      (jsonResponse \\ \"found\").extract[Boolean] match {\n        case true =>\n          Some((jsonResponse \\ \"_source\").extract[EvaluationInstance])\n        case _ =>\n          None\n      }\n    } catch {\n      case e: ResponseException =>\n        e.getResponse.getStatusLine.getStatusCode match {\n          case 404 => None\n          case _ =>\n            error(s\"Failed to access to /$index/$estype/$id\", e)\n            None\n        }\n      case e: IOException =>\n        error(s\"Failed to access to /$index/$estype/$id\", e)\n 
       None\n    }\n  }\n\n  def getAll(): Seq[EvaluationInstance] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"match_all\" -> List.empty))\n      ESUtils.getAll[EvaluationInstance](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def getCompleted(): Seq[EvaluationInstance] = {\n    try {\n      val json =\n        (\"query\" ->\n          (\"term\" ->\n            (\"status\" -> \"EVALCOMPLETED\"))) ~\n            (\"sort\" ->\n              (\"startTime\" ->\n                (\"order\" -> \"desc\")))\n      ESUtils.getAll[EvaluationInstance](client, index, compact(render(json)))\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to access to /$index/_search\", e)\n        Nil\n    }\n  }\n\n  def update(i: EvaluationInstance): Unit = {\n    val id = i.id\n    try {\n      val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava,\n        entity)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n        case \"updated\" =>\n        case _ =>\n          error(s\"[$result] Failed to update $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to update $index/$estype/$id\", e)\n    }\n  }\n\n  def delete(id: String): Unit = {\n    try {\n      val response = client.performRequest(\n        \"DELETE\",\n        s\"/$index/$estype/$id\",\n        Map(\"refresh\" -> \"true\").asJava)\n      val json = parse(EntityUtils.toString(response.getEntity))\n      val result = (json \\ \"result\").extract[String]\n      result match {\n        case \"deleted\" =>\n        case _ =>\n        
  error(s\"[$result] Failed to delete $index/$estype/$id\")\n      }\n    } catch {\n      case e: IOException =>\n        error(s\"Failed to delete $index/$estype/$id\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEventsUtil.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.net.NetworkInterface\nimport java.net.SocketException\nimport java.security.SecureRandom\nimport java.util.Base64\nimport java.util.concurrent.atomic.AtomicInteger\nimport java.util.concurrent.atomic.AtomicLong\n\nimport org.apache.hadoop.io.MapWritable\nimport org.apache.hadoop.io.Text\nimport org.apache.predictionio.data.storage.DataMap\nimport org.apache.predictionio.data.storage.Event\nimport org.joda.time.DateTime\nimport org.json4s._\nimport org.json4s.native.Serialization.read\nimport org.json4s.native.Serialization.write\n\n\nobject ESEventsUtil {\n\n  implicit val formats = DefaultFormats\n\n  def resultToEvent(id: Text, result: MapWritable, appId: Int): Event = {\n\n    def getStringCol(col: String): String = {\n      val r = result.get(new Text(col)).asInstanceOf[Text]\n      require(r != null,\n        s\"Failed to get value for column ${col}. 
\" +\n          s\"StringBinary: ${r.getBytes()}.\")\n\n      r.toString()\n    }\n\n    def getOptStringCol(col: String): Option[String] = {\n      result.get(new Text(col)) match {\n        case x if x.isInstanceOf[Text] => Some(x.asInstanceOf[Text].toString)\n        case _ => None\n      }\n    }\n\n    val properties: DataMap = getOptStringCol(\"properties\")\n      .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())\n    val eventId = Some(getStringCol(\"eventId\"))\n    val event = getStringCol(\"event\")\n    val entityType = getStringCol(\"entityType\")\n    val entityId = getStringCol(\"entityId\")\n    val targetEntityType = getOptStringCol(\"targetEntityType\")\n    val targetEntityId = getOptStringCol(\"targetEntityId\")\n    val prId = getOptStringCol(\"prId\")\n    val eventTime: DateTime = ESUtils.parseUTCDateTime(getStringCol(\"eventTime\"))\n    val creationTime: DateTime = ESUtils.parseUTCDateTime(getStringCol(\"creationTime\"))\n\n    Event(\n      eventId = eventId,\n      event = event,\n      entityType = entityType,\n      entityId = entityId,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      properties = properties,\n      eventTime = eventTime,\n      tags = Nil,\n      prId = prId,\n      creationTime = creationTime\n    )\n  }\n\n  def eventToPut(event: Event, appId: Int): Map[String, Any] = {\n    Map(\n      \"eventId\" -> event.eventId.getOrElse { getBase64UUID },\n      \"event\" -> event.event,\n      \"entityType\" -> event.entityType,\n      \"entityId\" -> event.entityId,\n      \"targetEntityType\" -> event.targetEntityType,\n      \"targetEntityId\" -> event.targetEntityId,\n      \"properties\" -> write(event.properties.toJObject),\n      \"eventTime\" -> ESUtils.formatUTCDateTime(event.eventTime),\n      \"tags\" -> event.tags,\n      \"prId\" -> event.prId,\n      \"creationTime\" -> ESUtils.formatUTCDateTime(event.creationTime)\n    )\n  }\n\n  val secureRandom: SecureRandom = 
new SecureRandom()\n\n  val sequenceNumber: AtomicInteger = new AtomicInteger(secureRandom.nextInt())\n\n  val lastTimestamp: AtomicLong = new AtomicLong(0)\n\n  val secureMungedAddress: Array[Byte] = {\n    val address = getMacAddress match {\n      case Some(x) => x\n      case None =>\n        val dummy: Array[Byte] = new Array[Byte](6)\n        secureRandom.nextBytes(dummy)\n        dummy(0) = (dummy(0) | 0x01.toByte).toByte\n        dummy\n    }\n\n    val mungedBytes: Array[Byte] = new Array[Byte](6)\n    secureRandom.nextBytes(mungedBytes)\n    for (i <- 0 until 6) {\n      mungedBytes(i) = (mungedBytes(i) ^ address(i)).toByte\n    }\n\n    mungedBytes\n  }\n\n  def getMacAddress(): Option[Array[Byte]] = {\n    try {\n      NetworkInterface.getNetworkInterfaces match {\n        case en if en == null => None\n        case en =>\n          new Iterator[NetworkInterface] {\n            def next = en.nextElement\n            def hasNext = en.hasMoreElements\n          }.foldLeft(None: Option[Array[Byte]])((x, y) =>\n            x match {\n              case None =>\n                y.isLoopback match {\n                  case true =>\n                    y.getHardwareAddress match {\n                      case address if isValidAddress(address) => Some(address)\n                      case _ => None\n                    }\n                  case false => None\n                }\n              case _ => x\n            })\n      }\n    } catch {\n      case e: SocketException => None\n    }\n  }\n\n  def isValidAddress(address: Array[Byte]): Boolean = {\n    address match {\n      case v if v == null || v.length != 6 => false\n      case v => v.exists(b => b != 0x00.toByte)\n    }\n  }\n\n  def putLong(array: Array[Byte], l: Long, pos: Int, numberOfLongBytes: Int): Unit = {\n    for (i <- 0 until numberOfLongBytes) {\n      array(pos + numberOfLongBytes - i - 1) = (l >>> (i * 8)).toByte\n    }\n  }\n\n  def getBase64UUID(): String = {\n    val sequenceId: Int = 
sequenceNumber.incrementAndGet & 0xffffff\n    val timestamp: Long = synchronized {\n      val t = Math.max(lastTimestamp.get, System.currentTimeMillis)\n      if (sequenceId == 0) {\n        lastTimestamp.set(t + 1)\n      } else {\n        lastTimestamp.set(t)\n      }\n      lastTimestamp.get\n    }\n\n    val uuidBytes: Array[Byte] = new Array[Byte](15)\n\n    putLong(uuidBytes, timestamp, 0, 6)\n    System.arraycopy(secureMungedAddress, 0, uuidBytes, 6, secureMungedAddress.length)\n    putLong(uuidBytes, sequenceId, 12, 3)\n\n    Base64.getUrlEncoder().withoutPadding().encodeToString(uuidBytes)\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters._\nimport scala.concurrent.ExecutionContext\nimport scala.concurrent.Future\nimport org.apache.http.entity.{ContentType, StringEntity}\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.LEvents\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.elasticsearch.client.RestClient\nimport org.joda.time.DateTime\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\nimport org.json4s.ext.JodaTimeSerializers\nimport grizzled.slf4j.Logging\nimport org.apache.http.message.BasicHeader\n\nclass ESLEvents(val client: RestClient, config: StorageClientConfig, val eventdataName: String)\n    extends LEvents with Logging {\n  implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all\n\n  def eventdataKey(appId: Int, channelId: Option[Int] = None): String = {\n    
channelId.map { ch =>\n      s\"${appId}_${ch}\"\n    }.getOrElse {\n      s\"${appId}\"\n    }\n  }\n\n  override def init(appId: Int, channelId: Option[Int] = None): Boolean = {\n    val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n    val json =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"name\" -> (\"type\" -> \"keyword\")) ~\n          (\"eventId\" -> (\"type\" -> \"keyword\")) ~\n          (\"event\" -> (\"type\" -> \"keyword\")) ~\n          (\"entityType\" -> (\"type\" -> \"keyword\")) ~\n          (\"entityId\" -> (\"type\" -> \"keyword\")) ~\n          (\"targetEntityType\" -> (\"type\" -> \"keyword\")) ~\n          (\"targetEntityId\" -> (\"type\" -> \"keyword\")) ~\n          (\"properties\" -> (\"enabled\" -> false)) ~\n          (\"eventTime\" -> (\"type\" -> \"date\")) ~\n          (\"tags\" -> (\"type\" -> \"keyword\")) ~\n          (\"prId\" -> (\"type\" -> \"keyword\")) ~\n          (\"creationTime\" -> (\"type\" -> \"date\"))))\n    ESUtils.createIndex(client, index, compact(render(json)))\n    true\n  }\n\n  override def remove(appId: Int, channelId: Option[Int] = None): Boolean = {\n    val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n    try {\n      client.performRequest(\n        \"DELETE\",\n        s\"/$index\",\n        Map.empty[String, String].asJava\n      ).getStatusLine.getStatusCode match {\n        case 200 => true\n        case _ =>\n          error(s\"Failed to remove $index\")\n          false\n      }\n    } catch {\n      case e: Exception =>\n        error(s\"Failed to remove $index\", e)\n        false\n    }\n  }\n\n  override def close(): Unit = {}\n\n  override def futureInsert(\n    event: Event,\n    appId: Int,\n    channelId: Option[Int])(implicit ec: ExecutionContext): Future[String] = {\n    Future {\n      val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      val estype = ESUtils.esType(client, index)\n      try {\n        val id = 
event.eventId.getOrElse {\n          ESEventsUtil.getBase64UUID\n        }\n        val json =\n          (\"eventId\" -> id) ~\n          (\"event\" -> event.event) ~\n          (\"entityType\" -> event.entityType) ~\n          (\"entityId\" -> event.entityId) ~\n          (\"targetEntityType\" -> event.targetEntityType) ~\n          (\"targetEntityId\" -> event.targetEntityId) ~\n          (\"eventTime\" -> ESUtils.formatUTCDateTime(event.eventTime)) ~\n          (\"tags\" -> event.tags) ~\n          (\"prId\" -> event.prId) ~\n          (\"creationTime\" -> ESUtils.formatUTCDateTime(event.creationTime)) ~\n          (\"properties\" -> write(event.properties.toJObject))\n        val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n        val response = client.performRequest(\n          \"PUT\",\n          s\"/$index/$estype/$id\",\n          Map(\"refresh\" -> ESUtils.getEventDataRefresh(config)).asJava,\n          entity)\n        val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n        val result = (jsonResponse \\ \"result\").extract[String]\n        result match {\n          case \"created\" => id\n          case _ =>\n            error(s\"[$result] Failed to update $index/$estype/$id\")\n            \"\"\n        }\n      } catch {\n        case e: IOException =>\n          error(s\"Failed to update $index/$estype/<id>\", e)\n          \"\"\n      }\n    }\n  }\n\n  override def futureInsertBatch(\n    events: Seq[Event],\n    appId: Int,\n    channelId: Option[Int])(implicit ec: ExecutionContext): Future[Seq[String]] = {\n    Future {\n      val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      val estype = ESUtils.esType(client, index)\n      try {\n        val ids = events.map { event =>\n          event.eventId.getOrElse(ESEventsUtil.getBase64UUID)\n        }\n\n        val json = events.zip(ids).map { case (event, id) =>\n          val commandJson =\n            (\"create\" -> (\n   
           (\"_index\" -> index) ~\n              (\"_type\" -> estype) ~\n              (\"_id\" -> id)\n            ))\n\n          val documentJson =\n            (\"eventId\" -> id) ~\n            (\"event\" -> event.event) ~\n            (\"entityType\" -> event.entityType) ~\n            (\"entityId\" -> event.entityId) ~\n            (\"targetEntityType\" -> event.targetEntityType) ~\n            (\"targetEntityId\" -> event.targetEntityId) ~\n            (\"eventTime\" -> ESUtils.formatUTCDateTime(event.eventTime)) ~\n            (\"tags\" -> event.tags) ~\n            (\"prId\" -> event.prId) ~\n            (\"creationTime\" -> ESUtils.formatUTCDateTime(event.creationTime)) ~\n            (\"properties\" -> write(event.properties.toJObject))\n\n          compact(render(commandJson)) + \"\\n\" + compact(render(documentJson))\n\n        }.mkString(\"\", \"\\n\", \"\\n\")\n\n        val entity = new StringEntity(json)\n        val response = client.performRequest(\n          \"POST\",\n          \"/_bulk\",\n          Map(\"refresh\" -> ESUtils.getEventDataRefresh(config)).asJava,\n          entity,\n          new BasicHeader(\"Content-Type\", \"application/x-ndjson\"))\n\n        val responseJson = parse(EntityUtils.toString(response.getEntity))\n        val items = (responseJson \\ \"items\").asInstanceOf[JArray]\n\n        items.arr.map { case value: JObject =>\n          val result = (value \\ \"create\" \\ \"result\").extract[String]\n          val id = (value \\ \"create\" \\ \"_id\").extract[String]\n\n          result match {\n            case \"created\" => id\n            case _ =>\n              error(s\"[$result] Failed to update $index/$estype/$id\")\n              \"\"\n          }\n        }\n      } catch {\n        case e: IOException =>\n          error(s\"Failed to update $index/$estype/<id>\", e)\n          Nil\n      }\n    }\n  }\n\n  override def futureGet(\n    eventId: String,\n    appId: Int,\n    channelId: Option[Int])(implicit ec: 
ExecutionContext): Future[Option[Event]] = {\n    Future {\n      val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      try {\n        val json =\n          (\"query\" ->\n            (\"term\" ->\n              (\"eventId\" -> eventId)))\n        val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n        val response = client.performRequest(\n          \"POST\",\n          s\"/$index/_search\",\n          Map.empty[String, String].asJava,\n          entity)\n        val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n        val results = (jsonResponse \\ \"hits\" \\ \"hits\").extract[Seq[JValue]]\n        results.headOption.map { jv =>\n          (jv \\ \"_source\").extract[Event]\n        }\n      } catch {\n        case e: IOException =>\n          error(s\"Failed to access to /$index/_search\", e)\n          None\n      }\n    }\n  }\n\n  override def futureDelete(\n    eventId: String,\n    appId: Int,\n    channelId: Option[Int])(implicit ec: ExecutionContext): Future[Boolean] = {\n    Future {\n      val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      try {\n        val json =\n          (\"query\" ->\n            (\"term\" ->\n              (\"eventId\" -> eventId)))\n        val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n        val response = client.performRequest(\n          \"POST\",\n          s\"/$index/_delete_by_query\",\n          Map(\"refresh\" -> ESUtils.getEventDataRefresh(config)).asJava,\n          entity)\n        val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n        (jsonResponse \\ \"deleted\").extract[Int] > 0\n      } catch {\n        case e: IOException =>\n          error(s\"Failed to delete $index:$eventId\", e)\n          false\n      }\n    }\n  }\n\n  override def futureFind(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    
untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    limit: Option[Int] = None,\n    reversed: Option[Boolean] = None)\n    (implicit ec: ExecutionContext): Future[Iterator[Event]] = {\n    Future {\n      val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      try {\n        val query = ESUtils.createEventQuery(\n          startTime, untilTime, entityType, entityId,\n          eventNames, targetEntityType, targetEntityId, reversed)\n        limit.getOrElse(20) match {\n          case -1 => ESUtils.getEventAll(client, index, query).toIterator\n          case size => ESUtils.getEvents(client, index, query, size).toIterator\n        }\n      } catch {\n        case e: IOException =>\n          error(e.getMessage)\n          Iterator.empty\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport scala.collection.JavaConverters._\n\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.io.MapWritable\nimport org.apache.hadoop.io.Text\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.PEvents\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.elasticsearch.client.RestClient\nimport org.elasticsearch.hadoop.mr.EsInputFormat\nimport org.elasticsearch.spark._\nimport org.joda.time.DateTime\nimport java.io.IOException\nimport org.apache.http.util.EntityUtils\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.entity.ContentType\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.ext.JodaTimeSerializers\n\n\nclass ESPEvents(client: RestClient, config: StorageClientConfig, eventdataName: String)\n    extends PEvents {\n  implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all\n\n  def eventdataKey(appId: Int, channelId: 
Option[Int] = None): String = {\n    channelId.map { ch =>\n      s\"${appId}_${ch}\"\n    }.getOrElse {\n      s\"${appId}\"\n    }\n  }\n\n  def getESNodes(): String = {\n    val hosts = config.properties.get(\"HOSTS\").\n      map(_.split(\",\").toSeq).getOrElse(Seq(\"localhost\"))\n    val ports = config.properties.get(\"PORTS\").\n      map(_.split(\",\").toSeq.map(_.toInt)).getOrElse(Seq(9200))\n    (hosts, ports).zipped.map(\n      (h, p) => s\"$h:$p\").mkString(\",\")\n  }\n\n  override def find(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {\n\n    val query = ESUtils.createEventQuery(\n      startTime, untilTime, entityType, entityId,\n      eventNames, targetEntityType, targetEntityId, None)\n\n    val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n    val conf = new Configuration()\n    conf.set(\"es.resource\", index)\n    conf.set(\"es.query\", query)\n    conf.set(\"es.nodes\", getESNodes())\n\n    val rdd = sc.newAPIHadoopRDD(conf, classOf[EsInputFormat[Text, MapWritable]],\n      classOf[Text], classOf[MapWritable]).map {\n        case (key, doc) => {\n          ESEventsUtil.resultToEvent(key, doc, appId)\n        }\n      }\n\n    rdd\n  }\n\n  override def write(\n    events: RDD[Event],\n    appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n    val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n    val estype = ESUtils.esType(client, index)\n    val conf = Map(\"es.resource\" -> s\"$index/$estype\", \"es.nodes\" -> getESNodes())\n    events.map { event =>\n      ESEventsUtil.eventToPut(event, appId)\n    }.saveToEs(conf)\n  }\n\n  override def delete(\n 
   eventIds: RDD[String],\n    appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n    val index = eventdataName + \"_\" + eventdataKey(appId, channelId)\n      eventIds.foreachPartition { iter =>\n        iter.foreach { eventId =>\n          try {\n            val json =\n              (\"query\" ->\n                (\"term\" ->\n                  (\"eventId\" -> eventId)))\n            val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n            val response = client.performRequest(\n              \"POST\",\n              s\"/$index/_delete_by_query\",\n              Map(\"refresh\" -> ESUtils.getEventDataRefresh(config)).asJava,\n              entity)\n          val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n          if ((jsonResponse \\ \"deleted\").extract[Int] == 0) {\n            logger.warn(\"The number of documents that were successfully deleted is 0. \"\n              + s\"$index:$eventId\")\n          }\n        } catch {\n          case e: IOException =>\n            logger.error(s\"Failed to update $index:$eventId\", e)\n        }\n      }\n    }\n  }\n\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport java.io.IOException\n\nimport scala.collection.JavaConverters._\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.apache.http.util.EntityUtils\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.predictionio.data.storage.StorageClientException\nimport org.elasticsearch.client.RestClient\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.write\n\nimport grizzled.slf4j.Logging\n\nclass ESSequences(client: RestClient, config: StorageClientConfig, metadataName: String) extends Logging {\n  implicit val formats = DefaultFormats\n  private val metadataKey = \"sequences\"\n  private val index = metadataName + \"_\" + metadataKey\n  private val estype = {\n    val mappingJson =\n      (\"mappings\" ->\n        (\"properties\" ->\n          (\"n\" -> (\"enabled\" -> false))))\n\n    ESUtils.createIndex(client, index, compact(render(mappingJson)))\n  }\n\n  def genNext(name: String): Long = {\n    try {\n      val entity 
= new NStringEntity(write(\"n\" -> name), ContentType.APPLICATION_JSON)\n      val response = client.performRequest(\n        \"PUT\",\n        s\"/$index/$estype/$name\",\n        Map(\"refresh\" -> \"false\").asJava,\n        entity)\n      val jsonResponse = parse(EntityUtils.toString(response.getEntity))\n      val result = (jsonResponse \\ \"result\").extract[String]\n      result match {\n        case \"created\" =>\n          (jsonResponse \\ \"_version\").extract[Long]\n        case \"updated\" =>\n          (jsonResponse \\ \"_version\").extract[Long]\n        case _ =>\n          throw new IllegalStateException(s\"[$result] Failed to update $index/$estype/$name\")\n      }\n    } catch {\n      case e: IOException =>\n        throw new StorageClientException(s\"Failed to update $index/$estype/$name\", e)\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport scala.collection.JavaConversions._\n\nimport org.apache.http.entity.ContentType\nimport org.apache.http.nio.entity.NStringEntity\nimport org.elasticsearch.client.RestClient\nimport org.json4s._\nimport org.json4s.JsonDSL._\nimport org.json4s.native.JsonMethods._\nimport org.json4s.native.Serialization.read\nimport org.apache.http.util.EntityUtils\nimport org.joda.time.DateTime\nimport org.joda.time.format.DateTimeFormat\nimport org.joda.time.DateTimeZone\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.http.HttpHost\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.DataMap\n\nobject ESUtils {\n  val scrollLife = \"1m\"\n\n  def toEvent(value: JValue)(\n    implicit formats: Formats): Event = {\n    def getString(s: String): String = {\n      (value \\ s) match {\n        case JNothing => null\n        case x => x.extract[String]\n      }\n    }\n\n    def getOptString(s: String): Option[String] = {\n      Option(getString(s))\n    }\n\n    val properties: DataMap = getOptString(\"properties\")\n 
     .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())\n    val eventId = getOptString(\"eventId\")\n    val event = getString(\"event\")\n    val entityType = getString(\"entityType\")\n    val entityId = getString(\"entityId\")\n    val targetEntityType = getOptString(\"targetEntityType\")\n    val targetEntityId = getOptString(\"targetEntityId\")\n    val prId = getOptString(\"prId\")\n    val eventTime: DateTime = ESUtils.parseUTCDateTime(getString(\"eventTime\"))\n    val creationTime: DateTime = ESUtils.parseUTCDateTime(getString(\"creationTime\"))\n    val tags = (value \\ \"tags\").extract[Seq[String]]\n\n    Event(\n      eventId = eventId,\n      event = event,\n      entityType = entityType,\n      entityId = entityId,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      properties = properties,\n      eventTime = eventTime,\n      tags = tags,\n      prId = prId,\n      creationTime = creationTime)\n  }\n\n  def getEvents(\n    client: RestClient,\n    index: String,\n    query: String,\n    size: Int)(\n      implicit formats: Formats): Seq[Event] = {\n    getDocList(client, index, query, size).map(x => toEvent(x))\n  }\n\n  def getDocList(\n    client: RestClient,\n    index: String,\n    query: String,\n    size: Int)(\n      implicit formats: Formats): Seq[JValue] = {\n    val entity = new NStringEntity(query, ContentType.APPLICATION_JSON)\n    val response = client.performRequest(\n      \"POST\",\n      s\"/$index/_search\",\n      Map(\"size\" -> s\"${size}\"),\n      entity)\n    val responseJValue = parse(EntityUtils.toString(response.getEntity))\n    val hits = (responseJValue \\ \"hits\" \\ \"hits\").extract[Seq[JValue]]\n    hits.map(h => (h \\ \"_source\"))\n  }\n\n  def getAll[T: Manifest](\n    client: RestClient,\n    index: String,\n    query: String)(\n      implicit formats: Formats): Seq[T] = {\n    getDocAll(client, index, query).map(x => x.extract[T])\n  }\n\n  def getEventAll(\n    
client: RestClient,\n    index: String,\n    query: String)(\n      implicit formats: Formats): Seq[Event] = {\n    getDocAll(client, index, query).map(x => toEvent(x))\n  }\n\n  def getDocAll(\n    client: RestClient,\n    index: String,\n    query: String)(\n      implicit formats: Formats): Seq[JValue] = {\n\n    @scala.annotation.tailrec\n    def scroll(scrollId: String, hits: Seq[JValue], results: Seq[JValue]): Seq[JValue] = {\n      if (hits.isEmpty) results\n      else {\n        val json = (\"scroll\" -> scrollLife) ~ (\"scroll_id\" -> scrollId)\n        val scrollBody = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON)\n        val response = client.performRequest(\n          \"POST\",\n          \"/_search/scroll\",\n          Map[String, String](),\n          scrollBody)\n        val responseJValue = parse(EntityUtils.toString(response.getEntity))\n        scroll((responseJValue \\ \"_scroll_id\").extract[String],\n          (responseJValue \\ \"hits\" \\ \"hits\").extract[Seq[JValue]],\n          results ++ hits.map(h => (h \\ \"_source\").extract[JValue]))\n      }\n    }\n\n    val entity = new NStringEntity(query, ContentType.APPLICATION_JSON)\n    val response = client.performRequest(\n      \"POST\",\n      s\"/$index/_search\",\n      Map(\"scroll\" -> scrollLife),\n      entity)\n    val responseJValue = parse(EntityUtils.toString(response.getEntity))\n    scroll((responseJValue \\ \"_scroll_id\").extract[String],\n      (responseJValue \\ \"hits\" \\ \"hits\").extract[Seq[JValue]],\n      Nil)\n  }\n\n  def createIndex(\n    client: RestClient,\n    index: String,\n    json: String)(\n      implicit formats: Formats): String = {\n    client.performRequest(\n      \"HEAD\",\n      s\"/$index\",\n      Map(\"include_type_name\" -> \"false\")).getStatusLine.getStatusCode match {\n        case 404 =>\n          val entity = new NStringEntity(json, ContentType.APPLICATION_JSON)\n          client.performRequest(\n            
\"PUT\",\n            s\"/$index\",\n            Map(\"include_type_name\" -> \"false\"),\n            entity).getStatusLine.getStatusCode match {\n              case 200 =>\n                \"_doc\"\n              case _ =>\n                throw new IllegalStateException(s\"/$index is invalid: $json\")\n            }\n        case 200 =>\n          esType(client, index)\n        case _ =>\n          throw new IllegalStateException(s\"/$index is invalid: $json\")\n      }\n  }\n\n  // We cannot have several types within a single index as of ES 6.0, so\n  // continue to add or update a document under the current type. This code is\n  // a step towards ES 7.0 support (removal of mapping types).\n  def esType(\n    client: RestClient,\n    index: String)(\n      implicit formats: Formats): String = {\n    val response = client.performRequest(\n      \"GET\",\n      s\"/$index\",\n      Map(\"include_type_name\" -> \"true\"))\n    response.getStatusLine.getStatusCode match {\n      case 200 =>\n        (parse(EntityUtils.toString(response.getEntity)) \\ index \\ \"mappings\")\n          .extract[JObject].values.collectFirst {\n          case (name, _) if name != \"_doc\" && name != \"properties\" => name\n        }.getOrElse(\"_doc\")\n      case _ =>\n        throw new IllegalStateException(s\"/$index is invalid.\")\n    }\n  }\n\n  def formatUTCDateTime(dt: DateTime): String = {\n    DateTimeFormat\n      .forPattern(\"yyyy-MM-dd'T'HH:mm:ss.SSSZ\").print(dt.withZone(DateTimeZone.UTC))\n  }\n\n  def parseUTCDateTime(str: String): DateTime = {\n    DateTimeFormat\n      .forPattern(\"yyyy-MM-dd'T'HH:mm:ss.SSSZ\").parseDateTime(str)\n  }\n\n  def createEventQuery(\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = 
None,\n    reversed: Option[Boolean] = None): String = {\n    val mustQueries = Seq(\n      startTime.map { x =>\n        val v = formatUTCDateTime(x)\n        s\"\"\"{\"range\":{\"eventTime\":{\"gte\":\"${v}\"}}}\"\"\"\n      },\n      untilTime.map { x =>\n        val v = formatUTCDateTime(x)\n        s\"\"\"{\"range\":{\"eventTime\":{\"lt\":\"${v}\"}}}\"\"\"\n      },\n      entityType.map(x => s\"\"\"{\"term\":{\"entityType\":\"${x}\"}}\"\"\"),\n      entityId.map(x => s\"\"\"{\"term\":{\"entityId\":\"${x}\"}}\"\"\"),\n      targetEntityType.flatMap(xx => xx.map(x => s\"\"\"{\"term\":{\"targetEntityType\":\"${x}\"}}\"\"\")),\n      targetEntityId.flatMap(xx => xx.map(x => s\"\"\"{\"term\":{\"targetEntityId\":\"${x}\"}}\"\"\")),\n      eventNames\n        .map { xx => xx.map(x => \"\\\"%s\\\"\".format(x)) }\n        .map(x => s\"\"\"{\"terms\":{\"event\":[${x.mkString(\",\")}]}}\"\"\")).flatten.mkString(\",\")\n    val query = mustQueries.isEmpty match {\n      case true => \"\"\"query\":{\"match_all\":{}}\"\"\"\n      case _ => s\"\"\"query\":{\"bool\":{\"must\":[${mustQueries}]}}\"\"\"\n    }\n    val sortOrder = reversed.map(x => x match {\n      case true => \"desc\"\n      case _ => \"asc\"\n    }).getOrElse(\"asc\")\n    s\"\"\"{\n       |\"${query},\n       |\"sort\":[{\"eventTime\":{\"order\":\"${sortOrder}\"}}]\n       |}\"\"\".stripMargin\n  }\n\n  def getHttpHosts(config: StorageClientConfig): Seq[HttpHost] = {\n    val hosts = config.properties.get(\"HOSTS\").\n      map(_.split(\",\").toSeq).getOrElse(Seq(\"localhost\"))\n    val ports = config.properties.get(\"PORTS\").\n      map(_.split(\",\").toSeq.map(_.toInt)).getOrElse(Seq(9200))\n    val schemes = config.properties.get(\"SCHEMES\").\n      map(_.split(\",\").toSeq).getOrElse(Seq(\"http\"))\n    (hosts, ports, schemes).zipped.map((h, p, s) => new HttpHost(h, p, s))\n  }\n\n  def getEventDataRefresh(config: StorageClientConfig): String = {\n    
config.properties.getOrElse(\"EVENTDATA_REFRESH\", \"true\")\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport org.apache.http.HttpHost\nimport org.apache.http.auth.{AuthScope, UsernamePasswordCredentials}\nimport org.apache.http.impl.client.BasicCredentialsProvider\nimport org.apache.http.impl.nio.client.HttpAsyncClientBuilder\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.predictionio.data.storage.StorageClientException\nimport org.apache.predictionio.workflow.CleanupFunctions\nimport org.elasticsearch.client.RestClient\nimport org.elasticsearch.client.RestClientBuilder.HttpClientConfigCallback\n\nimport grizzled.slf4j.Logging\n\nobject ESClient extends Logging {\n  private var _sharedRestClient: Option[RestClient] = None\n\n  def open(\n    hosts: Seq[HttpHost],\n    basicAuth: Option[(String, String)] = None): RestClient = {\n    try {\n      val newClient = _sharedRestClient match {\n        case Some(c)  => c\n        case None     => {\n          var builder = RestClient.builder(hosts: _*)\n          builder = basicAuth match {\n            case Some((username, password)) => 
builder.setHttpClientConfigCallback(\n              new BasicAuthProvider(username, password))\n            case None                       => builder}\n          builder.build()\n        }\n      }\n      _sharedRestClient = Some(newClient)\n      newClient\n    } catch {\n      case e: Throwable =>\n        throw new StorageClientException(e.getMessage, e)\n    }\n  }\n\n  def close(): Unit = {\n    _sharedRestClient.foreach { client =>\n      client.close()\n      _sharedRestClient = None\n    }\n  }\n}\n\nclass StorageClient(val config: StorageClientConfig)\n  extends BaseStorageClient with Logging {\n\n  override val prefix = \"ES\"\n\n  val usernamePassword = (\n    config.properties.get(\"USERNAME\"),\n    config.properties.get(\"PASSWORD\"))\n  val optionalBasicAuth: Option[(String, String)] = usernamePassword match {\n    case (None, None)         => None\n    case (username, password) => Some(\n      (username.getOrElse(\"\"), password.getOrElse(\"\")))\n  }\n\n  CleanupFunctions.add { ESClient.close }\n\n  val client = ESClient.open(ESUtils.getHttpHosts(config), optionalBasicAuth)\n}\n\nclass BasicAuthProvider(\n    val username: String,\n    val password: String)\n  extends HttpClientConfigCallback {\n\n  val credentialsProvider = new BasicCredentialsProvider()\n  credentialsProvider.setCredentials(\n    AuthScope.ANY,\n    new UsernamePasswordCredentials(username, password))\n\n  override def customizeHttpClient(\n    httpClientBuilder: HttpAsyncClientBuilder\n  ): HttpAsyncClientBuilder = {\n    httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** Elasticsearch implementation of storage traits, supporting meta data and event data\n  *\n  * @group Implementation\n  */\npackage elasticsearch {}\n"
  },
  {
    "path": "storage/elasticsearch/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is a dummy just to make storage happy.\n    # The actual tests bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "storage/elasticsearch/src/test/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClientSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nimport org.apache.predictionio.data.storage.{App, Apps, Storage, StorageClientConfig}\nimport org.elasticsearch.client.{RestClient, Response}\nimport scala.collection.JavaConverters._\n\nimport org.specs2._\nimport org.specs2.specification.Step\n\nclass ElasticsearchStorageClientSpec extends Specification {\n  def is = s2\"\"\"\n\n  PredictionIO Storage Elasticsearch REST Client Specification ${getESClient}\n\n  \"\"\"\n\n  def getESClient = sequential ^ s2\"\"\"\n\n    StorageClient should\n    - initialize metadata store ${initMetadataStore(appsDO)}\n\n  \"\"\"\n\n  def initMetadataStore(appsDO: Apps) = sequential ^ s2\"\"\"\n\n    creates an app ${createsApp(appsDO)}\n    gets apps ${getApps(appsDO)}\n\n  \"\"\"\n\n  val indexName = \"test_pio_storage_meta_\" + hashCode\n\n  def appsDO: Apps = Storage.getDataObject[Apps](StorageTestUtils.elasticsearchSourceName, indexName)\n\n  def createsApp(appsDO: Apps) = {\n    val newId: Int = 123\n    val newApp: App = App(newId, \"test1\", Some(\"App for ElasticsearchStorageClientSpec\"))\n    val id: Option[Int] = 
appsDO.insert(newApp)\n    val createdApp: Option[App] = appsDO.get(id.get)\n    createdApp.get.id mustEqual newId\n  }\n\n  def getApps(appsDO: Apps) = {\n    val apps: Seq[App] = appsDO.getAll()\n    println(s\"Storage.config ${Storage.config}\")\n    println(s\"getApps ${apps}\")\n    apps must beAnInstanceOf[Seq[App]]\n  }\n}"
  },
  {
    "path": "storage/elasticsearch/src/test/scala/org/apache/predictionio/data/storage/elasticsearch/StorageTestUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.elasticsearch\n\nobject StorageTestUtils {\n  val elasticsearchSourceName = \"ELASTICSEARCH\"\n\n  def dropESIndex(namespace: String): Unit = {\n    // TODO\n  }\n\n}\n"
  },
  {
    "path": "storage/hbase/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/hbase/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-hbase\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % version.value % \"provided\",\n  \"org.apache.spark\"        %% \"spark-core\"     % sparkVersion.value % \"provided\",\n  \"org.apache.hbase\"         % \"hbase-common\"   % hbaseVersion.value,\n  \"org.apache.hbase\"         % \"hbase-client\"   % hbaseVersion.value\n    exclude(\"org.apache.zookeeper\", \"zookeeper\"),\n  // added for Parallel storage interface\n  \"org.apache.hbase\"         % \"hbase-server\"   % hbaseVersion.value\n    exclude(\"org.apache.hbase\", \"hbase-client\")\n    exclude(\"org.apache.zookeeper\", \"zookeeper\")\n    exclude(\"javax.servlet\", \"servlet-api\")\n    exclude(\"org.mortbay.jetty\", \"servlet-api-2.5\")\n    exclude(\"org.mortbay.jetty\", \"jsp-api-2.1\")\n    exclude(\"org.mortbay.jetty\", \"jsp-2.1\"),\n  \"org.specs2\"              %% \"specs2\"         % \"2.3.13\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = 
false)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  s\"pio-data-hbase-assembly-${version.value}.jar\"\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBEventsUtil.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.EventValidation\nimport org.apache.predictionio.data.storage.DataMap\n\nimport org.apache.hadoop.hbase.client.Result\nimport org.apache.hadoop.hbase.client.Put\nimport org.apache.hadoop.hbase.client.Scan\nimport org.apache.hadoop.hbase.util.Bytes\nimport org.apache.hadoop.hbase.filter.FilterList\nimport org.apache.hadoop.hbase.filter.SingleColumnValueFilter\nimport org.apache.hadoop.hbase.filter.CompareFilter.CompareOp\nimport org.apache.hadoop.hbase.filter.BinaryComparator\nimport org.apache.hadoop.hbase.filter.QualifierFilter\nimport org.apache.hadoop.hbase.filter.SkipFilter\n\nimport org.json4s.DefaultFormats\nimport org.json4s.JObject\nimport org.json4s.native.Serialization.{ read, write }\n\nimport org.joda.time.DateTime\nimport org.joda.time.DateTimeZone\n\nimport org.apache.commons.codec.binary.Base64\nimport java.security.MessageDigest\n\nimport java.util.UUID\n\n/* common utility function for accessing EventsStore in HBase */\nobject HBEventsUtil {\n\n  implicit val formats = 
DefaultFormats\n\n  def tableName(namespace: String, appId: Int, channelId: Option[Int] = None): String = {\n    channelId.map { ch =>\n      s\"${namespace}:events_${appId}_${ch}\"\n    }.getOrElse {\n      s\"${namespace}:events_${appId}\"\n    }\n  }\n\n  // column names for \"e\" column family\n  val colNames: Map[String, Array[Byte]] = Map(\n    \"event\" -> \"e\",\n    \"entityType\" -> \"ety\",\n    \"entityId\" -> \"eid\",\n    \"targetEntityType\" -> \"tety\",\n    \"targetEntityId\" -> \"teid\",\n    \"properties\" -> \"p\",\n    \"prId\" -> \"prid\",\n    \"eventTime\" -> \"et\",\n    \"eventTimeZone\" -> \"etz\",\n    \"creationTime\" -> \"ct\",\n    \"creationTimeZone\" -> \"ctz\"\n  ).mapValues(Bytes.toBytes(_))\n\n  def hash(entityType: String, entityId: String): Array[Byte] = {\n    val s = entityType + \"-\" + entityId\n    // get a new MessageDigest object each time for thread-safe\n    val md5 = MessageDigest.getInstance(\"MD5\")\n    md5.digest(Bytes.toBytes(s))\n  }\n\n  class RowKey(\n    val b: Array[Byte]\n  ) {\n    require((b.size == 32), s\"Incorrect b size: ${b.size}\")\n    lazy val entityHash: Array[Byte] = b.slice(0, 16)\n    lazy val millis: Long = Bytes.toLong(b.slice(16, 24))\n    lazy val uuidLow: Long = Bytes.toLong(b.slice(24, 32))\n\n    lazy val toBytes: Array[Byte] = b\n\n    override def toString: String = {\n      Base64.encodeBase64URLSafeString(toBytes)\n    }\n  }\n\n  object RowKey {\n    def apply(\n      entityType: String,\n      entityId: String,\n      millis: Long,\n      uuidLow: Long): RowKey = {\n        // add UUID least significant bits for multiple actions at the same time\n        // (UUID's most significant bits are actually timestamp,\n        // use eventTime instead).\n        val b = hash(entityType, entityId) ++\n          Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow)\n        new RowKey(b)\n      }\n\n    // get RowKey from string representation\n    def apply(s: String): RowKey = {\n      try {\n  
      apply(Base64.decodeBase64(s))\n      } catch {\n        case e: Exception => throw new RowKeyException(\n          s\"Failed to convert String ${s} to RowKey because ${e}\", e)\n      }\n    }\n\n    def apply(b: Array[Byte]): RowKey = {\n      if (b.size != 32) {\n        val bString = b.mkString(\",\")\n        throw new RowKeyException(\n          s\"Incorrect byte array size. Bytes: ${bString}.\")\n      }\n      new RowKey(b)\n    }\n\n  }\n\n  class RowKeyException(val msg: String, val cause: Exception)\n    extends Exception(msg, cause) {\n      def this(msg: String) = this(msg, null)\n    }\n\n  case class PartialRowKey(entityType: String, entityId: String,\n    millis: Option[Long] = None) {\n    val toBytes: Array[Byte] = {\n      hash(entityType, entityId) ++\n        (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]()))\n    }\n  }\n\n  def eventToPut(event: Event, appId: Int): (Put, RowKey) = {\n    // generate new rowKey if eventId is None\n    val rowKey = event.eventId.map { id =>\n      RowKey(id) // create rowKey from eventId\n    }.getOrElse {\n      // TODO: use real UUID. 
not pseudo random\n      val uuidLow: Long = UUID.randomUUID().getLeastSignificantBits\n      RowKey(\n        entityType = event.entityType,\n        entityId = event.entityId,\n        millis = event.eventTime.getMillis,\n        uuidLow = uuidLow\n      )\n    }\n\n    val eBytes = Bytes.toBytes(\"e\")\n    // use eventTime as HBase's cell timestamp\n    val put = new Put(rowKey.toBytes, event.eventTime.getMillis)\n\n    def addStringToE(col: Array[Byte], v: String): Put = {\n      put.add(eBytes, col, Bytes.toBytes(v))\n    }\n\n    def addLongToE(col: Array[Byte], v: Long): Put = {\n      put.add(eBytes, col, Bytes.toBytes(v))\n    }\n\n    addStringToE(colNames(\"event\"), event.event)\n    addStringToE(colNames(\"entityType\"), event.entityType)\n    addStringToE(colNames(\"entityId\"), event.entityId)\n\n    event.targetEntityType.foreach { targetEntityType =>\n      addStringToE(colNames(\"targetEntityType\"), targetEntityType)\n    }\n\n    event.targetEntityId.foreach { targetEntityId =>\n      addStringToE(colNames(\"targetEntityId\"), targetEntityId)\n    }\n\n    // TODO: make properties Option[]\n    if (!event.properties.isEmpty) {\n      addStringToE(colNames(\"properties\"), write(event.properties.toJObject))\n    }\n\n    event.prId.foreach { prId =>\n      addStringToE(colNames(\"prId\"), prId)\n    }\n\n    addLongToE(colNames(\"eventTime\"), event.eventTime.getMillis)\n    val eventTimeZone = event.eventTime.getZone\n    if (!eventTimeZone.equals(EventValidation.defaultTimeZone)) {\n      addStringToE(colNames(\"eventTimeZone\"), eventTimeZone.getID)\n    }\n\n    addLongToE(colNames(\"creationTime\"), event.creationTime.getMillis)\n    val creationTimeZone = event.creationTime.getZone\n    if (!creationTimeZone.equals(EventValidation.defaultTimeZone)) {\n      addStringToE(colNames(\"creationTimeZone\"), creationTimeZone.getID)\n    }\n\n    // can use zero-length byte array for tag cell value\n    (put, rowKey)\n  }\n\n  def 
resultToEvent(result: Result, appId: Int): Event = {\n    val rowKey = RowKey(result.getRow())\n\n    val eBytes = Bytes.toBytes(\"e\")\n    // val e = result.getFamilyMap(eBytes)\n\n    def getStringCol(col: String): String = {\n      val r = result.getValue(eBytes, colNames(col))\n      require(r != null,\n        s\"Failed to get value for column ${col}. \" +\n        s\"Rowkey: ${rowKey.toString} \" +\n        s\"StringBinary: ${Bytes.toStringBinary(result.getRow())}.\")\n\n      Bytes.toString(r)\n    }\n\n    def getLongCol(col: String): Long = {\n      val r = result.getValue(eBytes, colNames(col))\n      require(r != null,\n        s\"Failed to get value for column ${col}. \" +\n        s\"Rowkey: ${rowKey.toString} \" +\n        s\"StringBinary: ${Bytes.toStringBinary(result.getRow())}.\")\n\n      Bytes.toLong(r)\n    }\n\n    def getOptStringCol(col: String): Option[String] = {\n      val r = result.getValue(eBytes, colNames(col))\n      if (r == null) {\n        None\n      } else {\n        Some(Bytes.toString(r))\n      }\n    }\n\n    def getTimestamp(col: String): Long = {\n      result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp()\n    }\n\n    val event = getStringCol(\"event\")\n    val entityType = getStringCol(\"entityType\")\n    val entityId = getStringCol(\"entityId\")\n    val targetEntityType = getOptStringCol(\"targetEntityType\")\n    val targetEntityId = getOptStringCol(\"targetEntityId\")\n    val properties: DataMap = getOptStringCol(\"properties\")\n      .map(s => DataMap(read[JObject](s))).getOrElse(DataMap())\n    val prId = getOptStringCol(\"prId\")\n    val eventTimeZone = getOptStringCol(\"eventTimeZone\")\n      .map(DateTimeZone.forID(_))\n      .getOrElse(EventValidation.defaultTimeZone)\n    val eventTime = new DateTime(\n      getLongCol(\"eventTime\"), eventTimeZone)\n    val creationTimeZone = getOptStringCol(\"creationTimeZone\")\n      .map(DateTimeZone.forID(_))\n      
.getOrElse(EventValidation.defaultTimeZone)\n    val creationTime: DateTime = new DateTime(\n      getLongCol(\"creationTime\"), creationTimeZone)\n\n    Event(\n      eventId = Some(RowKey(result.getRow()).toString),\n      event = event,\n      entityType = entityType,\n      entityId = entityId,\n      targetEntityType = targetEntityType,\n      targetEntityId = targetEntityId,\n      properties = properties,\n      eventTime = eventTime,\n      tags = Nil,\n      prId = prId,\n      creationTime = creationTime\n    )\n  }\n\n\n  // for mandatory field. None means don't care.\n  // for optional field. None means don't care.\n  //    Some(None) means not exist.\n  //    Some(Some(x)) means it should match x\n  def createScan(\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    reversed: Option[Boolean] = None): Scan = {\n\n    val scan: Scan = new Scan()\n\n    (entityType, entityId) match {\n      case (Some(et), Some(eid)) => {\n        val start = PartialRowKey(et, eid,\n          startTime.map(_.getMillis)).toBytes\n        // if no untilTime, stop when reach next bytes of entityTypeAndId\n        val stop = PartialRowKey(et, eid,\n          untilTime.map(_.getMillis).orElse(Some(-1))).toBytes\n\n        if (reversed.getOrElse(false)) {\n          // Reversed order.\n          // If you specify a startRow and stopRow,\n          // to scan in reverse, the startRow needs to be lexicographically\n          // after the stopRow.\n          scan.setStartRow(stop)\n          scan.setStopRow(start)\n          scan.setReversed(true)\n        } else {\n          scan.setStartRow(start)\n          scan.setStopRow(stop)\n        }\n      }\n      case (_, _) => {\n        val minTime: Long = 
startTime.map(_.getMillis).getOrElse(0)\n        val maxTime: Long = untilTime.map(_.getMillis).getOrElse(Long.MaxValue)\n        scan.setTimeRange(minTime, maxTime)\n        if (reversed.getOrElse(false)) {\n          scan.setReversed(true)\n        }\n      }\n    }\n\n    val filters = new FilterList(FilterList.Operator.MUST_PASS_ALL)\n\n    val eBytes = Bytes.toBytes(\"e\")\n\n    def createBinaryFilter(col: String, value: Array[Byte]): SingleColumnValueFilter = {\n      val comp = new BinaryComparator(value)\n      new SingleColumnValueFilter(\n        eBytes, colNames(col), CompareOp.EQUAL, comp)\n    }\n\n    // skip the row if the column exists\n    def createSkipRowIfColumnExistFilter(col: String): SkipFilter = {\n      val comp = new BinaryComparator(colNames(col))\n      val q = new QualifierFilter(CompareOp.NOT_EQUAL, comp)\n      // filters an entire row if any of the Cell checks do not pass\n      new SkipFilter(q)\n    }\n\n    entityType.foreach { et =>\n      val compType = new BinaryComparator(Bytes.toBytes(et))\n      val filterType = new SingleColumnValueFilter(\n        eBytes, colNames(\"entityType\"), CompareOp.EQUAL, compType)\n      filters.addFilter(filterType)\n    }\n\n    entityId.foreach { eid =>\n      val compId = new BinaryComparator(Bytes.toBytes(eid))\n      val filterId = new SingleColumnValueFilter(\n        eBytes, colNames(\"entityId\"), CompareOp.EQUAL, compId)\n      filters.addFilter(filterId)\n    }\n\n    eventNames.foreach { eventsList =>\n      // match any of event in the eventsList\n      val eventFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE)\n      eventsList.foreach { e =>\n        val compEvent = new BinaryComparator(Bytes.toBytes(e))\n        val filterEvent = new SingleColumnValueFilter(\n          eBytes, colNames(\"event\"), CompareOp.EQUAL, compEvent)\n        eventFilters.addFilter(filterEvent)\n      }\n      if (!eventFilters.getFilters().isEmpty) {\n        filters.addFilter(eventFilters)\n   
   }\n    }\n\n    targetEntityType.foreach {\n      case None =>\n        val filter = createSkipRowIfColumnExistFilter(\"targetEntityType\")\n        filters.addFilter(filter)\n      case Some(tet) =>\n        val filter = createBinaryFilter(\n          \"targetEntityType\", Bytes.toBytes(tet))\n        // the entire row will be skipped if the column is not found.\n        filter.setFilterIfMissing(true)\n        filters.addFilter(filter)\n    }\n\n    targetEntityId.foreach {\n      case None =>\n        val filter = createSkipRowIfColumnExistFilter(\"targetEntityId\")\n        filters.addFilter(filter)\n      case Some(teid) =>\n        val filter = createBinaryFilter(\n          \"targetEntityId\", Bytes.toBytes(teid))\n        // the entire row will be skipped if the column is not found.\n        filter.setFilterIfMissing(true)\n        filters.addFilter(filter)\n    }\n\n    if (!filters.getFilters().isEmpty) {\n      scan.setFilter(filters)\n    }\n\n    scan\n  }\n\n}\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBLEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.LEvents\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.predictionio.data.storage.hbase.HBEventsUtil.RowKey\nimport org.apache.hadoop.hbase.HColumnDescriptor\nimport org.apache.hadoop.hbase.HTableDescriptor\nimport org.apache.hadoop.hbase.NamespaceDescriptor\nimport org.apache.hadoop.hbase.TableName\nimport org.apache.hadoop.hbase.client._\nimport org.joda.time.DateTime\n\nimport scala.collection.JavaConversions._\nimport scala.concurrent.ExecutionContext\nimport scala.concurrent.Future\n\nclass HBLEvents(val client: HBClient, config: StorageClientConfig, val namespace: String)\n  extends LEvents with Logging {\n\n  // implicit val formats = DefaultFormats + new EventJson4sSupport.DBSerializer\n\n  def resultToEvent(result: Result, appId: Int): Event =\n    HBEventsUtil.resultToEvent(result, appId)\n\n  def getTable(appId: Int, channelId: Option[Int] = None): HTableInterface =\n    
client.connection.getTable(HBEventsUtil.tableName(namespace, appId, channelId))\n\n  override\n  def init(appId: Int, channelId: Option[Int] = None): Boolean = {\n    // check namespace exist\n    val existingNamespace = client.admin.listNamespaceDescriptors()\n      .map(_.getName)\n    if (!existingNamespace.contains(namespace)) {\n      val nameDesc = NamespaceDescriptor.create(namespace).build()\n      info(s\"The namespace ${namespace} doesn't exist yet. Creating now...\")\n      client.admin.createNamespace(nameDesc)\n    }\n\n    val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))\n    if (!client.admin.tableExists(tableName)) {\n      info(s\"The table ${tableName.getNameAsString()} doesn't exist yet.\" +\n        \" Creating now...\")\n      val tableDesc = new HTableDescriptor(tableName)\n      tableDesc.addFamily(new HColumnDescriptor(\"e\"))\n      tableDesc.addFamily(new HColumnDescriptor(\"r\")) // reserved\n      client.admin.createTable(tableDesc)\n    }\n    true\n  }\n\n  override\n  def remove(appId: Int, channelId: Option[Int] = None): Boolean = {\n    val tableName = TableName.valueOf(HBEventsUtil.tableName(namespace, appId, channelId))\n    try {\n      if (client.admin.tableExists(tableName)) {\n        info(s\"Removing table ${tableName.getNameAsString()}...\")\n        client.admin.disableTable(tableName)\n        client.admin.deleteTable(tableName)\n      } else {\n        info(s\"Table ${tableName.getNameAsString()} doesn't exist.\" +\n          s\" Nothing is deleted.\")\n      }\n      true\n    } catch {\n      case e: Exception => {\n        error(s\"Fail to remove table for appId ${appId}. 
Exception: ${e}\")\n        false\n      }\n    }\n  }\n\n  override\n  def close(): Unit = {\n    client.admin.close()\n    client.connection.close()\n  }\n\n  override\n  def futureInsert(\n    event: Event, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):\n    Future[String] = {\n    Future {\n      val table = getTable(appId, channelId)\n      val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)\n      table.put(put)\n      table.flushCommits()\n      table.close()\n      rowKey.toString\n    }\n  }\n\n  override\n  def futureInsertBatch(\n    events: Seq[Event], appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):\n    Future[Seq[String]] = {\n    Future {\n      val table = getTable(appId, channelId)\n      val (puts, rowKeys) = events.map { event => HBEventsUtil.eventToPut(event, appId) }.unzip\n      table.put(puts)\n      table.flushCommits()\n      table.close()\n      rowKeys.map(_.toString)\n    }\n  }\n\n  override\n  def futureGet(\n    eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):\n    Future[Option[Event]] = {\n      Future {\n        val table = getTable(appId, channelId)\n        val rowKey = RowKey(eventId)\n        val get = new Get(rowKey.toBytes)\n\n        val result = table.get(get)\n        table.close()\n\n        if (!result.isEmpty()) {\n          val event = resultToEvent(result, appId)\n          Some(event)\n        } else {\n          None\n        }\n      }\n    }\n\n  override\n  def futureDelete(\n    eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext):\n    Future[Boolean] = {\n    Future {\n      val table = getTable(appId, channelId)\n      val rowKey = RowKey(eventId)\n      val exists = table.exists(new Get(rowKey.toBytes))\n      table.delete(new Delete(rowKey.toBytes))\n      table.close()\n      exists\n    }\n  }\n\n  override\n  def futureFind(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: 
Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None,\n    limit: Option[Int] = None,\n    reversed: Option[Boolean] = None)(implicit ec: ExecutionContext):\n    Future[Iterator[Event]] = {\n      Future {\n\n        require(!((reversed == Some(true)) && (entityType.isEmpty || entityId.isEmpty)),\n          \"the parameter reversed can only be used with both entityType and entityId specified.\")\n\n        val table = getTable(appId, channelId)\n\n        val scan = HBEventsUtil.createScan(\n          startTime = startTime,\n          untilTime = untilTime,\n          entityType = entityType,\n          entityId = entityId,\n          eventNames = eventNames,\n          targetEntityType = targetEntityType,\n          targetEntityId = targetEntityId,\n          reversed = reversed)\n        val scanner = table.getScanner(scan)\n        table.close()\n\n        val eventsIter = scanner.iterator()\n\n        // Get all events if None or Some(-1)\n        val results: Iterator[Result] = limit match {\n          case Some(-1) => eventsIter\n          case None => eventsIter\n          case Some(x) => eventsIter.take(x)\n        }\n\n        val eventsIt = results.map { resultToEvent(_, appId) }\n\n        eventsIt\n      }\n  }\n\n}\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/HBPEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.hadoop.hbase.HBaseConfiguration\nimport org.apache.hadoop.hbase.client.{Delete, HTable, Result}\nimport org.apache.hadoop.hbase.io.ImmutableBytesWritable\nimport org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}\nimport org.apache.hadoop.io.Writable\nimport org.apache.hadoop.mapreduce.OutputFormat\nimport org.apache.predictionio.data.storage.{Event, PEvents, StorageClientConfig}\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.joda.time.DateTime\n\nclass HBPEvents(client: HBClient, config: StorageClientConfig, namespace: String) extends PEvents {\n\n  def checkTableExists(appId: Int, channelId: Option[Int]): Unit = {\n    if (!client.admin.tableExists(HBEventsUtil.tableName(namespace, appId, channelId))) {\n      if (channelId.nonEmpty) {\n        logger.error(s\"The appId $appId with channelId $channelId does not exist.\" +\n          s\" Please use valid appId and channelId.\")\n        throw new Exception(s\"HBase table not found for appId $appId\" +\n          s\" with channelId $channelId.\")\n      } else {\n   
     logger.error(s\"The appId $appId does not exist. Please use valid appId.\")\n        throw new Exception(s\"HBase table not found for appId $appId.\")\n      }\n    }\n  }\n\n  override\n  def find(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None\n    )(sc: SparkContext): RDD[Event] = {\n\n    checkTableExists(appId, channelId)\n\n    val conf = HBaseConfiguration.create()\n    conf.set(TableInputFormat.INPUT_TABLE,\n      HBEventsUtil.tableName(namespace, appId, channelId))\n\n    val scan = HBEventsUtil.createScan(\n        startTime = startTime,\n        untilTime = untilTime,\n        entityType = entityType,\n        entityId = entityId,\n        eventNames = eventNames,\n        targetEntityType = targetEntityType,\n        targetEntityId = targetEntityId,\n        reversed = None)\n    scan.setCaching(500) // TODO\n    scan.setCacheBlocks(false) // TODO\n\n    conf.set(TableInputFormat.SCAN, PIOHBaseUtil.convertScanToString(scan))\n\n    // HBase is not accessed until this rdd is actually used.\n    val rdd = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],\n      classOf[ImmutableBytesWritable],\n      classOf[Result]).map {\n        case (key, row) => HBEventsUtil.resultToEvent(row, appId)\n      }\n\n    rdd\n  }\n\n  override\n  def write(\n    events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n\n    checkTableExists(appId, channelId)\n\n    val conf = HBaseConfiguration.create()\n    conf.set(TableOutputFormat.OUTPUT_TABLE,\n      HBEventsUtil.tableName(namespace, appId, channelId))\n    conf.setClass(\"mapreduce.outputformat.class\",\n      classOf[TableOutputFormat[Object]],\n      
classOf[OutputFormat[Object, Writable]])\n\n    events.map { event =>\n      val (put, rowKey) = HBEventsUtil.eventToPut(event, appId)\n      (new ImmutableBytesWritable(rowKey.toBytes), put)\n    }.saveAsNewAPIHadoopDataset(conf)\n\n  }\n\n  def delete(\n    eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n\n    checkTableExists(appId, channelId)\n\n    val tableName = HBEventsUtil.tableName(namespace, appId, channelId)\n\n    eventIds.foreachPartition{ iter =>\n      val conf = HBaseConfiguration.create()\n      conf.set(TableOutputFormat.OUTPUT_TABLE,\n        tableName)\n\n      val table = new HTable(conf, tableName)\n      iter.foreach { id =>\n        val rowKey = HBEventsUtil.RowKey(id)\n        val delete = new Delete(rowKey.b)\n        table.delete(delete)\n      }\n      table.close\n    }\n  }\n}\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/PIOHBaseUtil.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.hadoop.hbase.client.Scan\nimport org.apache.hadoop.hbase.protobuf.ProtobufUtil\nimport org.apache.hadoop.hbase.util.Base64\n\nobject PIOHBaseUtil {\n  /*\n   * Copying this from Apache HBase because of its restrictive scope in 0.98.x\n   */\n  def convertScanToString(scan: Scan): String = {\n    val proto = ProtobufUtil.toScan(scan)\n    Base64.encodeBytes(proto.toByteArray)\n  }\n}\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\n\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.hbase.HBaseConfiguration\nimport org.apache.hadoop.hbase.MasterNotRunningException\nimport org.apache.hadoop.hbase.ZooKeeperConnectionException\nimport org.apache.hadoop.hbase.client.HConnectionManager\nimport org.apache.hadoop.hbase.client.HConnection\nimport org.apache.hadoop.hbase.client.HBaseAdmin\n\nimport grizzled.slf4j.Logging\n\ncase class HBClient(\n  val conf: Configuration,\n  val connection: HConnection,\n  val admin: HBaseAdmin\n)\n\nclass StorageClient(val config: StorageClientConfig)\n  extends BaseStorageClient with Logging {\n\n  val conf = HBaseConfiguration.create()\n\n  if (config.test) {\n    // use fewer retries and shorter timeout for test mode\n    conf.set(\"hbase.client.retries.number\", \"1\")\n    conf.set(\"zookeeper.session.timeout\", \"30000\");\n    conf.set(\"zookeeper.recovery.retry\", \"1\")\n  }\n\n  try {\n    HBaseAdmin.checkHBaseAvailable(conf)\n  } 
catch {\n    case e: MasterNotRunningException =>\n      error(\"HBase master is not running (ZooKeeper ensemble: \" +\n        conf.get(\"hbase.zookeeper.quorum\") + \"). Please make sure that HBase \" +\n        \"is running properly, and that the configuration is pointing at the \" +\n        \"correct ZooKeeper ensemble.\")\n      throw e\n    case e: ZooKeeperConnectionException =>\n      error(\"Cannot connect to ZooKeeper (ZooKeeper ensemble: \" +\n        conf.get(\"hbase.zookeeper.quorum\") + \"). Please make sure that the \" +\n        \"configuration is pointing at the correct ZooKeeper ensemble. By \" +\n        \"default, HBase manages its own ZooKeeper, so if you have not \" +\n        \"configured HBase to use an external ZooKeeper, that means your \" +\n        \"HBase is not started or configured properly.\")\n      throw e\n    case e: Exception => {\n      error(\"Failed to connect to HBase.\" +\n        \" Please check if HBase is running properly.\")\n      throw e\n    }\n  }\n\n  val connection = HConnectionManager.createConnection(conf)\n\n  val client = HBClient(\n    conf = conf,\n    connection = connection,\n    admin = new HBaseAdmin(connection)\n  )\n\n  override\n  val prefix = \"HB\"\n}\n"
  },
  {
    "path": "storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** HBase implementation of storage traits, supporting event data only\n  *\n  * @group Implementation\n  */\npackage object hbase {}\n"
  },
  {
    "path": "storage/hbase/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is dummy just to make storage happy.\n    # The actual testing will not bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "storage/hbase/src/test/scala/org/apache/predictionio/data/storage/hbase/LEventsSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage.{Event, LEvents, PropertyMap, Storage}\nimport org.specs2._\nimport org.specs2.specification.Step\n\nclass LEventsSpec extends Specification with TestEvents {\n  def is = s2\"\"\"\n\n  PredictionIO Storage LEvents Specification\n\n    Events can be implemented by:\n    - HBLEvents ${hbEvents}\n\n  \"\"\"\n\n  def hbEvents = sequential ^ s2\"\"\"\n\n    HBLEvents should\n    - behave like any LEvents implementation ${events(hbDO)}\n    - (table cleanup) ${Step(StorageTestUtils.dropHBaseNamespace(dbName))}\n\n  \"\"\"\n\n  val appId = 1\n\n  def events(eventClient: LEvents) = sequential ^ s2\"\"\"\n\n    init default ${initDefault(eventClient)}\n    insert 3 test events and get back by event ID ${insertAndGetEvents(eventClient)}\n    insert 3 test events with timezone and get back by event ID ${insertAndGetTimezone(eventClient)}\n    insert and delete by ID ${insertAndDelete(eventClient)}\n    insert test user events ${insertTestUserEvents(eventClient)}\n    find user events ${findUserEvents(eventClient)}\n    aggregate user properties 
${aggregateUserProperties(eventClient)}\n    aggregate one user properties ${aggregateOneUserProperties(eventClient)}\n    aggregate non-existent user properties ${aggregateNonExistentUserProperties(eventClient)}\n    init channel ${initChannel(eventClient)}\n    insert 2 events to channel ${insertChannel(eventClient)}\n    insert 1 event to channel and delete by ID  ${insertAndDeleteChannel(eventClient)}\n    find events from channel ${findChannel(eventClient)}\n    remove default ${removeDefault(eventClient)}\n    remove channel ${removeChannel(eventClient)}\n\n  \"\"\"\n\n  val dbName = \"test_pio_storage_events_\" + hashCode\n  def hbDO = Storage.getDataObject[LEvents](\n    StorageTestUtils.hbaseSourceName,\n    dbName\n  )\n\n  def initDefault(eventClient: LEvents) = {\n    eventClient.init(appId)\n  }\n\n  def insertAndGetEvents(eventClient: LEvents) = {\n\n    // events from TestEvents trait\n    val listOfEvents = List(r1,r2,r3)\n\n    val insertResp = listOfEvents.map { eventClient.insert(_, appId) }\n\n    val insertedEventId: List[String] = insertResp\n\n    val insertedEvent: List[Option[Event]] = listOfEvents.zip(insertedEventId)\n      .map { case (e, id) => Some(e.copy(eventId = Some(id))) }\n\n    val getResp = insertedEventId.map { id => eventClient.get(id, appId) }\n\n    val getEvents = getResp\n\n    insertedEvent must containTheSameElementsAs(getEvents)\n  }\n\n  def insertAndGetTimezone(eventClient: LEvents) = {\n    val listOfEvents = List(tz1, tz2, tz3)\n\n    val insertResp = listOfEvents.map { eventClient.insert(_, appId) }\n\n    val insertedEventId: List[String] = insertResp\n\n    val insertedEvent: List[Option[Event]] = listOfEvents.zip(insertedEventId)\n      .map { case (e, id) => Some(e.copy(eventId = Some(id))) }\n\n    val getResp = insertedEventId.map { id => eventClient.get(id, appId) }\n\n    val getEvents = getResp\n\n    insertedEvent must containTheSameElementsAs(getEvents)\n  }\n\n  def insertAndDelete(eventClient: 
LEvents) = {\n    val eventId = eventClient.insert(r2, appId)\n\n    val resultBefore = eventClient.get(eventId, appId)\n\n    val expectedBefore = r2.copy(eventId = Some(eventId))\n\n    val deleteStatus = eventClient.delete(eventId, appId)\n\n    val resultAfter = eventClient.get(eventId, appId)\n\n    (resultBefore must beEqualTo(Some(expectedBefore))) and\n    (deleteStatus must beEqualTo(true)) and\n    (resultAfter must beEqualTo(None))\n  }\n\n  def insertTestUserEvents(eventClient: LEvents) = {\n    // events from TestEvents trait\n    val listOfEvents = Vector(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n\n    listOfEvents.map{ eventClient.insert(_, appId) }\n\n    success\n  }\n\n  def findUserEvents(eventClient: LEvents) = {\n\n    val results: List[Event] = eventClient.find(\n      appId = appId,\n      entityType = Some(\"user\"))\n      .toList\n      .map(e => e.copy(eventId = None)) // ignore eventID\n\n    // same events in insertTestUserEvents\n    val expected = List(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def aggregateUserProperties(eventClient: LEvents) = {\n\n    val result: Map[String, PropertyMap] = eventClient.aggregateProperties(\n      appId = appId,\n      entityType = \"user\")\n\n    val expected = Map(\n      \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n      \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateOneUserProperties(eventClient: LEvents) = {\n    val result: Option[PropertyMap] = eventClient.aggregatePropertiesOfEntity(\n      appId = appId,\n      entityType = \"user\",\n      entityId = \"u1\")\n\n    val expected = Some(PropertyMap(u1, u1BaseTime, u1LastTime))\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateNonExistentUserProperties(eventClient: LEvents) = {\n    val result: Option[PropertyMap] = eventClient.aggregatePropertiesOfEntity(\n      appId = 
appId,\n      entityType = \"user\",\n      entityId = \"u999999\")\n\n    result must beEqualTo(None)\n  }\n\n  val channelId = 12\n\n  def initChannel(eventClient: LEvents) = {\n    eventClient.init(appId, Some(channelId))\n  }\n\n  def insertChannel(eventClient: LEvents) = {\n\n    // events from TestEvents trait\n    val listOfEvents = List(r4,r5)\n\n    listOfEvents.map( eventClient.insert(_, appId, Some(channelId)) )\n\n    success\n  }\n\n  def insertAndDeleteChannel(eventClient: LEvents) = {\n\n    val eventId = eventClient.insert(r2, appId, Some(channelId))\n\n    val resultBefore = eventClient.get(eventId, appId, Some(channelId))\n\n    val expectedBefore = r2.copy(eventId = Some(eventId))\n\n    val deleteStatus = eventClient.delete(eventId, appId, Some(channelId))\n\n    val resultAfter = eventClient.get(eventId, appId, Some(channelId))\n\n    (resultBefore must beEqualTo(Some(expectedBefore))) and\n    (deleteStatus must beEqualTo(true)) and\n    (resultAfter must beEqualTo(None))\n  }\n\n  def findChannel(eventClient: LEvents) = {\n\n    val results: List[Event] = eventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )\n    .toList\n    .map(e => e.copy(eventId = None)) // ignore eventId\n\n    // same events in insertChannel\n    val expected = List(r4, r5)\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def removeDefault(eventClient: LEvents) = {\n    eventClient.remove(appId)\n  }\n\n  def removeChannel(eventClient: LEvents) = {\n    eventClient.remove(appId, Some(channelId))\n  }\n}\n"
  },
  {
    "path": "storage/hbase/src/test/scala/org/apache/predictionio/data/storage/hbase/PEventsSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage._\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.specs2._\nimport org.specs2.specification.Step\n\nclass PEventsSpec extends Specification with TestEvents {\n\n  System.clearProperty(\"spark.driver.port\")\n  System.clearProperty(\"spark.hostPort\")\n  val sc = new SparkContext(\"local[4]\", \"PEventAggregatorSpec test\")\n\n  val appId = 1\n  val channelId = 6\n  val dbName = \"test_pio_storage_events_\" + hashCode\n\n  def hbLocal = Storage.getDataObject[LEvents](\n    StorageTestUtils.hbaseSourceName,\n    dbName\n  )\n\n  def hbPar = Storage.getDataObject[PEvents](\n    StorageTestUtils.hbaseSourceName,\n    dbName\n  )\n\n  def stopSpark = {\n    sc.stop()\n  }\n\n  def is = s2\"\"\"\n\n  PredictionIO Storage PEvents Specification\n\n    PEvents can be implemented by:\n    - HBPEvents ${hbPEvents}\n    - (stop Spark) ${Step(sc.stop())}\n\n  \"\"\"\n\n  def hbPEvents = sequential ^ s2\"\"\"\n\n    HBPEvents should\n    - behave like any PEvents implementation ${events(hbLocal, hbPar)}\n    - (table 
cleanup) ${Step(StorageTestUtils.dropHBaseNamespace(dbName))}\n\n  \"\"\"\n\n  def events(localEventClient: LEvents, parEventClient: PEvents) = sequential ^ s2\"\"\"\n\n    - (init test) ${initTest(localEventClient)}\n    - (insert test events) ${insertTestEvents(localEventClient)}\n    find in default ${find(parEventClient)}\n    find in channel ${findChannel(parEventClient)}\n    aggregate user properties in default ${aggregateUserProperties(parEventClient)}\n    aggregate user properties in channel ${aggregateUserPropertiesChannel(parEventClient)}\n    write to default ${write(parEventClient)}\n    write to channel ${writeChannel(parEventClient)}\n\n  \"\"\"\n\n  /* setup */\n\n  // events from TestEvents trait\n  val listOfEvents = List(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2, r1, r2)\n  val listOfEventsChannel = List(u3e1, u3e2, u3e3, r3, r4)\n\n  def initTest(localEventClient: LEvents) = {\n    localEventClient.init(appId)\n    localEventClient.init(appId, Some(channelId))\n  }\n\n  def insertTestEvents(localEventClient: LEvents) = {\n    listOfEvents.map( localEventClient.insert(_, appId) )\n    // insert to channel\n    listOfEventsChannel.map( localEventClient.insert(_, appId, Some(channelId)) )\n    success\n  }\n\n  /* following are tests */\n\n  def find(parEventClient: PEvents) = {\n    val resultRDD: RDD[Event] = parEventClient.find(\n      appId = appId\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map {_.copy(eventId = None)} // ignore eventId\n\n    results must containTheSameElementsAs(listOfEvents)\n  }\n\n  def findChannel(parEventClient: PEvents) = {\n    val resultRDD: RDD[Event] = parEventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map {_.copy(eventId = None)} // ignore eventId\n\n    results must containTheSameElementsAs(listOfEventsChannel)\n  }\n\n  def aggregateUserProperties(parEventClient: PEvents) = {\n    val resultRDD: 
RDD[(String, PropertyMap)] = parEventClient.aggregateProperties(\n      appId = appId,\n      entityType = \"user\"\n    )(sc)\n    val result: Map[String, PropertyMap] = resultRDD.collectAsMap.toMap\n\n    val expected = Map(\n      \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n      \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateUserPropertiesChannel(parEventClient: PEvents) = {\n    val resultRDD: RDD[(String, PropertyMap)] = parEventClient.aggregateProperties(\n      appId = appId,\n      channelId = Some(channelId),\n      entityType = \"user\"\n    )(sc)\n    val result: Map[String, PropertyMap] = resultRDD.collectAsMap.toMap\n\n    val expected = Map(\n      \"u3\" -> PropertyMap(u3, u3BaseTime, u3LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def write(parEventClient: PEvents) = {\n    val written = List(r5, r6)\n    val writtenRDD = sc.parallelize(written)\n    parEventClient.write(writtenRDD, appId)(sc)\n\n    // read back\n    val resultRDD = parEventClient.find(\n      appId = appId\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map { _.copy(eventId = None)} // ignore eventId\n\n    val expected = listOfEvents ++ written\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def writeChannel(parEventClient: PEvents) = {\n    val written = List(r1, r5, r6)\n    val writtenRDD = sc.parallelize(written)\n    parEventClient.write(writtenRDD, appId, Some(channelId))(sc)\n\n    // read back\n    val resultRDD = parEventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map { _.copy(eventId = None)} // ignore eventId\n\n    val expected = listOfEventsChannel ++ written\n\n    results must containTheSameElementsAs(expected)\n  }\n\n}\n"
  },
  {
    "path": "storage/hbase/src/test/scala/org/apache/predictionio/data/storage/hbase/StorageTestUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage.{LEvents, Storage}\n\nobject StorageTestUtils {\n  val hbaseSourceName = \"HBASE\"\n\n  def dropHBaseNamespace(namespace: String): Unit = {\n    val eventDb = Storage.getDataObject[LEvents](hbaseSourceName, namespace)\n      .asInstanceOf[HBLEvents]\n    val admin = eventDb.client.admin\n    val tableNames = admin.listTableNamesByNamespace(namespace)\n    tableNames.foreach { name =>\n      admin.disableTable(name)\n      admin.deleteTable(name)\n    }\n\n    // Only empty namespaces (no tables) can be removed.\n    admin.deleteNamespace(namespace)\n  }\n\n}\n"
  },
  {
    "path": "storage/hbase/src/test/scala/org/apache/predictionio/data/storage/hbase/TestEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hbase\n\nimport org.apache.predictionio.data.storage.{DataMap, Event}\nimport org.joda.time.{DateTime, DateTimeZone}\n\ntrait TestEvents {\n\n  val u1BaseTime = new DateTime(654321)\n  val u2BaseTime = new DateTime(6543210)\n  val u3BaseTime = new DateTime(6543410)\n\n  // u1 events\n  val u1e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u1\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 1,\n        \"b\" : \"value2\",\n        \"d\" : [1, 2, 3],\n      }\"\"\"),\n    eventTime = u1BaseTime\n  )\n\n  val u1e2 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"a\" : 2}\"\"\"),\n    eventTime = u1BaseTime.plusDays(1)\n  )\n\n  val u1e3 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value4\"}\"\"\"),\n    eventTime = u1BaseTime.plusDays(2)\n  )\n\n  val u1e4 = u1e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"b\" : null}\"\"\"),\n    eventTime = u1BaseTime.plusDays(3)\n  )\n\n  val u1e5 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"e\" : \"new\"}\"\"\"),\n 
   eventTime = u1BaseTime.plusDays(4)\n  )\n\n  val u1LastTime = u1BaseTime.plusDays(4)\n  val u1 = \"\"\"{\"a\": 2, \"d\": [1, 2, 3], \"e\": \"new\"}\"\"\"\n\n  // delete event for u1\n  val u1ed = u1e1.copy(\n    event = \"$delete\",\n    properties = DataMap(),\n    eventTime = u1BaseTime.plusDays(5)\n  )\n\n  // u2 events\n  val u2e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u2\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 21,\n        \"b\" : \"value12\",\n        \"d\" : [7, 5, 6],\n      }\"\"\"),\n    eventTime = u2BaseTime\n  )\n\n  val u2e2 = u2e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u2BaseTime.plusDays(1)\n  )\n\n  val u2e3 = u2e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value9\", \"g\": \"new11\"}\"\"\"),\n    eventTime = u2BaseTime.plusDays(2)\n  )\n\n  val u2LastTime = u2BaseTime.plusDays(2)\n  val u2 = \"\"\"{\"b\": \"value9\", \"d\": [7, 5, 6], \"g\": \"new11\"}\"\"\"\n\n  // u3 events\n  val u3e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u3\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 22,\n        \"b\" : \"value13\",\n        \"d\" : [5, 6, 1],\n      }\"\"\"),\n    eventTime = u3BaseTime\n  )\n\n  val u3e2 = u3e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u3BaseTime.plusDays(1)\n  )\n\n  val u3e3 = u3e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value10\", \"f\": \"new12\", \"d\" : [1, 3, 2]}\"\"\"),\n    eventTime = u3BaseTime.plusDays(2)\n  )\n\n  val u3LastTime = u3BaseTime.plusDays(2)\n  val u3 = \"\"\"{\"b\": \"value10\", \"d\": [1, 3, 2], \"f\": \"new12\"}\"\"\"\n\n  // some random events\n  val r1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = 
Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now,\n    prId = Some(\"my_prid\")\n  )\n  val r2 = Event(\n    event = \"my_event2\",\n    entityType = \"my_entity_type2\",\n    entityId = \"my_entity_id2\"\n  )\n  val r3 = Event(\n    event = \"my_event3\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"propA\" : 1.2345,\n        \"propB\" : \"valueB\",\n      }\"\"\"\n    ),\n    prId = Some(\"my_prid\")\n  )\n  val r4 = Event(\n    event = \"my_event4\",\n    entityType = \"my_entity_type4\",\n    entityId = \"my_entity_id4\",\n    targetEntityType = Some(\"my_target_entity_type4\"),\n    targetEntityId = Some(\"my_target_entity_id4\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"),\n    eventTime = DateTime.now\n  )\n  val r5 = Event(\n    event = \"my_event5\",\n    entityType = \"my_entity_type5\",\n    entityId = \"my_entity_id5\",\n    targetEntityType = Some(\"my_target_entity_type5\"),\n    targetEntityId = Some(\"my_target_entity_id5\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n  val r6 = Event(\n    event = \"my_event6\",\n 
   entityType = \"my_entity_type6\",\n    entityId = \"my_entity_id6\",\n    targetEntityType = Some(\"my_target_entity_type6\"),\n    targetEntityId = Some(\"my_target_entity_id6\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 6,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [6, 7, 8],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n\n  // timezone\n  val tz1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id0\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"-08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz2 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id1\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+02:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz3 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id2\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n}\n"
  },
  {
    "path": "storage/hdfs/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/hdfs/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-hdfs\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.hadoop\"        % \"hadoop-common\"            % hadoopVersion.value\n    exclude(\"commons-beanutils\", \"*\"),\n  \"org.apache.hadoop\"        % \"hadoop-hdfs\"              % hadoopVersion.value,\n  \"org.apache.predictionio\" %% \"apache-predictionio-data\" % version.value % \"provided\",\n  \"org.scalatest\"           %% \"scalatest\"                % \"2.1.7\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)\n\nassemblyExcludedJars in assembly := {\n  val cp = (fullClasspath in assembly).value\n  cp filter {_.data.getName.contains(\"slf4j-log4j12\")}\n}\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  (\"pio-data-hdfs-assembly-\" + version.value + \".jar\")\n"
  },
  {
    "path": "storage/hdfs/project/build.properties",
    "content": "sbt.version=0.13.15\n"
  },
  {
    "path": "storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/HDFSModels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hdfs\n\nimport java.io.IOException\n\nimport com.google.common.io.ByteStreams\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.Model\nimport org.apache.predictionio.data.storage.Models\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.hadoop.fs.FileSystem\nimport org.apache.hadoop.fs.Path\n\nclass HDFSModels(fs: FileSystem, config: StorageClientConfig, prefix: String)\n  extends Models with Logging {\n\n  def insert(i: Model): Unit = {\n    try {\n      val fsdos = fs.create(new Path(s\"$prefix${i.id}\"))\n      fsdos.write(i.models)\n      fsdos.close\n    } catch {\n      case e: IOException => error(e.getMessage)\n    }\n  }\n\n  def get(id: String): Option[Model] = {\n    try {\n      val p = new Path(s\"$prefix$id\")\n      Some(Model(\n        id = id,\n        models = ByteStreams.toByteArray(fs.open(p))))\n    } catch {\n      case e: Throwable =>\n        error(e.getMessage)\n        None\n    }\n  }\n\n  def delete(id: String): Unit = {\n    val p = new Path(s\"$prefix$id\")\n    if (!fs.delete(p, false)) {\n      
error(s\"Unable to delete ${fs.makeQualified(p).toString}!\")\n    }\n  }\n}\n"
  },
  {
    "path": "storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.hdfs\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.fs.FileSystem\nimport org.apache.hadoop.fs.Path\n\nclass StorageClient(val config: StorageClientConfig) extends BaseStorageClient\n    with Logging {\n  override val prefix = \"HDFS\"\n  val conf = new Configuration\n  val fs = FileSystem.get(conf)\n  fs.setWorkingDirectory(\n    new Path(config.properties.getOrElse(\"PATH\", config.properties(\"HOSTS\"))))\n  val client = fs\n}\n"
  },
  {
    "path": "storage/hdfs/src/main/scala/org/apache/predictionio/data/storage/hdfs/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** HDFS implementation of storage traits, supporting model data only\n  *\n  * @group Implementation\n  */\npackage object hdfs {}\n"
  },
  {
    "path": "storage/hdfs/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is dummy just to make storage happy.\n    # The actual testing will not bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/jdbc/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-jdbc\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % version.value % \"provided\",\n  \"org.apache.spark\"        %% \"spark-sql\"      % sparkVersion.value % \"provided\",\n  \"org.scalikejdbc\"         %% \"scalikejdbc\"    % \"3.1.0\",\n  \"org.postgresql\"           % \"postgresql\"     % \"9.4-1204-jdbc41\" % \"test\",\n  \"org.specs2\"              %% \"specs2\"         % \"2.3.13\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  s\"pio-data-jdbc-assembly-${version.value}.jar\"\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCAccessKeys.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.AccessKey\nimport org.apache.predictionio.data.storage.AccessKeys\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\nimport scala.util.Random\n\n/** JDBC implementation of [[AccessKeys]] */\nclass JDBCAccessKeys(client: String, config: StorageClientConfig, prefix: String)\n  extends AccessKeys with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"accesskeys\")\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      accesskey varchar(64) not null primary key,\n      appid integer not null,\n      events text)\"\"\".execute().apply()\n  }\n\n  def insert(accessKey: AccessKey): Option[String] = DB localTx { implicit s =>\n    val key = if (accessKey.key.isEmpty) generateKey else accessKey.key\n    val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(\",\"))\n    sql\"\"\"\n    insert into $tableName values(\n      $key,\n      
${accessKey.appid},\n      $events)\"\"\".update().apply()\n    Some(key)\n  }\n\n  def get(key: String): Option[AccessKey] = DB readOnly { implicit session =>\n    sql\"SELECT accesskey, appid, events FROM $tableName WHERE accesskey = $key\".\n      map(resultToAccessKey).single().apply()\n  }\n\n  def getAll(): Seq[AccessKey] = DB readOnly { implicit session =>\n    sql\"SELECT accesskey, appid, events FROM $tableName\".map(resultToAccessKey).list().apply()\n  }\n\n  def getByAppid(appid: Int): Seq[AccessKey] = DB readOnly { implicit session =>\n    sql\"SELECT accesskey, appid, events FROM $tableName WHERE appid = $appid\".\n      map(resultToAccessKey).list().apply()\n  }\n\n  def update(accessKey: AccessKey): Unit = DB localTx { implicit session =>\n    val events = if (accessKey.events.isEmpty) None else Some(accessKey.events.mkString(\",\"))\n    sql\"\"\"\n    UPDATE $tableName SET\n      appid = ${accessKey.appid},\n      events = $events\n    WHERE accesskey = ${accessKey.key}\"\"\".update().apply()\n  }\n\n  def delete(key: String): Unit = DB localTx { implicit session =>\n    sql\"DELETE FROM $tableName WHERE accesskey = $key\".update().apply()\n  }\n\n  /** Convert JDBC results to [[AccessKey]] */\n  def resultToAccessKey(rs: WrappedResultSet): AccessKey = {\n    AccessKey(\n      key = rs.string(\"accesskey\"),\n      appid = rs.int(\"appid\"),\n      events = rs.stringOpt(\"events\").map(_.split(\",\").toSeq).getOrElse(Nil))\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCApps.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.App\nimport org.apache.predictionio.data.storage.Apps\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\n/** JDBC implementation of [[Apps]] */\nclass JDBCApps(client: String, config: StorageClientConfig, prefix: String)\n  extends Apps with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"apps\")\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      id serial not null primary key,\n      name text not null,\n      description text)\"\"\".execute.apply()\n  }\n\n  def insert(app: App): Option[Int] = DB localTx { implicit session =>\n    val q = if (app.id == 0) {\n      sql\"\"\"\n      insert into $tableName (name, description) values(${app.name}, ${app.description})\n      \"\"\"\n    } else {\n      sql\"\"\"\n      insert into $tableName values(${app.id}, ${app.name}, ${app.description})\n      \"\"\"\n    }\n    
Some(q.updateAndReturnGeneratedKey().apply().toInt)\n  }\n\n  def get(id: Int): Option[App] = DB readOnly { implicit session =>\n    sql\"SELECT id, name, description FROM $tableName WHERE id = ${id}\".map(rs =>\n      App(\n        id = rs.int(\"id\"),\n        name = rs.string(\"name\"),\n        description = rs.stringOpt(\"description\"))\n    ).single().apply()\n  }\n\n  def getByName(name: String): Option[App] = DB readOnly { implicit session =>\n    sql\"SELECT id, name, description FROM $tableName WHERE name = ${name}\".map(rs =>\n      App(\n        id = rs.int(\"id\"),\n        name = rs.string(\"name\"),\n        description = rs.stringOpt(\"description\"))\n    ).single().apply()\n  }\n\n  def getAll(): Seq[App] = DB readOnly { implicit session =>\n    sql\"SELECT id, name, description FROM $tableName\".map(rs =>\n      App(\n        id = rs.int(\"id\"),\n        name = rs.string(\"name\"),\n        description = rs.stringOpt(\"description\"))\n    ).list().apply()\n  }\n\n  def update(app: App): Unit = DB localTx { implicit session =>\n    sql\"\"\"\n    update $tableName set name = ${app.name}, description = ${app.description}\n    where id = ${app.id}\"\"\".update().apply()\n  }\n\n  def delete(id: Int): Unit = DB localTx { implicit session =>\n    sql\"DELETE FROM $tableName WHERE id = $id\".update().apply()\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCChannels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.Channel\nimport org.apache.predictionio.data.storage.Channels\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\n/** JDBC implementation of [[Channels]] */\nclass JDBCChannels(client: String, config: StorageClientConfig, prefix: String)\n  extends Channels with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"channels\")\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      id serial not null primary key,\n      name text not null,\n      appid integer not null)\"\"\".execute().apply()\n  }\n\n  def insert(channel: Channel): Option[Int] = DB localTx { implicit session =>\n    val q = if (channel.id == 0) {\n      sql\"INSERT INTO $tableName (name, appid) VALUES(${channel.name}, ${channel.appid})\"\n    } else {\n      sql\"INSERT INTO $tableName VALUES(${channel.id}, ${channel.name}, ${channel.appid})\"\n    }\n    
Some(q.updateAndReturnGeneratedKey().apply().toInt)\n  }\n\n  def get(id: Int): Option[Channel] = DB localTx { implicit session =>\n    sql\"SELECT id, name, appid FROM $tableName WHERE id = $id\".\n      map(resultToChannel).single().apply()\n  }\n\n  def getByAppid(appid: Int): Seq[Channel] = DB localTx { implicit session =>\n    sql\"SELECT id, name, appid FROM $tableName WHERE appid = $appid\".\n      map(resultToChannel).list().apply()\n  }\n\n  def delete(id: Int): Unit = DB localTx { implicit session =>\n    sql\"DELETE FROM $tableName WHERE id = $id\".update().apply()\n  }\n\n  def resultToChannel(rs: WrappedResultSet): Channel = {\n    Channel(\n      id = rs.int(\"id\"),\n      name = rs.string(\"name\"),\n      appid = rs.int(\"appid\"))\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEngineInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.EngineInstance\nimport org.apache.predictionio.data.storage.EngineInstances\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\n/** JDBC implementation of [[EngineInstances]] */\nclass JDBCEngineInstances(client: String, config: StorageClientConfig, prefix: String)\n  extends EngineInstances with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"engineinstances\")\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      id varchar(100) not null primary key,\n      status text not null,\n      startTime timestamp DEFAULT CURRENT_TIMESTAMP,\n      endTime timestamp DEFAULT CURRENT_TIMESTAMP,\n      engineId text not null,\n      engineVersion text not null,\n      engineVariant text not null,\n      engineFactory text not null,\n      batch text not null,\n      env text not null,\n      sparkConf text not null,\n      datasourceParams text not null,\n      
preparatorParams text not null,\n      algorithmsParams text not null,\n      servingParams text not null)\"\"\".execute().apply()\n  }\n\n  def insert(i: EngineInstance): String = DB localTx { implicit session =>\n    val id = java.util.UUID.randomUUID().toString\n    sql\"\"\"\n    INSERT INTO $tableName VALUES(\n      $id,\n      ${i.status},\n      ${i.startTime},\n      ${i.endTime},\n      ${i.engineId},\n      ${i.engineVersion},\n      ${i.engineVariant},\n      ${i.engineFactory},\n      ${i.batch},\n      ${JDBCUtils.mapToString(i.env)},\n      ${JDBCUtils.mapToString(i.sparkConf)},\n      ${i.dataSourceParams},\n      ${i.preparatorParams},\n      ${i.algorithmsParams},\n      ${i.servingParams})\"\"\".update().apply()\n    id\n  }\n\n  def get(id: String): Option[EngineInstance] = DB localTx { implicit session =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      engineId,\n      engineVersion,\n      engineVariant,\n      engineFactory,\n      batch,\n      env,\n      sparkConf,\n      datasourceParams,\n      preparatorParams,\n      algorithmsParams,\n      servingParams\n    FROM $tableName WHERE id = $id\"\"\".map(resultToEngineInstance).\n      single().apply()\n  }\n\n  def getAll(): Seq[EngineInstance] = DB localTx { implicit session =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      engineId,\n      engineVersion,\n      engineVariant,\n      engineFactory,\n      batch,\n      env,\n      sparkConf,\n      datasourceParams,\n      preparatorParams,\n      algorithmsParams,\n      servingParams\n    FROM $tableName\"\"\".map(resultToEngineInstance).list().apply()\n  }\n\n  def getLatestCompleted(\n    engineId: String,\n    engineVersion: String,\n    engineVariant: String): Option[EngineInstance] =\n    getCompleted(engineId, engineVersion, engineVariant).headOption\n\n  def getCompleted(\n    engineId: String,\n    engineVersion: String,\n    
engineVariant: String): Seq[EngineInstance] = DB localTx { implicit s =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      engineId,\n      engineVersion,\n      engineVariant,\n      engineFactory,\n      batch,\n      env,\n      sparkConf,\n      datasourceParams,\n      preparatorParams,\n      algorithmsParams,\n      servingParams\n    FROM $tableName\n    WHERE\n      status = 'COMPLETED' AND\n      engineId = $engineId AND\n      engineVersion = $engineVersion AND\n      engineVariant = $engineVariant\n    ORDER BY startTime DESC\"\"\".\n      map(resultToEngineInstance).list().apply()\n  }\n\n  def update(i: EngineInstance): Unit = DB localTx { implicit session =>\n    sql\"\"\"\n    update $tableName set\n      status = ${i.status},\n      startTime = ${i.startTime},\n      endTime = ${i.endTime},\n      engineId = ${i.engineId},\n      engineVersion = ${i.engineVersion},\n      engineVariant = ${i.engineVariant},\n      engineFactory = ${i.engineFactory},\n      batch = ${i.batch},\n      env = ${JDBCUtils.mapToString(i.env)},\n      sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},\n      datasourceParams = ${i.dataSourceParams},\n      preparatorParams = ${i.preparatorParams},\n      algorithmsParams = ${i.algorithmsParams},\n      servingParams = ${i.servingParams}\n    where id = ${i.id}\"\"\".update().apply()\n  }\n\n  def delete(id: String): Unit = DB localTx { implicit session =>\n    sql\"DELETE FROM $tableName WHERE id = $id\".update().apply()\n  }\n\n  /** Convert JDBC results to [[EngineInstance]] */\n  def resultToEngineInstance(rs: WrappedResultSet): EngineInstance = {\n    EngineInstance(\n      id = rs.string(\"id\"),\n      status = rs.string(\"status\"),\n      startTime = rs.jodaDateTime(\"startTime\"),\n      endTime = rs.jodaDateTime(\"endTime\"),\n      engineId = rs.string(\"engineId\"),\n      engineVersion = rs.string(\"engineVersion\"),\n      engineVariant = 
rs.string(\"engineVariant\"),\n      engineFactory = rs.string(\"engineFactory\"),\n      batch = rs.string(\"batch\"),\n      env = JDBCUtils.stringToMap(rs.string(\"env\")),\n      sparkConf = JDBCUtils.stringToMap(rs.string(\"sparkConf\")),\n      dataSourceParams = rs.string(\"datasourceParams\"),\n      preparatorParams = rs.string(\"preparatorParams\"),\n      algorithmsParams = rs.string(\"algorithmsParams\"),\n      servingParams = rs.string(\"servingParams\"))\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCEvaluationInstances.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.EvaluationInstance\nimport org.apache.predictionio.data.storage.EvaluationInstances\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\n/** JDBC implementations of [[EvaluationInstances]] */\nclass JDBCEvaluationInstances(client: String, config: StorageClientConfig, prefix: String)\n  extends EvaluationInstances with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"evaluationinstances\")\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      id varchar(100) not null primary key,\n      status text not null,\n      startTime timestamp DEFAULT CURRENT_TIMESTAMP,\n      endTime timestamp DEFAULT CURRENT_TIMESTAMP,\n      evaluationClass text not null,\n      engineParamsGeneratorClass text not null,\n      batch text not null,\n      env text not null,\n      sparkConf text not null,\n      evaluatorResults text not null,\n      evaluatorResultsHTML text not 
null,\n      evaluatorResultsJSON text)\"\"\".execute().apply()\n  }\n\n  def insert(i: EvaluationInstance): String = DB localTx { implicit session =>\n    val id = java.util.UUID.randomUUID().toString\n    sql\"\"\"\n    INSERT INTO $tableName VALUES(\n      $id,\n      ${i.status},\n      ${i.startTime},\n      ${i.endTime},\n      ${i.evaluationClass},\n      ${i.engineParamsGeneratorClass},\n      ${i.batch},\n      ${JDBCUtils.mapToString(i.env)},\n      ${JDBCUtils.mapToString(i.sparkConf)},\n      ${i.evaluatorResults},\n      ${i.evaluatorResultsHTML},\n      ${i.evaluatorResultsJSON})\"\"\".update().apply()\n    id\n  }\n\n  def get(id: String): Option[EvaluationInstance] = DB localTx { implicit session =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      evaluationClass,\n      engineParamsGeneratorClass,\n      batch,\n      env,\n      sparkConf,\n      evaluatorResults,\n      evaluatorResultsHTML,\n      evaluatorResultsJSON\n    FROM $tableName WHERE id = $id\n    \"\"\".map(resultToEvaluationInstance).single().apply()\n  }\n\n  def getAll(): Seq[EvaluationInstance] = DB localTx { implicit session =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      evaluationClass,\n      engineParamsGeneratorClass,\n      batch,\n      env,\n      sparkConf,\n      evaluatorResults,\n      evaluatorResultsHTML,\n      evaluatorResultsJSON\n    FROM $tableName\n    \"\"\".map(resultToEvaluationInstance).list().apply()\n  }\n\n  def getCompleted(): Seq[EvaluationInstance] = DB localTx { implicit s =>\n    sql\"\"\"\n    SELECT\n      id,\n      status,\n      startTime,\n      endTime,\n      evaluationClass,\n      engineParamsGeneratorClass,\n      batch,\n      env,\n      sparkConf,\n      evaluatorResults,\n      evaluatorResultsHTML,\n      evaluatorResultsJSON\n    FROM $tableName\n    WHERE\n      status = 'EVALCOMPLETED'\n    ORDER BY starttime DESC\n    
\"\"\".map(resultToEvaluationInstance).list().apply()\n  }\n\n  def update(i: EvaluationInstance): Unit = DB localTx { implicit session =>\n    sql\"\"\"\n    update $tableName set\n      status = ${i.status},\n      startTime = ${i.startTime},\n      endTime = ${i.endTime},\n      evaluationClass = ${i.evaluationClass},\n      engineParamsGeneratorClass = ${i.engineParamsGeneratorClass},\n      batch = ${i.batch},\n      env = ${JDBCUtils.mapToString(i.env)},\n      sparkConf = ${JDBCUtils.mapToString(i.sparkConf)},\n      evaluatorResults = ${i.evaluatorResults},\n      evaluatorResultsHTML = ${i.evaluatorResultsHTML},\n      evaluatorResultsJSON = ${i.evaluatorResultsJSON}\n    where id = ${i.id}\"\"\".update().apply()\n  }\n\n  def delete(id: String): Unit = DB localTx { implicit session =>\n    sql\"DELETE FROM $tableName WHERE id = $id\".update().apply()\n  }\n\n  /** Convert JDBC results to [[EvaluationInstance]] */\n  def resultToEvaluationInstance(rs: WrappedResultSet): EvaluationInstance = {\n    EvaluationInstance(\n      id = rs.string(\"id\"),\n      status = rs.string(\"status\"),\n      startTime = rs.jodaDateTime(\"startTime\"),\n      endTime = rs.jodaDateTime(\"endTime\"),\n      evaluationClass = rs.string(\"evaluationClass\"),\n      engineParamsGeneratorClass = rs.string(\"engineParamsGeneratorClass\"),\n      batch = rs.string(\"batch\"),\n      env = JDBCUtils.stringToMap(rs.string(\"env\")),\n      sparkConf = JDBCUtils.stringToMap(rs.string(\"sparkConf\")),\n      evaluatorResults = rs.string(\"evaluatorResults\"),\n      evaluatorResultsHTML = rs.string(\"evaluatorResultsHTML\"),\n      evaluatorResultsJSON = rs.string(\"evaluatorResultsJSON\"))\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCLEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.DataMap\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.LEvents\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.joda.time.DateTime\nimport org.joda.time.DateTimeZone\nimport org.json4s.JObject\nimport org.json4s.native.Serialization.read\nimport org.json4s.native.Serialization.write\nimport scalikejdbc._\n\nimport scala.concurrent.ExecutionContext\nimport scala.concurrent.Future\n\n/** JDBC implementation of [[LEvents]] */\nclass JDBCLEvents(\n    client: String,\n    config: StorageClientConfig,\n    namespace: String) extends LEvents with Logging {\n  implicit private val formats = org.json4s.DefaultFormats\n\n  override def init(appId: Int, channelId: Option[Int] = None): Boolean = {\n\n    // To use index, it must be varchar less than 255 characters on a VARCHAR column\n    val useIndex = config.properties.contains(\"INDEX\") &&\n      config.properties(\"INDEX\").equalsIgnoreCase(\"enabled\")\n\n    val tableName = 
JDBCUtils.eventTableName(namespace, appId, channelId)\n    val entityIdIndexName = s\"idx_${tableName}_ei\"\n    val entityTypeIndexName = s\"idx_${tableName}_et\"\n    DB autoCommit { implicit session =>\n      if (useIndex) {\n        SQL(s\"\"\"\n      create table if not exists $tableName (\n        id varchar(32) not null primary key,\n        event varchar(255) not null,\n        entityType varchar(255) not null,\n        entityId varchar(255) not null,\n        targetEntityType text,\n        targetEntityId text,\n        properties text,\n        eventTime timestamp DEFAULT CURRENT_TIMESTAMP,\n        eventTimeZone varchar(50) not null,\n        tags text,\n        prId text,\n        creationTime timestamp DEFAULT CURRENT_TIMESTAMP,\n        creationTimeZone varchar(50) not null)\"\"\").execute().apply()\n\n        // create index\n        SQL(s\"create index $entityIdIndexName on $tableName (entityId)\").execute().apply()\n        SQL(s\"create index $entityTypeIndexName on $tableName (entityType)\").execute().apply()\n      } else {\n        SQL(s\"\"\"\n      create table if not exists $tableName (\n        id varchar(32) not null primary key,\n        event text not null,\n        entityType text not null,\n        entityId text not null,\n        targetEntityType text,\n        targetEntityId text,\n        properties text,\n        eventTime timestamp DEFAULT CURRENT_TIMESTAMP,\n        eventTimeZone varchar(50) not null,\n        tags text,\n        prId text,\n        creationTime timestamp DEFAULT CURRENT_TIMESTAMP,\n        creationTimeZone varchar(50) not null)\"\"\").execute().apply()\n      }\n      true\n    }\n  }\n\n  override def remove(appId: Int, channelId: Option[Int] = None): Boolean =\n    DB autoCommit { implicit session =>\n      SQL(s\"\"\"\n      drop table ${JDBCUtils.eventTableName(namespace, appId, channelId)}\n      \"\"\").execute().apply()\n      true\n    }\n\n  override def close(): Unit = ConnectionPool.closeAll()\n\n  
override def futureInsert(event: Event, appId: Int, channelId: Option[Int])(\n    implicit ec: ExecutionContext): Future[String] = Future {\n    DB localTx { implicit session =>\n      val id = event.eventId.getOrElse(JDBCUtils.generateId)\n      val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))\n      sql\"\"\"\n      insert into $tableName values(\n        $id,\n        ${event.event},\n        ${event.entityType},\n        ${event.entityId},\n        ${event.targetEntityType},\n        ${event.targetEntityId},\n        ${write(event.properties.toJObject)},\n        ${event.eventTime},\n        ${event.eventTime.getZone.getID},\n        ${if (event.tags.nonEmpty) Some(event.tags.mkString(\",\")) else None},\n        ${event.prId},\n        ${event.creationTime},\n        ${event.creationTime.getZone.getID}\n      )\n      \"\"\".update().apply()\n      id\n    }\n  }\n\n  override def futureInsertBatch(events: Seq[Event], appId: Int, channelId: Option[Int])(\n    implicit ec: ExecutionContext): Future[Seq[String]] = Future {\n    DB localTx { implicit session =>\n      val ids = events.map(_.eventId.getOrElse(JDBCUtils.generateId))\n      val params = events.zip(ids).map { case (event, id) =>\n        Seq(\n          'id               -> id,\n          'event            -> event.event,\n          'entityType       -> event.entityType,\n          'entityId         -> event.entityId,\n          'targetEntityType -> event.targetEntityType,\n          'targetEntityId   -> event.targetEntityId,\n          'properties       -> write(event.properties.toJObject),\n          'eventTime        -> event.eventTime,\n          'eventTimeZone    -> event.eventTime.getZone.getID,\n          'tags             -> (if(event.tags.nonEmpty) Some(event.tags.mkString(\",\")) else None),\n          'prId             -> event.prId,\n          'creationTime     -> event.creationTime,\n          'creationTimeZone -> 
event.creationTime.getZone.getID\n        )\n      }\n\n      val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))\n      sql\"\"\"\n      insert into $tableName values(\n        {id},\n        {event},\n        {entityType},\n        {entityId},\n        {targetEntityType},\n        {targetEntityId},\n        {properties},\n        {eventTime},\n        {eventTimeZone},\n        {tags},\n        {prId},\n        {creationTime},\n        {creationTimeZone}\n      )\n      \"\"\".batchByName(params: _*).apply()\n\n      ids\n    }\n  }\n\n  override def futureGet(eventId: String, appId: Int, channelId: Option[Int])(\n    implicit ec: ExecutionContext): Future[Option[Event]] = Future {\n    DB readOnly { implicit session =>\n      val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))\n      sql\"\"\"\n      select\n        id,\n        event,\n        entityType,\n        entityId,\n        targetEntityType,\n        targetEntityId,\n        properties,\n        eventTime,\n        eventTimeZone,\n        tags,\n        prId,\n        creationTime,\n        creationTimeZone\n      from $tableName\n      where id = $eventId\n      \"\"\".map(resultToEvent).single().apply()\n    }\n  }\n\n  override def futureDelete(eventId: String, appId: Int, channelId: Option[Int])(\n    implicit ec: ExecutionContext): Future[Boolean] = Future {\n    DB localTx { implicit session =>\n      val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))\n      sql\"\"\"\n      delete from $tableName where id = $eventId\n      \"\"\".update().apply()\n      true\n    }\n  }\n\n  override def futureFind(\n      appId: Int,\n      channelId: Option[Int] = None,\n      startTime: Option[DateTime] = None,\n      untilTime: Option[DateTime] = None,\n      entityType: Option[String] = None,\n      entityId: Option[String] = None,\n      eventNames: Option[Seq[String]] = None,\n      
targetEntityType: Option[Option[String]] = None,\n      targetEntityId: Option[Option[String]] = None,\n      limit: Option[Int] = None,\n      reversed: Option[Boolean] = None\n    )(implicit ec: ExecutionContext): Future[Iterator[Event]] = Future {\n    DB readOnly { implicit session =>\n      val tableName = sqls.createUnsafely(JDBCUtils.eventTableName(namespace, appId, channelId))\n      val whereClause = sqls.toAndConditionOpt(\n        startTime.map(x => sqls\"eventTime >= $x\"),\n        untilTime.map(x => sqls\"eventTime < $x\"),\n        entityType.map(x => sqls\"entityType = $x\"),\n        entityId.map(x => sqls\"entityId = $x\"),\n        eventNames.map(x =>\n          sqls.toOrConditionOpt(x.map(y =>\n            Some(sqls\"event = $y\")\n          ): _*)\n        ).getOrElse(None),\n        targetEntityType.map(x => x.map(y => sqls\"targetEntityType = $y\")\n            .getOrElse(sqls\"targetEntityType IS NULL\")),\n        targetEntityId.map(x => x.map(y => sqls\"targetEntityId = $y\")\n            .getOrElse(sqls\"targetEntityId IS NULL\"))\n      ).map(sqls.where(_)).getOrElse(sqls\"\")\n      val orderByClause = reversed.map(x =>\n        if (x) sqls\"eventTime desc\" else sqls\"eventTime asc\"\n      ).getOrElse(sqls\"eventTime asc\")\n      val limitClause = limit.map(x =>\n        if (x < 0) sqls\"\" else sqls.limit(x)\n      ).getOrElse(sqls\"\")\n      val q = sql\"\"\"\n      select\n        id,\n        event,\n        entityType,\n        entityId,\n        targetEntityType,\n        targetEntityId,\n        properties,\n        eventTime,\n        eventTimeZone,\n        tags,\n        prId,\n        creationTime,\n        creationTimeZone\n      from $tableName\n      $whereClause\n      order by $orderByClause\n      $limitClause\n      \"\"\"\n      q.map(resultToEvent).list().apply().toIterator\n    }\n  }\n\n  private[predictionio] def resultToEvent(rs: WrappedResultSet): Event = {\n    Event(\n      eventId = 
rs.stringOpt(\"id\"),\n      event = rs.string(\"event\"),\n      entityType = rs.string(\"entityType\"),\n      entityId = rs.string(\"entityId\"),\n      targetEntityType = rs.stringOpt(\"targetEntityType\"),\n      targetEntityId = rs.stringOpt(\"targetEntityId\"),\n      properties = rs.stringOpt(\"properties\").map(p =>\n        DataMap(read[JObject](p))).getOrElse(DataMap()),\n      eventTime = new DateTime(rs.jodaDateTime(\"eventTime\"),\n        DateTimeZone.forID(rs.string(\"eventTimeZone\"))),\n      tags = rs.stringOpt(\"tags\").map(t => t.split(\",\").toList).getOrElse(Nil),\n      prId = rs.stringOpt(\"prId\"),\n      creationTime = new DateTime(rs.jodaDateTime(\"creationTime\"),\n        DateTimeZone.forID(rs.string(\"creationTimeZone\")))\n    )\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCModels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.Model\nimport org.apache.predictionio.data.storage.Models\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport scalikejdbc._\n\n/** JDBC implementation of [[Models]] */\nclass JDBCModels(client: String, config: StorageClientConfig, prefix: String)\n  extends Models with Logging {\n  /** Database table name for this data access object */\n  val tableName = JDBCUtils.prefixTableName(prefix, \"models\")\n\n  /** Determines binary column type based on JDBC driver type */\n  val binaryColumnType = JDBCUtils.binaryColumnType(client)\n  DB autoCommit { implicit session =>\n    sql\"\"\"\n    create table if not exists $tableName (\n      id varchar(100) not null primary key,\n      models $binaryColumnType not null)\"\"\".execute().apply()\n  }\n\n  def insert(i: Model): Unit = DB localTx { implicit session =>\n    sql\"insert into $tableName values(${i.id}, ${i.models})\".update().apply()\n  }\n\n  def get(id: String): Option[Model] = DB readOnly { implicit session =>\n    sql\"select id, models from 
$tableName where id = $id\".map { r =>\n      Model(id = r.string(\"id\"), models = r.bytes(\"models\"))\n    }.single().apply()\n  }\n\n  def delete(id: String): Unit = DB localTx { implicit session =>\n    sql\"delete from $tableName where id = $id\".execute().apply()\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport java.sql.{DriverManager, ResultSet}\n\nimport com.github.nscala_time.time.Imports._\nimport org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig}\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.{JdbcRDD, RDD}\nimport org.apache.spark.sql.{SaveMode, SparkSession}\nimport org.json4s.JObject\nimport org.json4s.native.Serialization\nimport scalikejdbc._\n\n/** JDBC implementation of [[PEvents]] */\nclass JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {\n  @transient private implicit lazy val formats = org.json4s.DefaultFormats\n\n  def find(\n    appId: Int,\n    channelId: Option[Int] = None,\n    startTime: Option[DateTime] = None,\n    untilTime: Option[DateTime] = None,\n    entityType: Option[String] = None,\n    entityId: Option[String] = None,\n    eventNames: Option[Seq[String]] = None,\n    targetEntityType: Option[Option[String]] = None,\n    targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {\n\n    val lower = 
startTime.map(_.getMillis).getOrElse(0.toLong)\n    /** Change the default upper bound from +100 to +1 year because MySQL's\n      * FROM_UNIXTIME(t) will return NULL if we use +100 years.\n      */\n    val upper = untilTime.map(_.getMillis).getOrElse((DateTime.now + 1.years).getMillis)\n    val par = scala.math.min(\n      new Duration(upper - lower).getStandardDays,\n      config.properties.getOrElse(\"PARTITIONS\", \"4\").toLong).toInt\n    val entityTypeClause = entityType.map(x => s\"and entityType = '$x'\").getOrElse(\"\")\n    val entityIdClause = entityId.map(x => s\"and entityId = '$x'\").getOrElse(\"\")\n    val eventNamesClause =\n      eventNames.map(\"and (\" + _.map(y => s\"event = '$y'\").mkString(\" or \") + \")\").getOrElse(\"\")\n    val targetEntityTypeClause = targetEntityType.map(\n      _.map(x => s\"and targetEntityType = '$x'\"\n    ).getOrElse(\"and targetEntityType is null\")).getOrElse(\"\")\n    val targetEntityIdClause = targetEntityId.map(\n      _.map(x => s\"and targetEntityId = '$x'\"\n    ).getOrElse(\"and targetEntityId is null\")).getOrElse(\"\")\n    val q = s\"\"\"\n      select\n        id,\n        event,\n        entityType,\n        entityId,\n        targetEntityType,\n        targetEntityId,\n        properties,\n        eventTime,\n        eventTimeZone,\n        tags,\n        prId,\n        creationTime,\n        creationTimeZone\n      from ${JDBCUtils.eventTableName(namespace, appId, channelId)}\n      where\n        eventTime >= ${JDBCUtils.timestampFunction(client)}(?) 
and\n        eventTime < ${JDBCUtils.timestampFunction(client)}(?)\n      $entityTypeClause\n      $entityIdClause\n      $eventNamesClause\n      $targetEntityTypeClause\n      $targetEntityIdClause\n      \"\"\".replace(\"\\n\", \" \")\n    new JdbcRDD(\n      sc,\n      () => {\n        DriverManager.getConnection(\n          client,\n          config.properties(\"USERNAME\"),\n          config.properties(\"PASSWORD\"))\n      },\n      q,\n      lower / 1000,\n      upper / 1000,\n      par,\n      (r: ResultSet) => {\n        Event(\n          eventId = Option(r.getString(\"id\")),\n          event = r.getString(\"event\"),\n          entityType = r.getString(\"entityType\"),\n          entityId = r.getString(\"entityId\"),\n          targetEntityType = Option(r.getString(\"targetEntityType\")),\n          targetEntityId = Option(r.getString(\"targetEntityId\")),\n          properties = Option(r.getString(\"properties\")).map(x =>\n            DataMap(Serialization.read[JObject](x))).getOrElse(DataMap()),\n          eventTime = new DateTime(r.getTimestamp(\"eventTime\").getTime,\n            DateTimeZone.forID(r.getString(\"eventTimeZone\"))),\n          tags = Option(r.getString(\"tags\")).map(x =>\n            x.split(\",\").toList).getOrElse(Nil),\n          prId = Option(r.getString(\"prId\")),\n          creationTime = new DateTime(r.getTimestamp(\"creationTime\").getTime,\n            DateTimeZone.forID(r.getString(\"creationTimeZone\"))))\n      }).cache()\n  }\n\n  def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n    val sqlSession = SparkSession.builder().getOrCreate()\n    import sqlSession.implicits._\n\n    val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)\n\n    val eventsColumnNamesInDF = Seq[String](\n        \"id\"\n      , \"event\"\n      , \"entityType\"\n      , \"entityId\"\n      , \"targetEntityType\"\n      , \"targetEntityId\"\n      , \"properties\"\n      , 
\"eventTime\"\n      , \"eventTimeZone\"\n      , \"tags\"\n      , \"prId\"\n      , \"creationTime\"\n      , \"creationTimeZone\")\n\n    // Necessary for handling postgres \"case-sensitivity\"\n    val eventsColumnNamesInSQL = JDBCUtils.driverType(client) match {\n      case \"postgresql\" => eventsColumnNamesInDF.map(_.toLowerCase)\n      case _ => eventsColumnNamesInDF\n    }\n    val eventDF = events.map { event =>\n      (event.eventId.getOrElse(JDBCUtils.generateId)\n        , event.event\n        , event.entityType\n        , event.entityId\n        , event.targetEntityType.orNull\n        , event.targetEntityId.orNull\n        , if (!event.properties.isEmpty) Serialization.write(event.properties.toJObject) else null\n        , new java.sql.Timestamp(event.eventTime.getMillis)\n        , event.eventTime.getZone.getID\n        , if (event.tags.nonEmpty) Some(event.tags.mkString(\",\")) else null\n        , event.prId\n        , new java.sql.Timestamp(event.creationTime.getMillis)\n        , event.creationTime.getZone.getID)\n    }.toDF(eventsColumnNamesInSQL:_*)\n\n    val prop = new java.util.Properties\n    prop.setProperty(\"user\", config.properties(\"USERNAME\"))\n    prop.setProperty(\"password\", config.properties(\"PASSWORD\"))\n    eventDF.write.mode(SaveMode.Append).jdbc(client, tableName, prop)\n  }\n\n  def delete(eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {\n\n    eventIds.foreachPartition{ iter =>\n      DB(\n        DriverManager.getConnection(\n          client,\n          config.properties(\"USERNAME\"),\n          config.properties(\"PASSWORD\"))\n      ) localTx { implicit session =>\n        val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)\n        val table = SQLSyntax.createUnsafely(tableName)\n\n        iter.foreach { eventId =>\n          sql\"\"\"\n          delete from $table where id = $eventId\n          \"\"\".update().apply()\n        }\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport scalikejdbc._\n\n/** JDBC related utilities */\nobject JDBCUtils {\n  /** Extract JDBC driver type from URL\n    *\n    * @param url JDBC URL\n    * @return The driver type, e.g. postgresql\n    */\n  def driverType(url: String): String = {\n    val capture = \"\"\"jdbc:([^:]+):\"\"\".r\n    capture findFirstIn url match {\n      case Some(capture(driverType)) => driverType\n      case None => \"\"\n    }\n  }\n\n  /** Determines binary column type from JDBC URL\n    *\n    * @param url JDBC URL\n    * @return Binary column type as SQLSyntax, e.g. LONGBLOB\n    */\n  def binaryColumnType(url: String): SQLSyntax = {\n    driverType(url) match {\n      case \"postgresql\" => sqls\"bytea\"\n      case \"mysql\" => sqls\"longblob\"\n      case _ => sqls\"longblob\"\n    }\n  }\n\n  /** Determines UNIX timestamp conversion function from JDBC URL\n    *\n    * @param url JDBC URL\n    * @return Timestamp conversion function, e.g. 
TO_TIMESTAMP\n    */\n  def timestampFunction(url: String): String = {\n    driverType(url) match {\n      case \"postgresql\" => \"to_timestamp\"\n      case \"mysql\" => \"from_unixtime\"\n      case _ => \"from_unixtime\"\n    }\n  }\n\n  /** Converts Map of String to String to comma-separated list of key=value\n    *\n    * @param m Map of String to String\n    * @return Comma-separated list, e.g. FOO=BAR,X=Y,...\n    */\n  def mapToString(m: Map[String, String]): String = {\n    m.map(t => s\"${t._1}=${t._2}\").mkString(\",\")\n  }\n\n  /** Inverse of mapToString\n    *\n    * @param str Comma-separated list, e.g. FOO=BAR,X=Y,...\n    * @return Map of String to String, e.g. Map(\"FOO\" -> \"BAR\", \"X\" -> \"Y\", ...)\n    */\n  def stringToMap(str: String): Map[String, String] = {\n    if (str.isEmpty) {\n      Map.empty[String, String]\n    } else {\n      str.split(\",\").map { x =>\n        val y = x.split(\"=\")\n        y(0) -> y(1)\n      }.toMap[String, String]\n    }\n  }\n\n  /** Generate 32-character random ID using UUID with - stripped */\n  def generateId: String = java.util.UUID.randomUUID().toString.replace(\"-\", \"\")\n\n  /** Prefix a table name\n    *\n    * @param prefix Table prefix\n    * @param table Table name\n    * @return Prefixed table name\n    */\n  def prefixTableName(prefix: String, table: String): SQLSyntax =\n    sqls.createUnsafely(s\"${prefix}_$table\")\n\n  /** Derive event table name\n    *\n    * @param namespace Namespace of event tables\n    * @param appId App ID\n    * @param channelId Optional channel ID\n    * @return Full event table name\n    */\n  def eventTableName(namespace: String, appId: Int, channelId: Option[Int]): String =\n    s\"${namespace}_${appId}${channelId.map(\"_\" + _).getOrElse(\"\")}\"\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.predictionio.data.storage.StorageClientException\nimport scalikejdbc._\n\n/** JDBC implementation of [[BaseStorageClient]] */\nclass StorageClient(val config: StorageClientConfig)\n  extends BaseStorageClient with Logging {\n  override val prefix = \"JDBC\"\n\n  if (!config.properties.contains(\"URL\")) {\n    throw new StorageClientException(\"The URL variable is not set!\", null)\n  }\n  if (!config.properties.contains(\"USERNAME\")) {\n    throw new StorageClientException(\"The USERNAME variable is not set!\", null)\n  }\n  if (!config.properties.contains(\"PASSWORD\")) {\n    throw new StorageClientException(\"The PASSWORD variable is not set!\", null)\n  }\n\n  // set max size of connection pool\n  val maxSize: Int = config.properties.getOrElse(\"CONNECTIONS\", \"8\").toInt\n  val settings = ConnectionPoolSettings(maxSize = maxSize)\n\n  ConnectionPool.singleton(\n    config.properties(\"URL\"),\n    
config.properties(\"USERNAME\"),\n    config.properties(\"PASSWORD\"),\n    settings)\n  /** JDBC connection URL. Connections are managed by ScalikeJDBC. */\n  val client = config.properties(\"URL\")\n}\n"
  },
  {
    "path": "storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** JDBC implementation of storage traits, supporting meta data, event data, and\n  * model data\n  *\n  * @group Implementation\n  */\npackage object jdbc {}\n"
  },
  {
    "path": "storage/jdbc/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is dummy just to make storage happy.\n    # The actual testing will not bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/JDBCUtilsSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport org.specs2.Specification\n\nclass JDBCUtilsSpec extends Specification {\n  def is = s2\"\"\"\n\n  PredictionIO JDBC Utilities Specification\n\n  driverType should extract the correct portion from a JDBC URL ${driverType}\n  mapToString should return an empty string with empty map input ${mapToStringEmptyInput}\n  stringToMap should correctly create mapping ${stringToMap}\n  stringToMap should return an empty map with empty string input ${stringToMapEmptyInput}\n\n  \"\"\"\n\n  def driverType = {\n    JDBCUtils.driverType(\"jdbc:postgresql://remotehost:5432/somedbname\") must beEqualTo(\"postgresql\")\n  }\n\n  def mapToStringEmptyInput = {\n    JDBCUtils.mapToString(Map.empty[String, String]) must be empty\n  }\n\n  def stringToMap = {\n    val m = JDBCUtils.stringToMap(\"FOO=BAR,DEAD=BEEF\")\n    m must havePairs(\"FOO\" -> \"BAR\", \"DEAD\" -> \"BEEF\")\n  }\n\n  def stringToMapEmptyInput = {\n    JDBCUtils.stringToMap(\"\") must be empty\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/LEventsSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport org.apache.predictionio.data.storage.{Event, LEvents, PropertyMap, Storage}\nimport org.specs2._\nimport org.specs2.specification.Step\n\nclass LEventsSpec extends Specification with TestEvents {\n  def is = s2\"\"\"\n\n  PredictionIO Storage LEvents Specification\n\n    Events can be implemented by:\n    - JDBCLEvents ${jdbcLEvents}\n\n  \"\"\"\n\n  def jdbcLEvents = sequential ^ s2\"\"\"\n\n    JDBCLEvents should\n    - behave like any LEvents implementation ${events(jdbcDO)}\n\n  \"\"\"\n\n  val appId = 1\n\n  def events(eventClient: LEvents) = sequential ^ s2\"\"\"\n\n    init default ${initDefault(eventClient)}\n    insert 3 test events and get back by event ID ${insertAndGetEvents(eventClient)}\n    insert 3 test events with timezone and get back by event ID ${insertAndGetTimezone(eventClient)}\n    insert and delete by ID ${insertAndDelete(eventClient)}\n    insert test user events ${insertTestUserEvents(eventClient)}\n    find user events ${findUserEvents(eventClient)}\n    aggregate user properties ${aggregateUserProperties(eventClient)}\n    aggregate one user properties 
${aggregateOneUserProperties(eventClient)}\n    aggregate non-existent user properties ${aggregateNonExistentUserProperties(eventClient)}\n    init channel ${initChannel(eventClient)}\n    insert 2 events to channel ${insertChannel(eventClient)}\n    insert 1 event to channel and delete by ID  ${insertAndDeleteChannel(eventClient)}\n    find events from channel ${findChannel(eventClient)}\n    remove default ${removeDefault(eventClient)}\n    remove channel ${removeChannel(eventClient)}\n\n  \"\"\"\n\n  val dbName = \"test_pio_storage_events_\" + hashCode\n\n  def jdbcDO = Storage.getDataObject[LEvents](StorageTestUtils.jdbcSourceName, dbName)\n\n  def initDefault(eventClient: LEvents) = {\n    eventClient.init(appId)\n  }\n\n  def insertAndGetEvents(eventClient: LEvents) = {\n\n    // events from TestEvents trait\n    val listOfEvents = List(r1,r2,r3)\n\n    val insertResp = listOfEvents.map { eventClient.insert(_, appId) }\n\n    val insertedEventId: List[String] = insertResp\n\n    val insertedEvent: List[Option[Event]] = listOfEvents.zip(insertedEventId)\n      .map { case (e, id) => Some(e.copy(eventId = Some(id))) }\n\n    val getResp = insertedEventId.map { id => eventClient.get(id, appId) }\n\n    val getEvents = getResp\n\n    insertedEvent must containTheSameElementsAs(getEvents)\n  }\n\n  def insertAndGetTimezone(eventClient: LEvents) = {\n    val listOfEvents = List(tz1, tz2, tz3)\n\n    val insertResp = listOfEvents.map { eventClient.insert(_, appId) }\n\n    val insertedEventId: List[String] = insertResp\n\n    val insertedEvent: List[Option[Event]] = listOfEvents.zip(insertedEventId)\n      .map { case (e, id) => Some(e.copy(eventId = Some(id))) }\n\n    val getResp = insertedEventId.map { id => eventClient.get(id, appId) }\n\n    val getEvents = getResp\n\n    insertedEvent must containTheSameElementsAs(getEvents)\n  }\n\n  def insertAndDelete(eventClient: LEvents) = {\n    val eventId = eventClient.insert(r2, appId)\n\n    val resultBefore = 
eventClient.get(eventId, appId)\n\n    val expectedBefore = r2.copy(eventId = Some(eventId))\n\n    val deleteStatus = eventClient.delete(eventId, appId)\n\n    val resultAfter = eventClient.get(eventId, appId)\n\n    (resultBefore must beEqualTo(Some(expectedBefore))) and\n    (deleteStatus must beEqualTo(true)) and\n    (resultAfter must beEqualTo(None))\n  }\n\n  def insertTestUserEvents(eventClient: LEvents) = {\n    // events from TestEvents trait\n    val listOfEvents = Vector(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n\n    listOfEvents.map{ eventClient.insert(_, appId) }\n\n    success\n  }\n\n  def findUserEvents(eventClient: LEvents) = {\n\n    val results: List[Event] = eventClient.find(\n      appId = appId,\n      entityType = Some(\"user\"))\n      .toList\n      .map(e => e.copy(eventId = None)) // ignore eventID\n\n    // same events in insertTestUserEvents\n    val expected = List(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2)\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def aggregateUserProperties(eventClient: LEvents) = {\n\n    val result: Map[String, PropertyMap] = eventClient.aggregateProperties(\n      appId = appId,\n      entityType = \"user\")\n\n    val expected = Map(\n      \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n      \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateOneUserProperties(eventClient: LEvents) = {\n    val result: Option[PropertyMap] = eventClient.aggregatePropertiesOfEntity(\n      appId = appId,\n      entityType = \"user\",\n      entityId = \"u1\")\n\n    val expected = Some(PropertyMap(u1, u1BaseTime, u1LastTime))\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateNonExistentUserProperties(eventClient: LEvents) = {\n    val result: Option[PropertyMap] = eventClient.aggregatePropertiesOfEntity(\n      appId = appId,\n      entityType = \"user\",\n      entityId = \"u999999\")\n\n    result must 
beEqualTo(None)\n  }\n\n  val channelId = 12\n\n  def initChannel(eventClient: LEvents) = {\n    eventClient.init(appId, Some(channelId))\n  }\n\n  def insertChannel(eventClient: LEvents) = {\n\n    // events from TestEvents trait\n    val listOfEvents = List(r4,r5)\n\n    listOfEvents.map( eventClient.insert(_, appId, Some(channelId)) )\n\n    success\n  }\n\n  def insertAndDeleteChannel(eventClient: LEvents) = {\n\n    val eventId = eventClient.insert(r2, appId, Some(channelId))\n\n    val resultBefore = eventClient.get(eventId, appId, Some(channelId))\n\n    val expectedBefore = r2.copy(eventId = Some(eventId))\n\n    val deleteStatus = eventClient.delete(eventId, appId, Some(channelId))\n\n    val resultAfter = eventClient.get(eventId, appId, Some(channelId))\n\n    (resultBefore must beEqualTo(Some(expectedBefore))) and\n    (deleteStatus must beEqualTo(true)) and\n    (resultAfter must beEqualTo(None))\n  }\n\n  def findChannel(eventClient: LEvents) = {\n\n    val results: List[Event] = eventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )\n    .toList\n    .map(e => e.copy(eventId = None)) // ignore eventId\n\n    // same events in insertChannel\n    val expected = List(r4, r5)\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def removeDefault(eventClient: LEvents) = {\n    eventClient.remove(appId)\n  }\n\n  def removeChannel(eventClient: LEvents) = {\n    eventClient.remove(appId, Some(channelId))\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/PEventsSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport org.apache.predictionio.data.storage._\nimport org.apache.spark.SparkContext\nimport org.apache.spark.rdd.RDD\nimport org.specs2._\nimport org.specs2.specification.Step\n\nclass PEventsSpec extends Specification with TestEvents {\n\n  System.clearProperty(\"spark.driver.port\")\n  System.clearProperty(\"spark.hostPort\")\n  val sc = new SparkContext(\"local[4]\", \"PEventAggregatorSpec test\")\n\n  val appId = 1\n  val channelId = 6\n  val dbName = \"test_pio_storage_events_\" + hashCode\n\n  def jdbcLocal = Storage.getDataObject[LEvents](\n    StorageTestUtils.jdbcSourceName,\n    dbName\n  )\n\n  def jdbcPar = Storage.getDataObject[PEvents](\n    StorageTestUtils.jdbcSourceName,\n    dbName\n  )\n\n  def stopSpark = {\n    sc.stop()\n  }\n\n  def is = s2\"\"\"\n\n  PredictionIO Storage PEvents Specification\n\n    PEvents can be implemented by:\n    - JDBCPEvents ${jdbcPEvents}\n    - (stop Spark) ${Step(sc.stop())}\n\n  \"\"\"\n\n  def jdbcPEvents = sequential ^ s2\"\"\"\n\n    JDBCPEvents should\n    - behave like any PEvents implementation ${events(jdbcLocal, jdbcPar)}\n    - 
(table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s\"${dbName}_$appId\"))}\n    - (table cleanup) ${Step(StorageTestUtils.dropJDBCTable(s\"${dbName}_${appId}_$channelId\"))}\n\n  \"\"\"\n\n  def events(localEventClient: LEvents, parEventClient: PEvents) = sequential ^ s2\"\"\"\n\n    - (init test) ${initTest(localEventClient)}\n    - (insert test events) ${insertTestEvents(localEventClient)}\n    find in default ${find(parEventClient)}\n    find in channel ${findChannel(parEventClient)}\n    aggregate user properties in default ${aggregateUserProperties(parEventClient)}\n    aggregate user properties in channel ${aggregateUserPropertiesChannel(parEventClient)}\n    write to default ${write(parEventClient)}\n    write to channel ${writeChannel(parEventClient)}\n\n  \"\"\"\n\n  /* setup */\n\n  // events from TestEvents trait\n  val listOfEvents = List(u1e5, u2e2, u1e3, u1e1, u2e3, u2e1, u1e4, u1e2, r1, r2)\n  val listOfEventsChannel = List(u3e1, u3e2, u3e3, r3, r4)\n\n  def initTest(localEventClient: LEvents) = {\n    localEventClient.init(appId)\n    localEventClient.init(appId, Some(channelId))\n  }\n\n  def insertTestEvents(localEventClient: LEvents) = {\n    listOfEvents.map( localEventClient.insert(_, appId) )\n    // insert to channel\n    listOfEventsChannel.map( localEventClient.insert(_, appId, Some(channelId)) )\n    success\n  }\n\n  /* following are tests */\n\n  def find(parEventClient: PEvents) = {\n    val resultRDD: RDD[Event] = parEventClient.find(\n      appId = appId\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map {_.copy(eventId = None)} // ignore eventId\n\n    results must containTheSameElementsAs(listOfEvents)\n  }\n\n  def findChannel(parEventClient: PEvents) = {\n    val resultRDD: RDD[Event] = parEventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map {_.copy(eventId = None)} // ignore eventId\n\n    results must 
containTheSameElementsAs(listOfEventsChannel)\n  }\n\n  def aggregateUserProperties(parEventClient: PEvents) = {\n    val resultRDD: RDD[(String, PropertyMap)] = parEventClient.aggregateProperties(\n      appId = appId,\n      entityType = \"user\"\n    )(sc)\n    val result: Map[String, PropertyMap] = resultRDD.collectAsMap.toMap\n\n    val expected = Map(\n      \"u1\" -> PropertyMap(u1, u1BaseTime, u1LastTime),\n      \"u2\" -> PropertyMap(u2, u2BaseTime, u2LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def aggregateUserPropertiesChannel(parEventClient: PEvents) = {\n    val resultRDD: RDD[(String, PropertyMap)] = parEventClient.aggregateProperties(\n      appId = appId,\n      channelId = Some(channelId),\n      entityType = \"user\"\n    )(sc)\n    val result: Map[String, PropertyMap] = resultRDD.collectAsMap.toMap\n\n    val expected = Map(\n      \"u3\" -> PropertyMap(u3, u3BaseTime, u3LastTime)\n    )\n\n    result must beEqualTo(expected)\n  }\n\n  def write(parEventClient: PEvents) = {\n    val written = List(r5, r6)\n    val writtenRDD = sc.parallelize(written)\n    parEventClient.write(writtenRDD, appId)(sc)\n\n    // read back\n    val resultRDD = parEventClient.find(\n      appId = appId\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map { _.copy(eventId = None)} // ignore eventId\n\n    val expected = listOfEvents ++ written\n\n    results must containTheSameElementsAs(expected)\n  }\n\n  def writeChannel(parEventClient: PEvents) = {\n    val written = List(r1, r5, r6)\n    val writtenRDD = sc.parallelize(written)\n    parEventClient.write(writtenRDD, appId, Some(channelId))(sc)\n\n    // read back\n    val resultRDD = parEventClient.find(\n      appId = appId,\n      channelId = Some(channelId)\n    )(sc)\n\n    val results = resultRDD.collect.toList\n      .map { _.copy(eventId = None)} // ignore eventId\n\n    val expected = listOfEventsChannel ++ written\n\n    results must containTheSameElementsAs(expected)\n 
 }\n\n}\n"
  },
  {
    "path": "storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/StorageTestUtils.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport scalikejdbc._\n\nobject StorageTestUtils {\n  val jdbcSourceName = \"PGSQL\"\n\n  def dropJDBCTable(table: String): Unit = DB autoCommit { implicit s =>\n    SQL(s\"drop table $table\").execute().apply()\n  }\n}\n"
  },
  {
    "path": "storage/jdbc/src/test/scala/org/apache/predictionio/data/storage/jdbc/TestEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.jdbc\n\nimport org.apache.predictionio.data.storage.{DataMap, Event}\nimport org.joda.time.{DateTime, DateTimeZone}\n\ntrait TestEvents {\n\n  val u1BaseTime = new DateTime(654321)\n  val u2BaseTime = new DateTime(6543210)\n  val u3BaseTime = new DateTime(6543410)\n\n  // u1 events\n  val u1e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u1\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 1,\n        \"b\" : \"value2\",\n        \"d\" : [1, 2, 3],\n      }\"\"\"),\n    eventTime = u1BaseTime\n  )\n\n  val u1e2 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"a\" : 2}\"\"\"),\n    eventTime = u1BaseTime.plusDays(1)\n  )\n\n  val u1e3 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value4\"}\"\"\"),\n    eventTime = u1BaseTime.plusDays(2)\n  )\n\n  val u1e4 = u1e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"b\" : null}\"\"\"),\n    eventTime = u1BaseTime.plusDays(3)\n  )\n\n  val u1e5 = u1e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"e\" : \"new\"}\"\"\"),\n  
  eventTime = u1BaseTime.plusDays(4)\n  )\n\n  val u1LastTime = u1BaseTime.plusDays(4)\n  val u1 = \"\"\"{\"a\": 2, \"d\": [1, 2, 3], \"e\": \"new\"}\"\"\"\n\n  // delete event for u1\n  val u1ed = u1e1.copy(\n    event = \"$delete\",\n    properties = DataMap(),\n    eventTime = u1BaseTime.plusDays(5)\n  )\n\n  // u2 events\n  val u2e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u2\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 21,\n        \"b\" : \"value12\",\n        \"d\" : [7, 5, 6],\n      }\"\"\"),\n    eventTime = u2BaseTime\n  )\n\n  val u2e2 = u2e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u2BaseTime.plusDays(1)\n  )\n\n  val u2e3 = u2e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value9\", \"g\": \"new11\"}\"\"\"),\n    eventTime = u2BaseTime.plusDays(2)\n  )\n\n  val u2LastTime = u2BaseTime.plusDays(2)\n  val u2 = \"\"\"{\"b\": \"value9\", \"d\": [7, 5, 6], \"g\": \"new11\"}\"\"\"\n\n  // u3 events\n  val u3e1 = Event(\n    event = \"$set\",\n    entityType = \"user\",\n    entityId = \"u3\",\n    properties = DataMap(\n      \"\"\"{\n        \"a\" : 22,\n        \"b\" : \"value13\",\n        \"d\" : [5, 6, 1],\n      }\"\"\"),\n    eventTime = u3BaseTime\n  )\n\n  val u3e2 = u3e1.copy(\n    event = \"$unset\",\n    properties = DataMap(\"\"\"{\"a\" : null}\"\"\"),\n    eventTime = u3BaseTime.plusDays(1)\n  )\n\n  val u3e3 = u3e1.copy(\n    event = \"$set\",\n    properties = DataMap(\"\"\"{\"b\" : \"value10\", \"f\": \"new12\", \"d\" : [1, 3, 2]}\"\"\"),\n    eventTime = u3BaseTime.plusDays(2)\n  )\n\n  val u3LastTime = u3BaseTime.plusDays(2)\n  val u3 = \"\"\"{\"b\": \"value10\", \"d\": [1, 3, 2], \"f\": \"new12\"}\"\"\"\n\n  // some random events\n  val r1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = 
Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now,\n    prId = Some(\"my_prid\")\n  )\n  val r2 = Event(\n    event = \"my_event2\",\n    entityType = \"my_entity_type2\",\n    entityId = \"my_entity_id2\"\n  )\n  val r3 = Event(\n    event = \"my_event3\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"propA\" : 1.2345,\n        \"propB\" : \"valueB\",\n      }\"\"\"\n    ),\n    prId = Some(\"my_prid\")\n  )\n  val r4 = Event(\n    event = \"my_event4\",\n    entityType = \"my_entity_type4\",\n    entityId = \"my_entity_id4\",\n    targetEntityType = Some(\"my_target_entity_type4\"),\n    targetEntityId = Some(\"my_target_entity_id4\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"),\n    eventTime = DateTime.now\n  )\n  val r5 = Event(\n    event = \"my_event5\",\n    entityType = \"my_entity_type5\",\n    entityId = \"my_entity_id5\",\n    targetEntityType = Some(\"my_target_entity_type5\"),\n    targetEntityId = Some(\"my_target_entity_id5\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n  val r6 = Event(\n    event = \"my_event6\",\n 
   entityType = \"my_entity_type6\",\n    entityId = \"my_entity_id6\",\n    targetEntityType = Some(\"my_target_entity_type6\"),\n    targetEntityId = Some(\"my_target_entity_id6\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 6,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [6, 7, 8],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = DateTime.now\n  )\n\n  // timezone\n  val tz1 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id0\",\n    targetEntityType = Some(\"my_target_entity_type\"),\n    targetEntityId = Some(\"my_target_entity_id\"),\n    properties = DataMap(\n      \"\"\"{\n        \"prop1\" : 1,\n        \"prop2\" : \"value2\",\n        \"prop3\" : [1, 2, 3],\n        \"prop4\" : true,\n        \"prop5\" : [\"a\", \"b\", \"c\"],\n        \"prop6\" : 4.56\n      }\"\"\"\n    ),\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"-08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz2 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id1\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+02:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n  val tz3 = Event(\n    event = \"my_event\",\n    entityType = \"my_entity_type\",\n    entityId = \"my_entity_id2\",\n    eventTime = new DateTime(12345678, DateTimeZone.forID(\"+08:00\")),\n    prId = Some(\"my_prid\")\n  )\n\n}\n"
  },
  {
    "path": "storage/localfs/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/localfs/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-localfs\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % version.value % \"provided\",\n  \"org.scalatest\"           %% \"scalatest\"      % \"2.1.7\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  s\"pio-data-localfs-assembly-${version.value}.jar\"\n"
  },
  {
    "path": "storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/LocalFSModels.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.localfs\n\nimport java.io.File\nimport java.io.FileNotFoundException\nimport java.io.FileOutputStream\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.Model\nimport org.apache.predictionio.data.storage.Models\nimport org.apache.predictionio.data.storage.StorageClientConfig\n\nimport scala.io.Source\n\nclass LocalFSModels(f: File, config: StorageClientConfig, prefix: String)\n  extends Models with Logging {\n\n  def insert(i: Model): Unit = {\n    try {\n      val fos = new FileOutputStream(new File(f, s\"${prefix}${i.id}\"))\n      fos.write(i.models)\n      fos.close\n    } catch {\n      case e: FileNotFoundException => error(e.getMessage)\n    }\n  }\n\n  def get(id: String): Option[Model] = {\n    try {\n      Some(Model(\n        id = id,\n        models = Source.fromFile(new File(f, s\"${prefix}${id}\"))(\n          scala.io.Codec.ISO8859).map(_.toByte).toArray))\n    } catch {\n      case e: Throwable =>\n        error(e.getMessage)\n        None\n    }\n  }\n\n  def delete(id: String): Unit = {\n    val m = new File(f, s\"${prefix}${id}\")\n    if (!m.delete) 
error(s\"Unable to delete ${m.getCanonicalPath}!\")\n  }\n}\n"
  },
  {
    "path": "storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage.localfs\n\nimport java.io.File\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport org.apache.predictionio.data.storage.StorageClientException\n\nclass StorageClient(val config: StorageClientConfig) extends BaseStorageClient\n    with Logging {\n  override val prefix = \"LocalFS\"\n  val f = new File(\n    config.properties.getOrElse(\"PATH\", config.properties(\"HOSTS\")))\n  if (f.exists) {\n    if (!f.isDirectory) throw new StorageClientException(\n      s\"${f} already exists but it is not a directory!\",\n      null)\n    if (!f.canWrite) throw new StorageClientException(\n      s\"${f} already exists but it is not writable!\",\n      null)\n  } else {\n    if (!f.mkdirs) throw new StorageClientException(\n      s\"${f} does not exist and automatic creation failed!\",\n      null)\n  }\n  val client = f\n}\n"
  },
  {
    "path": "storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** Local file system implementation of storage traits, supporting model data only\n  *\n  * @group Implementation\n  */\npackage object localfs {}\n"
  },
  {
    "path": "storage/localfs/src/test/resources/application.conf",
    "content": "org.apache.predictionio.data.storage {\n  sources {\n    mongodb {\n      type = mongodb\n      hosts = [localhost]\n      ports = [27017]\n    }\n    elasticsearch {\n      type = elasticsearch\n      hosts = [localhost]\n      ports = [9300]\n    }\n  }\n  repositories {\n    # This section is dummy just to make storage happy.\n    # The actual testing will not bypass these repository settings completely.\n    # Please refer to StorageTestUtils.scala.\n    settings {\n      name = \"test_predictionio\"\n      source = mongodb\n    }\n\n    appdata {\n      name = \"test_predictionio_appdata\"\n      source = mongodb\n    }\n  }\n}\n"
  },
  {
    "path": "storage/s3/.gitignore",
    "content": "/bin/\n"
  },
  {
    "path": "storage/s3/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\n\nname := \"apache-predictionio-data-s3\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % version.value % \"provided\",\n  \"com.google.guava\"        % \"guava\"                     % \"14.0.1\"      % \"provided\",\n  \"com.amazonaws\"           % \"aws-java-sdk-s3\"           % \"1.11.132\",\n  \"org.scalatest\"           %% \"scalatest\"                % \"2.1.7\" % \"test\")\n\nparallelExecution in Test := false\n\npomExtra := childrenPomExtra.value\n\nassemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)\n\nassemblyShadeRules in assembly := Seq(\n  ShadeRule.rename(\"org.apache.http.**\" -> \"shadeio.data.s3.http.@1\").inAll,\n  ShadeRule.rename(\"com.fasterxml.**\" -> \"shadeio.data.s3.fasterxml.@1\").inAll\n)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / \"spark\" /\n  s\"pio-data-s3-assembly-${version.value}.jar\"\n"
  },
  {
    "path": "storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/S3Models.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.s3\n\nimport java.io.ByteArrayInputStream\n\nimport org.apache.predictionio.data.storage.Model\nimport org.apache.predictionio.data.storage.Models\nimport org.apache.predictionio.data.storage.StorageClientConfig\n\nimport com.amazonaws.services.s3.AmazonS3\nimport com.amazonaws.services.s3.model.DeleteObjectRequest\nimport com.amazonaws.services.s3.model.GetObjectRequest\nimport com.amazonaws.services.s3.model.ObjectMetadata\nimport com.amazonaws.services.s3.model.PutObjectRequest\nimport com.amazonaws.services.s3.model.S3Object\nimport com.google.common.io.ByteStreams\n\nimport grizzled.slf4j.Logging\n\nclass S3Models(s3Client: AmazonS3, config: StorageClientConfig, prefix: String)\n    extends Models with Logging {\n\n  def insert(i: Model): Unit = {\n    def getModel(bucketName: String, key: String): Option[Model] = {\n      val data = i.models\n      val metadata: ObjectMetadata = new ObjectMetadata()\n      metadata.setContentLength(data.length)\n      val req = new PutObjectRequest(bucketName, key, new ByteArrayInputStream(data), metadata)\n      try {\n        s3Client.putObject(req)\n      
} catch {\n        case e: Throwable => error(s\"Failed to insert a model to s3://${bucketName}/${key}\", e)\n      }\n      None\n    }\n    doAction(i.id, getModel)\n  }\n\n  def get(id: String): Option[Model] = {\n    def getModel(bucketName: String, key: String): Option[Model] = {\n      val s3object: S3Object = s3Client.getObject(new GetObjectRequest(\n        bucketName, key));\n      val is = s3object.getObjectContent\n      try {\n        Some(Model(\n          id = id,\n          models = ByteStreams.toByteArray(is)))\n      } catch {\n        case e: Throwable =>\n          error(s\"Failed to get a model from s3://${bucketName}/${key}\", e)\n          None\n      } finally {\n        is.close()\n      }\n    }\n    doAction(id, getModel)\n  }\n\n  def delete(id: String): Unit = {\n    def deleteModel(bucketName: String, key: String): Option[Model] = {\n      try {\n        s3Client.deleteObject(new DeleteObjectRequest(bucketName, key))\n      } catch {\n        case e: Throwable => error(s\"Failed to delete s3://${bucketName}/${key}\", e)\n      }\n      None\n    }\n    doAction(id, deleteModel)\n  }\n\n  def doAction(id: String, action: (String, String) => Option[Model]): Option[Model] = {\n    config.properties.get(\"BUCKET_NAME\") match {\n      case Some(bucketName) =>\n        val key = config.properties.get(\"BASE_PATH\") match {\n          case Some(basePath) => s\"${basePath}/${prefix}${id}\"\n          case None => s\"${prefix}${id}\"\n        }\n        action(bucketName, key)\n      case None =>\n        error(\"S3 bucket is empty.\")\n        None\n    }\n  }\n\n}\n"
  },
  {
    "path": "storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/StorageClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.data.storage.s3\n\nimport com.amazonaws.auth.DefaultAWSCredentialsProviderChain\nimport org.apache.predictionio.data.storage.BaseStorageClient\nimport org.apache.predictionio.data.storage.StorageClientConfig\nimport com.amazonaws.client.builder.AwsClientBuilder\nimport com.amazonaws.services.s3.AmazonS3\nimport com.amazonaws.services.s3.AmazonS3ClientBuilder\nimport grizzled.slf4j.Logging\n\nclass StorageClient(val config: StorageClientConfig) extends BaseStorageClient\n    with Logging {\n  override val prefix = \"S3\"\n  val client: AmazonS3 = {\n    val builder = AmazonS3ClientBuilder\n                    .standard()\n                    .withCredentials(DefaultAWSCredentialsProviderChain.getInstance())\n    (config.properties.get(\"ENDPOINT\"), config.properties.get(\"REGION\")) match {\n      case (Some(endpoint), Some(region)) =>\n        builder.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, region))\n      case (None, Some(region)) => builder.withRegion(region)\n      case _ =>\n    }\n    config.properties.get(\"DISABLE_CHUNKED_ENCODING\") match {\n      case Some(x) if 
x.equalsIgnoreCase(\"true\") => builder.disableChunkedEncoding()\n      case _ =>\n    }\n    builder.build()\n  }\n}\n"
  },
  {
    "path": "storage/s3/src/main/scala/org/apache/predictionio/data/storage/s3/package.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.data.storage\n\n/** AWS S3 implementation of storage traits, supporting model data only\n  *\n  * @group Implementation\n  */\npackage object s3 {}\n"
  },
  {
    "path": "tests/.rat-excludes",
    "content": "RELEASE\nKEYS\nspark-env.sh\n.gitignore\n.gitattributes\n.npmignore\n.rat-excludes\n.project\nsbt-launch-lib.bash\nplugins.sbt\nbuild.properties\neventserver.pid\napplication.conf\nassembly.sbt\npio-build.sbt\npio.sbt\nunidoc.sbt\nspark-defaults.conf\nGemfile.lock\ntemplates.yaml\nsemver.sh\npgpass\n\nPredictionIO-.*/*\ntarget/*\n/source\ntest-reports/*\ndist/*\nvendors/*\n.logs/*\nsbt/*\n\n.*slim\n.*eps\n.*txt\n.*svg\n.*jks\n.*json\n.*log\n.*template\n.*js\n.*css\n.*map\n.*data\n.*csv\n.*Driver\n.*rst\n"
  },
  {
    "path": "tests/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nFROM predictionio/pio\n\nARG SPARK_ARCHIVE\nARG SPARK_DIR\nARG PGSQL_JAR\nARG PIO_SCALA_VERSION\nARG PIO_SPARK_VERSION\nARG PIO_HADOOP_VERSION\nARG PIO_ELASTICSEARCH_VERSION\n\nENV PIO_SCALA_VERSION=$PIO_SCALA_VERSION\nENV PIO_SPARK_VERSION=$PIO_SPARK_VERSION\nENV PIO_HADOOP_VERSION=$PIO_HADOOP_VERSION\nENV PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION\n\nENV PGSQL_JAR=$PGSQL_JAR\n\n# WORKAROUND: es-hadoop stops on RDD#take(1)\nADD docker-files/${SPARK_ARCHIVE} /vendors\nRUN echo \"spark.locality.wait.node 0s\" > /vendors/${SPARK_DIR}/conf/spark-defaults.conf\nENV SPARK_HOME /vendors/${SPARK_DIR}\n\nCOPY docker-files/${PGSQL_JAR} /drivers/${PGSQL_JAR}\nCOPY docker-files/init.sh init.sh\nCOPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml\nCOPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh\nCOPY docker-files/pgpass /root/.pgpass\nCOPY docker-files/awscredentials /root/.aws/credentials\nRUN chmod 600 /root/.pgpass\n\n# Python\nRUN pip install python-dateutil\nRUN pip install pytz\nRUN pip install awscli\n\n# Default repositories setup\nENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL\nENV 
PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL\nENV PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE PGSQL\n\n# JVM settings\nENV JVM_OPTS '-Dfile.encoding=UTF8 -Xms2048M -Xmx2048M -Xss8M -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=256M'\n\n# Expose relevant ports\n# pio engine\nEXPOSE 8000\n# eventserver\nEXPOSE 7070\n\nENV SLEEP_TIME 30\n\nENTRYPOINT [\"/init.sh\"]\nCMD 'bash'\n"
  },
  {
    "path": "tests/Dockerfile.base",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. AT\n#          THIS POINT, THIS IS ONLY INTENDED FOR USE IN AUTOMATED TESTS. IF YOU\n#          ARE LOOKING TO DEPLOY PREDICTIONIO WITH DOCKER, PLEASE REFER TO\n#          http://predictionio.apache.org/community/projects/#docker-installation-for-predictionio\n\n# Tests do not like the musl libc :(, and we need Python 3.5\nFROM ubuntu:xenial\n\n# Install OpenJDK 8 and Python 3.5\nRUN apt-get update && apt-get install -y \\\n    openjdk-8-jdk \\\n    wget curl \\\n    python-pip \\\n    python3-pip \\\n    postgresql-client \\\n    openssh-client openssh-server \\\n    git\n\nRUN pip install predictionio && pip3 install --upgrade \\\n    pip \\\n    xmlrunner \\\n    requests \\\n    urllib3\n\nENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/jre\n"
  },
  {
    "path": "tests/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# Testing PredictionIO\n\nThe intention of this subdirectory is to collect different types of tests other than unit tests, and to make developers' lives easier by giving them the means to check the application deterministically against different configurations.\nMoreover, it provides testing scenarios for **TravisCI** to be run on pull requests and commits.\n\n\n## Integration Tests\nThese tests are mostly user-functionality tests. They check the logic and reliability of the system.\nIn order to get familiar with their structure, please see [README](pio_tests/README.md).\n\n## Docker image\nAfter introducing some changes, a developer will want to try them against different configurations to see whether everything works as expected, e.g. when the data repository for events or meta-data is changed.\nA good way to do that is to use the docker image with installed and running dependencies.\n\nTo download the image, run:\n```\n$ docker pull predictionio/pio-testing\n```\n\nTo build the image, use the script:\n```\n$ tests/build_docker.sh\n```\nThis is necessary to infer the proper versions of dependencies, e.g. 
Spark, to be included in the image.\n\nThe most convenient way to make use of it is to execute the ***run_docker.sh*** script, passing it the configuration, the path to PredictionIO's repository with an archived snapshot, and the command to run. When no command is provided, it opens a bash shell inside the docker image. Example of usage:\n```sh\n$ ./run_docker.sh ELASTICSEARCH HBASE LOCALFS \\\n    ~/projects/predictionio \"echo 'All tests passed...'\"\n```\n\nDirectory structure inside the image:\n* ***/PredictionIO*** - extracted snapshot (***/PredictionIO/bin*** is also already added to PATH)\n* ***/pio_host*** - mounted path to repository\n* ***/tests/pio_tests*** - copy of integration tests\n* ***/vendors*** - directory with installed services\n* ***/drivers*** - jars with database drivers\n"
  },
  {
    "path": "tests/after_script.travis.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" && pwd )\"\n\nsource $DIR/../conf/pio-vendors.sh\n\n# Print a summary of containers used\ndocker ps -a\n\n# Clean up used containers\ndocker-compose -f $DIR/docker-compose.yml down\n"
  },
  {
    "path": "tests/before_script.travis.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" && pwd )\"\n\n$DIR/build_docker.sh\n"
  },
  {
    "path": "tests/build_docker.sh",
    "content": "#!/usr/bin/env bash\n\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" && pwd )\"\n\ndocker pull predictionio/pio-testing-base\n\npushd $DIR/..\n\nsource conf/pio-vendors.sh\nif [ ! -f $DIR/docker-files/${PGSQL_JAR} ]; then\n  wget $PGSQL_DOWNLOAD\n  mv ${PGSQL_JAR} $DIR/docker-files/\nfi\nif [ ! -f $DIR/docker-files/${SPARK_ARCHIVE} ]; then\n  curl -fLo $SPARK_ARCHIVE $SPARK_DOWNLOAD_MIRROR\n  if [[ $? 
-ne 0 ]]; then\n    curl -fLo $SPARK_ARCHIVE $SPARK_DOWNLOAD_ARCHIVE\n  fi\n  mv $SPARK_ARCHIVE $DIR/docker-files/\nfi\n\nset -e\n\n./make-distribution.sh \\\n    -Dscala.version=$PIO_SCALA_VERSION \\\n    -Dspark.version=$PIO_SPARK_VERSION \\\n    -Dhadoop.version=$PIO_HADOOP_VERSION \\\n    -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION \\\n    -Dhbase.version=$PIO_HBASE_VERSION\nsbt/sbt clean storage/clean\n\nassembly_folder=assembly/src/universal/lib\nrm -rf ${assembly_folder}/*.jar\nrm -rf ${assembly_folder}/spark\nmkdir -p ${assembly_folder}/spark\n\ncp dist/lib/*.jar ${assembly_folder}\ncp dist/lib/spark/*.jar ${assembly_folder}/spark\nrm *.tar.gz\ndocker build -t predictionio/pio .\npopd\n\ndocker build -t predictionio/pio-testing $DIR \\\n  --build-arg SPARK_ARCHIVE=$SPARK_ARCHIVE \\\n  --build-arg SPARK_DIR=$SPARK_DIR \\\n  --build-arg PGSQL_JAR=$PGSQL_JAR \\\n  --build-arg PIO_SCALA_VERSION=$PIO_SCALA_VERSION \\\n  --build-arg PIO_SPARK_VERSION=$PIO_SPARK_VERSION \\\n  --build-arg PIO_HADOOP_VERSION=$PIO_HADOOP_VERSION \\\n  --build-arg PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION\n"
  },
  {
    "path": "tests/check_libraries.sh",
    "content": "#!/usr/bin/env bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Go to PredictionIO directory\nFWDIR=\"$(cd \"`dirname \"$0\"`\"/..; pwd)\"\nmkdir -p ${FWDIR}/lib\ncd ${FWDIR}\n\nREPORT_DIR=\"${FWDIR}/test-reports\"\nGATHERED_FILE=\"${REPORT_DIR}/licences-gathered.csv\"\nFILTERED_FILE=\"${REPORT_DIR}/licences-filtered.csv\"\nERROR_FILE=\"${REPORT_DIR}/licences-errors.csv\"\n\n# Extract libraries which are not described in LICENSE.txt\necho \"Check library dependencies...\"\n\n# Generate license report\nsbt/sbt clean\nsbt/sbt dumpLicenseReport\n\nsbt/sbt storage/clean\nsbt/sbt storage/dumpLicenseReport\n\n# Clean up\nmkdir -p ${REPORT_DIR}\n\nrm -f ${GATHERED_FILE}\nrm -f ${FILTERED_FILE}\nrm -f ${ERROR_FILE}\n\n# Gather and filter reports\nfind . 
-name \"*-licenses.csv\" -exec cat {} >> ${GATHERED_FILE} \\;\ncat ${GATHERED_FILE} | sort | uniq | grep -v \"Category,License,Dependency,Notes\" | \\\n  grep -v \"Apache\" | \\\n  grep -v \"ASL\" | \\\n  grep -v \"org.apache\" | \\\n  grep -v \"commons-\" | \\\n  grep -v \"tomcat\" | \\\n  grep -v \"org.codehaus.jettison\" | \\\n  grep -v \"xml-apis\" | \\\n  grep -v \"org.mortbay.jetty\" | \\\n  grep -v \"com.google.guava\" | \\\n  grep -v \"predictionio\" > ${FILTERED_FILE}\n\n# Check for undocumented dependencies\ncat ${FILTERED_FILE} | while read LINE\ndo\n  LIBRARY=`echo ${LINE} | cut -d ',' -f 3`\n  grep -q \"$LIBRARY\" \"${FWDIR}/LICENSE.txt\"\n  if [ $? -ne 0 ]; then\n    echo -e \"\\033[0;31m[error]\\033[0;39m Undocumented dependency: $LINE\"\n    echo \"Undocumented dependency: $LINE\" >> ${ERROR_FILE}\n  fi\ndone\n\n# Extract libraries which are described in LICENSE.txt but do not actually exist\necho \"Check libraries described in LICENSE.txt...\"\n  \ncat \"${FWDIR}/LICENSE.txt\" | grep \"#\" | sed -e 's/(.*)//' | sed -e '/^#/d' | while read LINE\ndo\n  grep -q \"$LINE\" ${GATHERED_FILE}\n  if [ $? -ne 0 ]; then\n    echo -e \"\\033[0;31m[error]\\033[0;39m Can't find: $LINE\"\n    echo \"Unused dependency: $LINE\" >> ${ERROR_FILE}\n  fi\ndone\n\nif [ -f ${ERROR_FILE} ]; then\n  echo \"Library checks failed.\"\n  exit 1\nelse \n  echo \"Library checks passed.\"\n  exit 0\nfi  \n"
  },
  {
    "path": "tests/check_license.sh",
    "content": "#!/usr/bin/env bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Go to PredictionIO directory\nFWDIR=\"$(cd \"`dirname \"$0\"`\"/..; pwd)\"\nmkdir -p ${FWDIR}/lib\ncd ${FWDIR}/lib\n\n# Download RAT jar in lib/\nRAT_VERSION=0.11\nRAT_JAR=\"${FWDIR}/lib/apache-rat-${RAT_VERSION}.jar\"\nURL=\"http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar\"\nif [ ! -f \"$RAT_JAR\" ]; then\n  if [ $(command -v curl) ]; then\n    curl -OL --silent \"${URL}\"\n  elif [ $(command -v wget) ]; then\n    wget --quiet ${URL}\n  fi\nfi\nif [ ! -f \"$RAT_JAR\" ]; then\n  echo \"${RAT_JAR} download failed. Please install rat manually.\\n\"\n  exit 1\nfi\n\n# Run RAT testing\nTEST_DIR=\"${FWDIR}/tests\"\nREPORT_DIR=\"${FWDIR}/test-reports\"\nmkdir -p ${REPORT_DIR}\njava -jar ${RAT_JAR} -E ${TEST_DIR}/.rat-excludes -d ${FWDIR} > ${REPORT_DIR}/rat-results.txt\nif [ $? -ne 0 ]; then\n echo \"RAT exited abnormally\"\n exit 1\nfi\n\n# Print results\nERRORS=\"$(cat ${REPORT_DIR}/rat-results.txt | grep -e \"??\")\"\nif test ! 
-z \"$ERRORS\"; then\n  echo \"Could not find Apache license headers in the following files:\"\n  echo \"$ERRORS\"\n  exit 1\nelse\n  echo -e \"RAT checks passed.\"\nfi\n"
  },
  {
    "path": "tests/docker-compose.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\nversion: \"2\"\nservices:\n  elasticsearch:\n    image: ${ES_IMAGE}:${ES_TAG}\n    environment:\n      - xpack.security.enabled=false\n      - \"ES_JAVA_OPTS=-Xms512m -Xmx512m\"\n  hbase:\n    image: harisekhon/hbase:${HBASE_TAG}\n  postgres:\n    image: postgres:9\n    environment:\n      POSTGRES_USER: pio\n      POSTGRES_PASSWORD: pio\n      POSTGRES_INITDB_ARGS: --encoding=UTF8\n  localstack:\n    image: atlassianlabs/localstack\n    environment:\n      - SERVICES=s3\n      - DEBUG=1\n  pio-testing:\n    image: predictionio/pio-testing:latest\n    depends_on:\n      - elasticsearch\n      - hbase\n      - postgres\n      - localstack\n    volumes:\n      - ~/.ivy2:/root/.ivy2\n      - ~/.sbt:/root/.sbt\n"
  },
  {
    "path": "tests/docker-files/awscredentials",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n[default]\naws_access_key_id = foo\naws_secret_access_key = foo\n"
  },
  {
    "path": "tests/docker-files/env-conf/hbase-site.xml",
    "content": "<?xml version=\"1.0\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n   http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n<configuration>\n  <property>\n    <name>hbase.zookeeper.quorum</name>\n    <value>hbase</value>\n  </property>\n  <!-- https://issues.apache.org/jira/browse/SPARK-21549 -->\n  <property>\n    <name>mapreduce.output.fileoutputformat.outputdir</name>\n    <value>/tmp</value>\n  </property>\n</configuration>\n"
  },
  {
    "path": "tests/docker-files/env-conf/pio-env.sh",
    "content": "#!/usr/bin/env bash\n#\n# Copy this file as pio-env.sh and edit it for your site's configuration.\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# PredictionIO Main Configuration\n#\n# This section controls core behavior of PredictionIO. 
It is very likely that\n# you need to change these to fit your site.\n\n# SPARK_HOME: Apache Spark is a hard dependency and must be configured.\n# SPARK_HOME=$SPARK_HOME\n\nPOSTGRES_JDBC_DRIVER=/drivers/$PGSQL_JAR\nMYSQL_JDBC_DRIVER=\n\n# ES_CONF_DIR: You must configure this if you have advanced configuration for\n#              your Elasticsearch setup.\n# ES_CONF_DIR=/opt/elasticsearch\n\n# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO\n#                  with Hadoop 2.\n# HADOOP_CONF_DIR=/opt/hadoop\n\n# HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO\n#                 with HBase on a remote cluster.\nHBASE_CONF_DIR=$PIO_HOME/conf\n\n# Filesystem paths where PredictionIO uses as block storage.\nPIO_FS_BASEDIR=$HOME/.pio_store\nPIO_FS_ENGINESDIR=$PIO_FS_BASEDIR/engines\nPIO_FS_TMPDIR=$PIO_FS_BASEDIR/tmp\n\n# PredictionIO Storage Configuration\n#\n# This section controls programs that make use of PredictionIO's built-in\n# storage facilities. 
Default values are shown below.\n#\n# For more information on storage configuration please refer to\n# https://predictionio.apache.org/system/anotherdatastore/\n\n# Storage Repositories\n\n# Default is to use PostgreSQL\nPIO_STORAGE_REPOSITORIES_METADATA_NAME=pio_meta\nPIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$PIO_STORAGE_REPOSITORIES_METADATA_SOURCE\n\nPIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event\nPIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE\n\nPIO_STORAGE_REPOSITORIES_MODELDATA_NAME=pio_model\nPIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE\n\n# Storage Data Sources\n\n# PostgreSQL Default Settings\n# Please change \"pio\" to your database name in PIO_STORAGE_SOURCES_PGSQL_URL\n# Please change PIO_STORAGE_SOURCES_PGSQL_USERNAME and\n# PIO_STORAGE_SOURCES_PGSQL_PASSWORD accordingly\nPIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc\nPIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql://postgres/pio\nPIO_STORAGE_SOURCES_PGSQL_USERNAME=pio\nPIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio\n\n# MySQL Example\n# PIO_STORAGE_SOURCES_MYSQL_TYPE=jdbc\n# PIO_STORAGE_SOURCES_MYSQL_URL=jdbc:mysql://localhost/pio\n# PIO_STORAGE_SOURCES_MYSQL_USERNAME=pio\n# PIO_STORAGE_SOURCES_MYSQL_PASSWORD=pio\n\n# Elasticsearch Example\nPIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch\n#PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio\nPIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch\nPIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http\nPIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200\n#PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME\n\n# Local File System Example\nPIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs\nPIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/local_models\n\n# HBase Example\nPIO_STORAGE_SOURCES_HBASE_TYPE=hbase\n#PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME\n\n# HDFS config\nPIO_STORAGE_SOURCES_HDFS_TYPE=hdfs\nPIO_STORAGE_SOURCES_HDFS_PATH=/hdfs_models\n\n# AWS S3 
Example\nPIO_STORAGE_SOURCES_S3_TYPE=s3\nPIO_STORAGE_SOURCES_S3_ENDPOINT=http://localstack:4572\nPIO_STORAGE_SOURCES_S3_REGION=us-east-1\nPIO_STORAGE_SOURCES_S3_BUCKET_NAME=pio_bucket\nPIO_STORAGE_SOURCES_S3_BASE_PATH=pio_model\nPIO_STORAGE_SOURCES_S3_DISABLE_CHUNKED_ENCODING=true\n"
  },
  {
    "path": "tests/docker-files/init.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nset -e\nexport PYTHONPATH=$PIO_HOME/tests:$PYTHONPATH\necho \"Sleeping $SLEEP_TIME seconds for all services to be ready...\"\nsleep $SLEEP_TIME\n\n# create S3 bucket in localstack\naws --endpoint-url=http://localstack:4572 --region=us-east-1 s3 mb s3://pio_bucket\n\neval $@\n"
  },
  {
    "path": "tests/docker-files/pgpass",
    "content": "postgres:5432:pio:pio:pio\n"
  },
  {
    "path": "tests/docker-files/set_build_profile.sh",
    "content": "#!/bin/bash -\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\n# Sets version of profile dependencies from sbt configuration.\n# eg. Run `source ./set_build_profile.sh scala-2.11`\n\nset -e\n\nif [[ \"$#\" -ne 1 ]]; then\n  echo \"Usage: set-build-profile.sh <build-profile>\"\n  exit 1\nfi\n\nset -a\neval `$PIO_HOME/sbt/sbt --error 'set showSuccess := false' -Dbuild.profile=$1 printProfile | grep '.*_VERSION=.*'`\nset +a\n"
  },
  {
    "path": "tests/pio_tests/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# PredictionIO - Integration Tests\n\nThis python module introduces a basic framework for adding integration tests to\nPredictionIO. It is nothing more than a collection of utility functions mostly being wrappers\nover shell executed commands.\n\n### Prerequisites\nIn order to execute tests, besides a configured **PredictionIO** environment one\nhas to download the following python-3 packages:\n* requests\n* unittest\n* xmlrunner\n\n### Execution\n*tests.py* - the executable script. Launches eventserver to be available for the tests.\nYou can pass it arguments to:\n* suppress the output of executed shell commands within the tests\n* enable logging\n* specify which tests should be executed (by names)\n\nFor more information run:\n```shell\npython3 tests.py -h\n```\n\nAs soon as the tests are finished an XML file with JUnit-like test reports\nis created in the directory of execution.\n\n### Adding new tests\nEvery test should be an instance of **pio_tests.integration.BaseTestCase** defined in **pio_tests.integration**.  
\nUpon creation, a **pio_tests.integration.TestContext** object is provided to it with a description of:\n* the IP address and port of the running eventserver\n* the directories containing stored engines and data for specific tests\n\nEvery test should be registered in the appropriate place in the *tests.py* file, whereas\nits definition should reside in the **pio_tests.scenarios** module. If the test requires some additional files\nduring the execution, you should put them under the *data* directory mentioned above.\n\nThe best way to test different application engines is to make use of **pio_tests.utility.AppEngine**.\nApart from containing utility functions, it downloads engine templates if necessary.\n\nTo see an example of an implemented test, check **pio_tests.scenarios.quickstart_test**, which is\na repetition of the QuickStart tutorial from the doc site.\n"
  },
  {
    "path": "tests/pio_tests/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#"
  },
  {
    "path": "tests/pio_tests/data/eventserver_test/partially_malformed_events.json",
    "content": "[\n  { \n    \"event\" : \"test\",\n    \"entityType\" : \"test\",\n    \"entityId\" : \"t2\"\n  },\n  {\n    \"event\" : \"malformed-event\" \n  }\n]"
  },
  {
    "path": "tests/pio_tests/data/eventserver_test/rate_events_25.json",
    "content": "[\n  {\n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"1\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-01T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"2\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"3\",\n    \"properties\" : {\n      \"rating\" : 1\n    },\n    \"eventTime\" : \"2014-11-03T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"4\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-04T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"5\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-05T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"1\",\n    \"properties\" : {\n      \"rating\" : 1\n    },\n    \"eventTime\" : \"2014-11-01T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"2\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : 
\"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"3\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-03T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"4\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-04T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"5\",\n    \"properties\" : {\n      \"rating\" : 4\n    },\n    \"eventTime\" : \"2014-11-05T09:39:45.618-08:00\" \n  },\n  {\n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"3\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"1\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-01T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"3\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"2\",\n    \"properties\" : {\n      \"rating\" : 2\n    },\n    \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"3\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"3\",\n    \"properties\" : {\n      \"rating\" : 1\n    },\n    \"eventTime\" : \"2014-11-03T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"3\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"4\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-04T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    
\"entityId\" : \"3\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"5\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-05T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"1\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-01T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"2\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"3\",\n    \"properties\" : {\n      \"rating\" : 4\n    },\n    \"eventTime\" : \"2014-11-03T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"4\",\n    \"properties\" : {\n      \"rating\" : 2\n    },\n    \"eventTime\" : \"2014-11-04T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"5\",\n    \"properties\" : {\n      \"rating\" : 4\n    },\n    \"eventTime\" : \"2014-11-05T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"1\",\n    \"properties\" : {\n      \"rating\" : 2\n    },\n    \"eventTime\" : \"2014-11-01T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\",\n    
\"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"2\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"3\",\n    \"properties\" : {\n      \"rating\" : 3\n    },\n    \"eventTime\" : \"2014-11-03T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"4\",\n    \"properties\" : {\n      \"rating\" : 5\n    },\n    \"eventTime\" : \"2014-11-04T09:39:45.618-08:00\" \n  },\n  { \n    \"event\" : \"rate\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\",\n    \"targetEntityType\" : \"item\",\n    \"targetEntityId\" : \"5\",\n    \"properties\" : {\n      \"rating\" : 4\n    },\n    \"eventTime\" : \"2014-11-05T09:39:45.618-08:00\" \n  }\n]\n\n"
  },
  {
    "path": "tests/pio_tests/data/eventserver_test/signup_events_51.json",
    "content": "[\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"1\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"2\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"3\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"4\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"5\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"6\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"7\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"8\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"9\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"10\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"11\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"12\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"13\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"14\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"15\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"16\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"17\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"18\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"19\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n   
 \"entityId\" : \"20\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"21\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"22\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"23\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"24\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"25\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"26\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"27\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"28\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"29\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"30\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"31\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"32\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"33\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"34\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"35\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"36\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"37\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"38\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"39\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    
\"entityType\" : \"user\",\n    \"entityId\" : \"40\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"41\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"42\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"43\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"44\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"45\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"46\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"47\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"48\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"49\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"50\"\n  },\n  { \n    \"event\" : \"sign-up\",\n    \"entityType\" : \"user\",\n    \"entityId\" : \"51\"\n  }\n]"
  },
  {
    "path": "tests/pio_tests/data/quickstart_test/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.template.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyRecommender\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 10,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n# Recommendation Template\n\n## Documentation\n\nPlease refer to http://predictionio.apache.org/templates/recommendation/quickstart/\n\n## Versions\n\n### develop\n\n### v0.3.2\n\n- Fix incorrect top items in batchPredict() (issue #5)\n\n### v0.3.1\n\n- Add Evaluation module and modify DataSource for it\n\n### v0.3.0\n\n- update for PredictionIO 0.9.2, including:\n\n  - use new PEventStore API\n  - use appName in DataSource parameter\n\n### v0.2.0\n\n- update build.sbt and template.json for PredictionIO 0.9.2\n\n### v0.1.2\n\n- update for PredictionIO 0.9.0\n\n### v0.1.1\n\n- Persist RDD to memory (.cache()) in DataSource for better performance and quick fix for new user/item ID BiMap error issue.\n\n### v0.1.0\n\n- initial version\n- known issue:\n  * If importing new events of new users/items during training, the new user/item id can't be found in the BiMap.\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nscalaVersion in ThisBuild := sys.env.getOrElse(\"PIO_SCALA_VERSION\", \"2.11.12\")\n\nname := \"template-scala-parallel-recommendation\"\n\norganization := \"org.apache.predictionio\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\" %% \"apache-predictionio-core\" % \"0.15.0-SNAPSHOT\" % \"provided\",\n  \"org.apache.spark\"        %% \"spark-core\"    % sys.env.getOrElse(\"PIO_SPARK_VERSION\", \"2.1.3\") % \"provided\",\n  \"org.apache.spark\"        %% \"spark-mllib\"   % sys.env.getOrElse(\"PIO_SPARK_VERSION\", \"2.1.3\") % \"provided\")\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/engine.json",
    "content": "{\n  \"id\": \"default\",\n  \"description\": \"Default settings\",\n  \"engineFactory\": \"org.template.recommendation.RecommendationEngine\",\n  \"datasource\": {\n    \"params\" : {\n      \"appName\": \"MyApp1\"\n    }\n  },\n  \"algorithms\": [\n    {\n      \"name\": \"als\",\n      \"params\": {\n        \"rank\": 10,\n        \"numIterations\": 10,\n        \"lambda\": 0.01,\n        \"seed\": 3\n      }\n    }\n  ]\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/project/assembly.sbt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.14.9\")\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/ALSAlgorithm.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.PAlgorithm\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\nimport org.apache.spark.mllib.recommendation.ALS\nimport org.apache.spark.mllib.recommendation.{Rating => MLlibRating}\nimport org.apache.spark.mllib.recommendation.ALSModel\n\nimport grizzled.slf4j.Logger\n\ncase class ALSAlgorithmParams(\n  rank: Int,\n  numIterations: Int,\n  lambda: Double,\n  seed: Option[Long]) extends Params\n\nclass ALSAlgorithm(val ap: ALSAlgorithmParams)\n  extends PAlgorithm[PreparedData, ALSModel, Query, PredictedResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  if (ap.numIterations > 30) {\n    logger.warn(\n      s\"ALSAlgorithmParams.numIterations > 30, current: ${ap.numIterations}. \" +\n      s\"There is a chance of running to StackOverflowException. 
Lower this number to remedy it\")\n  }\n\n  def train(sc: SparkContext, data: PreparedData): ALSModel = {\n    // MLLib ALS cannot handle empty training data.\n    require(!data.ratings.take(1).isEmpty,\n      s\"RDD[Rating] in PreparedData cannot be empty.\" +\n      \" Please check if DataSource generates TrainingData\" +\n      \" and Preprator generates PreparedData correctly.\")\n    // Convert user and item String IDs to Int index for MLlib\n\n    val userStringIntMap = BiMap.stringInt(data.ratings.map(_.user))\n    val itemStringIntMap = BiMap.stringInt(data.ratings.map(_.item))\n    val mllibRatings = data.ratings.map( r =>\n      // MLlibRating requires integer index for user and item\n      MLlibRating(userStringIntMap(r.user), itemStringIntMap(r.item), r.rating)\n    )\n\n    // seed for MLlib ALS\n    val seed = ap.seed.getOrElse(System.nanoTime)\n\n    // If you only have one type of implicit event (Eg. \"view\" event only),\n    // replace ALS.train(...) with\n    //val m = ALS.trainImplicit(\n      //ratings = mllibRatings,\n      //rank = ap.rank,\n      //iterations = ap.numIterations,\n      //lambda = ap.lambda,\n      //blocks = -1,\n      //alpha = 1.0,\n      //seed = seed)\n\n    val m = ALS.train(\n      ratings = mllibRatings,\n      rank = ap.rank,\n      iterations = ap.numIterations,\n      lambda = ap.lambda,\n      blocks = -1,\n      seed = seed)\n\n    new ALSModel(\n      rank = m.rank,\n      userFeatures = m.userFeatures,\n      productFeatures = m.productFeatures,\n      userStringIntMap = userStringIntMap,\n      itemStringIntMap = itemStringIntMap)\n  }\n\n  def predict(model: ALSModel, query: Query): PredictedResult = {\n    // Convert String ID to Int index for Mllib\n    model.userStringIntMap.get(query.user).map { userInt =>\n      // create inverse view of itemStringIntMap\n      val itemIntStringMap = model.itemStringIntMap.inverse\n      // recommendProducts() returns Array[MLlibRating], which uses item Int\n      // 
index. Convert it to String ID for returning PredictedResult\n      val itemScores = model.recommendProducts(userInt, query.num)\n        .map (r => ItemScore(itemIntStringMap(r.product), r.rating))\n      new PredictedResult(itemScores)\n    }.getOrElse{\n      logger.info(s\"No prediction for unknown user ${query.user}.\")\n      new PredictedResult(Array.empty)\n    }\n  }\n\n  // This function is used by the evaluation module, where a batch of queries is sent to this engine\n  // for evaluation purpose.\n  override def batchPredict(model: ALSModel, queries: RDD[(Long, Query)]): RDD[(Long, PredictedResult)] = {\n    val userIxQueries: RDD[(Int, (Long, Query))] = queries\n    .map { case (ix, query) => {\n      // If user not found, then the index is -1\n      val userIx = model.userStringIntMap.get(query.user).getOrElse(-1)\n      (userIx, (ix, query))\n    }}\n\n    // Cross product of all valid users from the queries and products in the model.\n    val usersProducts: RDD[(Int, Int)] = userIxQueries\n      .keys\n      .filter(_ != -1)\n      .cartesian(model.productFeatures.map(_._1))\n\n    // Call mllib ALS's predict function.\n    val ratings: RDD[MLlibRating] = model.predict(usersProducts)\n\n    // The following code construct predicted results from mllib's ratings.\n    // Not optimal implementation. 
Instead of groupBy, should use combineByKey with a PriorityQueue\n    val userRatings: RDD[(Int, Iterable[MLlibRating])] = ratings.groupBy(_.user)\n\n    userIxQueries.leftOuterJoin(userRatings)\n    .map {\n      // When there are ratings\n      case (userIx, ((ix, query), Some(ratings))) => {\n        val topItemScores: Array[ItemScore] = ratings\n        .toArray\n        .sortBy(_.rating)(Ordering.Double.reverse) // note: from large to small ordering\n        .take(query.num)\n        .map { rating => ItemScore(\n          model.itemStringIntMap.inverse(rating.product),\n          rating.rating) }\n\n        (ix, PredictedResult(itemScores = topItemScores))\n      }\n      // When user doesn't exist in training data\n      case (userIx, ((ix, query), None)) => {\n        require(userIx == -1)\n        (ix, PredictedResult(itemScores = Array.empty))\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/ALSModel.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.spark.mllib.recommendation\n// This must be the same package as Spark's MatrixFactorizationModel because\n// MatrixFactorizationModel's constructor is private and we are using\n// its constructor in order to save and load the model\n\nimport org.template.recommendation.ALSAlgorithmParams\n\nimport org.apache.predictionio.controller.IPersistentModel\nimport org.apache.predictionio.controller.IPersistentModelLoader\nimport org.apache.predictionio.data.storage.BiMap\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass ALSModel(\n    override val rank: Int,\n    override val userFeatures: RDD[(Int, Array[Double])],\n    override val productFeatures: RDD[(Int, Array[Double])],\n    val userStringIntMap: BiMap[String, Int],\n    val itemStringIntMap: BiMap[String, Int])\n  extends MatrixFactorizationModel(rank, userFeatures, productFeatures)\n  with IPersistentModel[ALSAlgorithmParams] {\n\n  def save(id: String, params: ALSAlgorithmParams,\n    sc: SparkContext): Boolean = {\n\n    sc.parallelize(Seq(rank)).saveAsObjectFile(s\"/tmp/${id}/rank\")\n    
userFeatures.saveAsObjectFile(s\"/tmp/${id}/userFeatures\")\n    productFeatures.saveAsObjectFile(s\"/tmp/${id}/productFeatures\")\n    sc.parallelize(Seq(userStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/userStringIntMap\")\n    sc.parallelize(Seq(itemStringIntMap))\n      .saveAsObjectFile(s\"/tmp/${id}/itemStringIntMap\")\n    true\n  }\n\n  override def toString = {\n    s\"userFeatures: [${userFeatures.count()}]\" +\n    s\"(${userFeatures.take(2).toList}...)\" +\n    s\" productFeatures: [${productFeatures.count()}]\" +\n    s\"(${productFeatures.take(2).toList}...)\" +\n    s\" userStringIntMap: [${userStringIntMap.size}]\" +\n    s\"(${userStringIntMap.take(2)}...)\" +\n    s\" itemStringIntMap: [${itemStringIntMap.size}]\" +\n    s\"(${itemStringIntMap.take(2)}...)\"\n  }\n}\n\nobject ALSModel\n  extends IPersistentModelLoader[ALSAlgorithmParams, ALSModel] {\n  def apply(id: String, params: ALSAlgorithmParams,\n    sc: Option[SparkContext]) = {\n    new ALSModel(\n      rank = sc.get.objectFile[Int](s\"/tmp/${id}/rank\").first,\n      userFeatures = sc.get.objectFile(s\"/tmp/${id}/userFeatures\"),\n      productFeatures = sc.get.objectFile(s\"/tmp/${id}/productFeatures\"),\n      userStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/userStringIntMap\").first,\n      itemStringIntMap = sc.get\n        .objectFile[BiMap[String, Int]](s\"/tmp/${id}/itemStringIntMap\").first)\n  }\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/DataSource.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.PDataSource\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.store.PEventStore\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nimport grizzled.slf4j.Logger\n\ncase class DataSourceEvalParams(kFold: Int, queryNum: Int)\n\ncase class DataSourceParams(\n  appName: String,\n  evalParams: Option[DataSourceEvalParams]) extends Params\n\nclass DataSource(val dsp: DataSourceParams)\n  extends PDataSource[TrainingData,\n      EmptyEvaluationInfo, Query, ActualResult] {\n\n  @transient lazy val logger = Logger[this.type]\n\n  def getRatings(sc: SparkContext): RDD[Rating] = {\n\n    val eventsRDD: RDD[Event] = PEventStore.find(\n      appName = dsp.appName,\n      entityType = Some(\"user\"),\n      eventNames = Some(List(\"rate\", \"buy\")), // read \"rate\" and \"buy\" event\n      // 
targetEntityType is optional field of an event.\n      targetEntityType = Some(Some(\"item\")))(sc)\n\n    val ratingsRDD: RDD[Rating] = eventsRDD.map { event =>\n      val rating = try {\n        val ratingValue: Double = event.event match {\n          case \"rate\" => event.properties.get[Double](\"rating\")\n          case \"buy\" => 4.0 // map buy event to rating value of 4\n          case _ => throw new Exception(s\"Unexpected event ${event} is read.\")\n        }\n        // entityId and targetEntityId is String\n        Rating(event.entityId,\n          event.targetEntityId.get,\n          ratingValue)\n      } catch {\n        case e: Exception => {\n          logger.error(s\"Cannot convert ${event} to Rating. Exception: ${e}.\")\n          throw e\n        }\n      }\n      rating\n    }.cache()\n\n    ratingsRDD\n  }\n\n  override\n  def readTraining(sc: SparkContext): TrainingData = {\n    new TrainingData(getRatings(sc))\n  }\n\n  override\n  def readEval(sc: SparkContext)\n  : Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {\n    require(!dsp.evalParams.isEmpty, \"Must specify evalParams\")\n    val evalParams = dsp.evalParams.get\n\n    val kFold = evalParams.kFold\n    val ratings: RDD[(Rating, Long)] = getRatings(sc).zipWithUniqueId\n    ratings.cache\n\n    (0 until kFold).map { idx => {\n      val trainingRatings = ratings.filter(_._2 % kFold != idx).map(_._1)\n      val testingRatings = ratings.filter(_._2 % kFold == idx).map(_._1)\n\n      val testingUsers: RDD[(String, Iterable[Rating])] = testingRatings.groupBy(_.user)\n\n      (new TrainingData(trainingRatings),\n        new EmptyEvaluationInfo(),\n        testingUsers.map {\n          case (user, ratings) => (Query(user, evalParams.queryNum), ActualResult(ratings.toArray))\n        }\n      )\n    }}\n  }\n}\n\ncase class Rating(\n  user: String,\n  item: String,\n  rating: Double\n)\n\nclass TrainingData(\n  val ratings: RDD[Rating]\n) extends Serializable {\n  
override def toString = {\n    s\"ratings: [${ratings.count()}] (${ratings.take(2).toList}...)\"\n  }\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.IEngineFactory\nimport org.apache.predictionio.controller.Engine\n\ncase class Query(\n  user: String,\n  num: Int\n) extends Serializable\n\ncase class PredictedResult(\n  itemScores: Array[ItemScore]\n) extends Serializable\n\ncase class ActualResult(\n  ratings: Array[Rating]\n) extends Serializable\n\ncase class ItemScore(\n  item: String,\n  score: Double\n) extends Serializable\n\nobject RecommendationEngine extends IEngineFactory {\n  def apply() = {\n    new Engine(\n      classOf[DataSource],\n      classOf[Preparator],\n      Map(\"als\" -> classOf[ALSAlgorithm]),\n      classOf[Serving])\n  }\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/Evaluation.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.Evaluation\nimport org.apache.predictionio.controller.OptionAverageMetric\nimport org.apache.predictionio.controller.AverageMetric\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.EngineParamsGenerator\nimport org.apache.predictionio.controller.EngineParams\nimport org.apache.predictionio.controller.MetricEvaluator\n\n// Usage:\n// $ pio eval org.template.recommendation.RecommendationEvaluation \\\n//   org.template.recommendation.EngineParamsList\n\ncase class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0)\n    extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  require(k > 0, \"k must be greater than 0\")\n\n  override def header = s\"Precision@K (k=$k, threshold=$ratingThreshold)\"\n\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = {\n    val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet\n\n    // If there is no positive results, Precision is undefined. 
We don't consider this case in the\n    // metrics, hence we return None.\n    if (positives.size == 0) {\n      return None\n    }\n\n    val tpCount: Int = p.itemScores.take(k).filter(is => positives(is.item)).size\n\n    Some(tpCount.toDouble / math.min(k, positives.size))\n  }\n}\n\ncase class PositiveCount(ratingThreshold: Double = 2.0)\n    extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {\n  override def header = s\"PositiveCount (threshold=$ratingThreshold)\"\n\n  def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = {\n    a.ratings.filter(_.rating >= ratingThreshold).size\n  }\n}\n\nobject RecommendationEvaluation extends Evaluation {\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 10, ratingThreshold = 4.0),\n      otherMetrics = Seq(\n        PositiveCount(ratingThreshold = 4.0),\n        PrecisionAtK(k = 10, ratingThreshold = 2.0),\n        PositiveCount(ratingThreshold = 2.0),\n        PrecisionAtK(k = 10, ratingThreshold = 1.0),\n        PositiveCount(ratingThreshold = 1.0)\n      )))\n}\n\n\nobject ComprehensiveRecommendationEvaluation extends Evaluation {\n  val ratingThresholds = Seq(0.0, 2.0, 4.0)\n  val ks = Seq(1, 3, 10)\n\n  engineEvaluator = (\n    RecommendationEngine(),\n    MetricEvaluator(\n      metric = PrecisionAtK(k = 3, ratingThreshold = 2.0),\n      otherMetrics = (\n        (for (r <- ratingThresholds) yield PositiveCount(ratingThreshold = r)) ++\n        (for (r <- ratingThresholds; k <- ks) yield PrecisionAtK(k = k, ratingThreshold = r))\n      )))\n}\n\n\ntrait BaseEngineParamsList extends EngineParamsGenerator {\n  protected val baseEP = EngineParams(\n    dataSourceParams = DataSourceParams(\n      appName = \"INVALID_APP_NAME\",\n      evalParams = Some(DataSourceEvalParams(kFold = 5, queryNum = 10))))\n}\n\nobject EngineParamsList extends BaseEngineParamsList {\n  engineParamsList = for(\n    rank <- Seq(5, 10, 20);\n 
   numIterations <- Seq(1, 5, 10))\n    yield baseEP.copy(\n      algorithmParamsList = Seq(\n        (\"als\", ALSAlgorithmParams(rank, numIterations, 0.01, Some(3)))))\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/Preparator.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.PPreparator\n\nimport org.apache.spark.SparkContext\nimport org.apache.spark.SparkContext._\nimport org.apache.spark.rdd.RDD\n\nclass Preparator\n  extends PPreparator[TrainingData, PreparedData] {\n\n  def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = {\n    new PreparedData(ratings = trainingData.ratings)\n  }\n}\n\nclass PreparedData(\n  val ratings: RDD[Rating]\n) extends Serializable\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/src/main/scala/Serving.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.template.recommendation\n\nimport org.apache.predictionio.controller.LServing\n\nclass Serving\n  extends LServing[Query, PredictedResult] {\n\n  override\n  def serve(query: Query,\n    predictedResults: Seq[PredictedResult]): PredictedResult = {\n    predictedResults.head\n  }\n}\n"
  },
  {
    "path": "tests/pio_tests/engines/recommendation-engine/template.json",
    "content": "{\"pio\": {\"version\": { \"min\": \"0.9.2\" }}}\n"
  },
  {
    "path": "tests/pio_tests/globals.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport subprocess\n\nSUPPRESS_STDOUT=False\nSUPPRESS_STDERR=False\nLOGGER_NAME='INT_TESTS'\n\ndef std_out():\n  if SUPPRESS_STDOUT:\n    return subprocess.DEVNULL\n  else:\n    return None\n\ndef std_err():\n  if SUPPRESS_STDERR:\n    return subprocess.DEVNULL\n  else:\n    return None\n"
  },
  {
    "path": "tests/pio_tests/integration.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport unittest\nimport logging\nimport pio_tests.globals as globals\n\nclass TestContext:\n  \"\"\"Class representing the settings provided for every test\"\"\"\n\n  def __init__(self, engine_directory, data_directory, es_ip='0.0.0.0', es_port=7070):\n    \"\"\"\n    Args:\n      engine_directory (str): path to the directory where the engines are stored\n      data_directory (str):   path to the directory where tests can keep their data\n      es_ip (str):            ip of the eventserver\n      es_port (int):          port of the eventserver\n    \"\"\"\n    self.engine_directory = engine_directory\n    self.data_directory = data_directory\n    self.es_ip = es_ip\n    self.es_port = es_port\n\nclass BaseTestCase(unittest.TestCase):\n  \"\"\"This is the base class for all integration tests\n\n  This class sets up a `TestContext` object and a logger for every test case\n  \"\"\"\n  def __init__(self, test_context, methodName='runTest'):\n    super(BaseTestCase, self).__init__(methodName)\n    self.test_context = test_context\n    self.log = logging.getLogger(globals.LOGGER_NAME)\n\nclass AppContext:\n  \"\"\" This class is a description of an instance of the 
engine\"\"\"\n\n  def __init__(self, name, template, engine_json_path=None):\n    \"\"\"\n    Args:\n      name (str): application name\n      template (str): either the name of an engine from the engines directory\n          or a link to a repository containing the engine\n      engine_json_path (str): path to a JSON file describing an engine (a custom engine.json)\n          to be used for the application. If `None`, engine.json from the engine's directory\n          will be used\n    \"\"\"\n    self.name = name\n    self.template = template\n    self.engine_json_path = engine_json_path\n"
  },
  {
    "path": "tests/pio_tests/scenarios/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#"
  },
  {
    "path": "tests/pio_tests/scenarios/basic_app_usecases.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport os\nimport unittest\nimport random\nimport logging\nimport time\nfrom subprocess import CalledProcessError\nfrom pio_tests.integration import BaseTestCase, AppContext\nfrom utils import *\n\nITEMS_COUNT = 12\n\ndef get_buy_events(users, per_user=2):\n  events = []\n  for u in range(users):\n    items = set([random.randint(0, ITEMS_COUNT) for i in range(per_user)])\n    for item in items:\n      events.append({\n        \"event\": \"buy\",\n        \"entityType\": \"user\",\n        \"entityId\": u,\n        \"targetEntityType\": \"item\",\n        \"targetEntityId\": item })\n\n  return events\n\ndef get_rate_events(users, per_user=2):\n  events = []\n  for u in range(users):\n    items = set([random.randint(0, ITEMS_COUNT) for i in range(per_user)])\n    for item in items:\n      events.append( {\n        \"event\": \"rate\",\n        \"entityType\": \"user\",\n        \"entityId\": u,\n        \"targetEntityType\": \"item\",\n        \"targetEntityId\": item,\n        \"properties\": { \"rating\" : float(random.randint(1,5)) } })\n\n  return events\n\n\nclass BasicAppUsecases(BaseTestCase):\n\n  def setUp(self):\n    random.seed(3)\n    
self.log.info(\"Setting up the engine\")\n\n    template_path = pjoin(\n        self.test_context.engine_directory, \"recommendation-engine\")\n    engine_json_path = pjoin(\n        self.test_context.data_directory, \"quickstart_test/engine.json\")\n\n    app_context = AppContext(\n        name=\"MyRecommender\",\n        template=template_path,\n        engine_json_path=engine_json_path)\n\n    self.app = AppEngine(self.test_context, app_context)\n\n  def runTest(self):\n    self.app_creation()\n    self.check_app_list()\n    self.check_data()\n    self.check_build()\n    self.check_train_and_deploy()\n\n  def app_creation(self):\n    self.log.info(\"Adding a new application\")\n    description = \"SomeDescription\"\n    self.app.new(description=description)\n    self.assertEqual(description, self.app.description)\n\n    self.log.info(\"Creating an app again - should fail\")\n    self.assertRaises(CalledProcessError, lambda : self.app.new())\n\n  def check_app_list(self):\n    self.log.info(\"Checking if app is on the list\")\n    apps = pio_app_list()\n    self.assertEqual(1,\n        len([a for a in apps if a['name'] == self.app.app_context.name]))\n\n  def check_data(self):\n    self.log.info(\"Importing events\")\n    buy_events = get_buy_events(20, 1)\n    rate_events = get_rate_events(20, 1)\n\n    for ev in buy_events + rate_events:\n      self.assertEquals(201, self.app.send_event(ev).status_code)\n\n    self.log.info(\"Checking imported events\")\n    r = self.app.get_events(params={'limit': -1})\n    self.assertEqual(200, r.status_code)\n    self.assertEqual(len(buy_events) + len(rate_events), len(r.json()))\n\n    self.log.info(\"Deleting entire data\")\n    self.app.delete_data()\n    self.log.info(\"Checking if there are no events at all\")\n    r = self.app.get_events(params={'limit': -1})\n    self.assertEqual(404, r.status_code)\n\n  def check_build(self):\n    self.log.info(\"Clean build\")\n    self.app.build(clean=True)\n    
self.log.info(\"Second build\")\n    self.app.build()\n\n  def check_train_and_deploy(self):\n    self.log.info(\"import some data first\")\n    buy_events = get_buy_events(20, 5)\n    rate_events = get_rate_events(20, 5)\n    for ev in buy_events + rate_events:\n      self.assertEquals(201, self.app.send_event(ev).status_code)\n\n    self.log.info(\"Training\")\n    self.app.train()\n    self.log.info(\"Deploying\")\n    self.app.deploy()\n    self.assertFalse(self.app.deployed_process.poll())\n\n    self.log.info(\"Importing more events\")\n    buy_events = get_buy_events(60, 5)\n    rate_events = get_rate_events(60, 5)\n    for ev in buy_events + rate_events:\n      self.assertEquals(201, self.app.send_event(ev).status_code)\n\n    self.log.info(\"Training again\")\n    self.app.train()\n\n    time.sleep(7)\n\n    self.log.info(\"Check serving\")\n    r = self.app.query({\"user\": 1, \"num\": 5})\n    self.assertEqual(200, r.status_code)\n    result = r.json()\n    self.assertEqual(5, len(result['itemScores']))\n    r = self.app.query({\"user\": 5, \"num\": 3})\n    self.assertEqual(200, r.status_code)\n    result = r.json()\n    self.assertEqual(3, len(result['itemScores']))\n\n    self.log.info(\"Remove data\")\n    self.app.delete_data()\n    self.log.info(\"Retraining should fail\")\n    self.assertRaises(CalledProcessError, lambda: self.app.train())\n\n\n  def tearDown(self):\n    self.log.info(\"Stopping deployed engine\")\n    self.app.stop()\n    self.log.info(\"Deleting all related data\")\n    self.app.delete_data()\n    self.log.info(\"Removing an app\")\n    self.app.delete()\n\n"
  },
  {
    "path": "tests/pio_tests/scenarios/eventserver_test.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport unittest\nimport requests\nimport json\nimport argparse\nimport dateutil.parser\nimport pytz\nfrom subprocess import Popen\nfrom utils import AppEngine, pjoin\nfrom pio_tests.integration import BaseTestCase, AppContext\n\nclass EventserverTest(BaseTestCase):\n  \"\"\" Integration test for PredictionIO Eventserver API\n  Refer to below for further information:\n    http://predictionio.apache.org/datacollection/eventmodel/\n    http://predictionio.apache.org/datacollection/eventapi/\n  \"\"\"\n  # Helper methods\n  def eventserver_url(self, path=None):\n    url = 'http://{}:{}'.format(\n            self.test_context.es_ip, self.test_context.es_port)\n    if path: url += '/{}'.format(path)\n    return url\n\n  def load_events(self, json_file):\n    file_path = pjoin(self.test_context.data_directory,\n        'eventserver_test/{}'.format(json_file))\n    return json.loads(open(file_path).read())\n\n\n  def setUp(self):\n    template_path = pjoin(\n        self.test_context.engine_directory, \"recommendation-engine\")\n    app_context = AppContext(\n        name=\"MyRecommender\",\n        template=template_path)\n    self.app = AppEngine(self.test_context, 
app_context)\n\n  def runTest(self):\n    self.log.info(\"Check if Eventserver is alive and running\")\n    r = requests.get(self.eventserver_url())\n    self.assertDictEqual(r.json(), {\"status\": \"alive\"})\n\n    self.log.info(\"Cannot view events with empty accessKey\")\n    r = requests.get(self.eventserver_url(path='events.json'))\n    self.assertDictEqual(r.json(), {\"message\": \"Missing accessKey.\"})\n\n    self.log.info(\"Cannot view events with invalid accessKey\")\n    r = requests.get(self.eventserver_url(path='events.json'),\n        params={'accessKey': ''})\n    self.assertDictEqual(r.json(), {\"message\": \"Invalid accessKey.\"})\n\n    self.log.info(\"Adding new pio application\")\n    self.app.new()\n\n    self.log.info(\"No events have been sent yet\")\n    r = self.app.get_events()\n    self.assertDictEqual(r.json(), {\"message\": \"Not Found\"})\n\n    # Testing POST\n    self.log.info(\"Sending single event\")\n    event1 = {\n      'event' : 'test',\n      'entityType' : 'test',\n      'entityId' : 't1'\n    }\n    r = self.app.send_event(event1)\n    self.assertEqual(201, r.status_code)\n\n    self.log.info(\"Sending batch of events\")\n    r = self.app.send_events_batch(\n        self.load_events(\"rate_events_25.json\"))\n    self.assertEqual(200, r.status_code)\n\n    self.log.info(\"Cannot send more than 50 events per batch\")\n    r = self.app.send_events_batch(\n        self.load_events(\"signup_events_51.json\"))\n    self.assertEqual(400, r.status_code)\n\n    self.log.info(\"Importing events from file does not have batch size limit\")\n    self.app.import_events_batch(\n        self.load_events(\"signup_events_51.json\"))\n\n    self.log.info(\"Individual events may fail when sending events as batch\")\n    r = self.app.send_events_batch(\n        self.load_events(\"partially_malformed_events.json\"))\n    self.assertEqual(200, r.status_code)\n    self.assertEqual(201, r.json()[0]['status'])\n    self.assertEqual(400, 
r.json()[1]['status'])\n\n    # Testing GET for different parameters\n    params = {'event': 'rate'}\n    r = self.app.get_events(params=params)\n    self.assertEqual(20, len(r.json()))\n    self.assertEqual('rate', r.json()[0]['event'])\n\n    params = {\n      'event': 'rate',\n      'limit': -1 }\n    r = self.app.get_events(params=params)\n    self.assertEqual(25, len(r.json()))\n    self.assertEqual('rate', r.json()[0]['event'])\n\n    params = {\n      'event': 'rate',\n      'limit': 10 }\n    r = self.app.get_events(params=params)\n    self.assertEqual(10, len(r.json()))\n    self.assertEqual('rate', r.json()[0]['event'])\n\n    params = {\n      'event': 'rate',\n      'entityType': 'user',\n      'entityId': '1' }\n    r = self.app.get_events(params=params)\n    self.assertEqual(5, len(r.json()))\n    self.assertEqual('1', r.json()[0]['entityId'])\n\n    params = {\n      'event': 'rate',\n      'targetEntityType': 'item',\n      'targetEntityId': '1' }\n    r = self.app.get_events(params=params)\n    self.assertEqual(5, len(r.json()))\n    self.assertEqual('1', r.json()[0]['targetEntityId'])\n\n    params = {\n      'event': 'rate',\n      'entityType': 'user',\n      'entityId': '1',\n      'startTime': '2014-11-01T09:39:45.618-08:00',\n      'untilTime': '2014-11-04T09:39:45.618-08:00' }\n    r = self.app.get_events(params=params)\n    self.assertEqual(3, len(r.json()))\n    self.assertEqual('1', r.json()[0]['entityId'])\n\n    params = {\n      'event': 'rate',\n      'entityType': 'user',\n      'entityId': '1',\n      'reversed': 'true' }\n    r = self.app.get_events(params=params)\n    self.assertEqual(5, len(r.json()))\n    event_time = dateutil.parser.parse(r.json()[0]['eventTime']).astimezone(pytz.utc)\n    self.assertEqual('2014-11-05 17:39:45.618000+00:00', str(event_time))\n\n  def tearDown(self):\n    self.log.info(\"Deleting all app data\")\n    self.app.delete_data()\n    self.log.info(\"Deleting app\")\n    self.app.delete()\n\n\nif 
__name__ == '__main__':\n  suite = unittest.TestSuite([BasicEventserverTest])\n  result = unittest.TextTestRunner(verbosity=2).run(suite)\n  if not result.wasSuccessful():\n    sys.exit(1)\n"
  },
  {
    "path": "tests/pio_tests/scenarios/quickstart_test.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport os\nimport unittest\nimport random\nimport logging\nfrom pio_tests.integration import BaseTestCase, AppContext\nfrom utils import AppEngine, srun, pjoin\n\ndef read_events(file_path):\n  RATE_ACTIONS_DELIMITER = \"::\"\n  with open(file_path, 'r') as f:\n    events = []\n    for line in f:\n      data = line.rstrip('\\r\\n').split(RATE_ACTIONS_DELIMITER)\n      if random.randint(0, 1) == 1:\n        events.append( {\n          \"event\": \"rate\",\n          \"entityType\": \"user\",\n          \"entityId\": data[0],\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": data[1],\n          \"properties\": { \"rating\" : float(data[2]) } })\n      else:\n        events.append({\n          \"event\": \"buy\",\n          \"entityType\": \"user\",\n          \"entityId\": data[0],\n          \"targetEntityType\": \"item\",\n          \"targetEntityId\": data[1] })\n\n    return events\n\n\nclass QuickStartTest(BaseTestCase):\n\n  def setUp(self):\n    self.log.info(\"Setting up the engine\")\n\n    template_path = pjoin(\n        self.test_context.engine_directory, \"recommendation-engine\")\n    engine_json_path = pjoin(\n        
self.test_context.data_directory, \"quickstart_test/engine.json\")\n\n    self.training_data_path = pjoin(\n        self.test_context.data_directory,\n        \"quickstart_test/training_data.txt\")\n\n    # downloading training data\n    srun('curl https://raw.githubusercontent.com/apache/spark/master/' \\\n            'data/mllib/sample_movielens_data.txt --create-dirs -o {}'\n            .format(self.training_data_path))\n\n    app_context = AppContext(\n        name=\"MyRecommender\",\n        template=template_path,\n        engine_json_path=engine_json_path)\n\n    self.app = AppEngine(self.test_context, app_context)\n\n  def engine_dir_test(self):\n    self.log.info(\"Stopping deployed engine\")\n    self.app.stop()\n\n    self.log.info(\"Creating dummy directory\")\n    engine_path = self.app.engine_path\n    dummy_path = \"{}/dummy\".format(engine_path)\n    srun(\"mkdir -p {}\".format(dummy_path))\n\n    self.log.info(\"Testing pio commands in dummy directory with \" +\n      \"--engine-dir argument\")\n    self.app.engine_path = dummy_path\n    self.log.info(\"Building an engine...\")\n    self.app.build(engine_dir=engine_path)\n    self.log.info(\"Training...\")\n    self.app.train(engine_dir=engine_path)\n    self.log.info(\"Deploying and waiting 30s for it to start...\")\n    self.app.deploy(wait_time=30, engine_dir=engine_path)\n\n    self.log.info(\"Sending a single query and checking results\")\n    user_query = { \"user\": 1, \"num\": 4 }\n    r = self.app.query(user_query)\n    self.assertEqual(200, r.status_code)\n    result = r.json()\n    self.assertEqual(4, len(result['itemScores']))\n\n    self.log.info(\"Deleting dummy directory\")\n    srun(\"rm -rf {}\".format(dummy_path))\n    self.app.engine_path = engine_path\n\n  def runTest(self):\n    self.log.info(\"Adding a new application\")\n    self.app.new()\n\n    event1 = {\n      \"event\" : \"rate\",\n      \"entityType\" : \"user\",\n      \"entityId\" : \"u0\",\n      \"targetEntityType\" 
: \"item\",\n      \"targetEntityId\" : \"i0\",\n      \"properties\" : {\n        \"rating\" : 5\n      },\n      \"eventTime\" : \"2014-11-02T09:39:45.618-08:00\" }\n\n    event2 = {\n      \"event\" : \"buy\",\n      \"entityType\" : \"user\",\n      \"entityId\" : \"u1\",\n      \"targetEntityType\" : \"item\",\n      \"targetEntityId\" : \"i2\",\n      \"eventTime\" : \"2014-11-10T12:34:56.123-08:00\" }\n\n    self.log.info(\"Sending two test events\")\n    self.assertListEqual(\n        [201, 201],\n        [self.app.send_event(e).status_code for e in [event1, event2]])\n\n    self.log.info(\"Checking the number of events stored on the server\")\n    r = self.app.get_events()\n    self.assertEquals(200, r.status_code)\n    stored_events = r.json()\n    self.assertEqual(2, len(stored_events))\n\n    self.log.info(\"Importing many events\")\n    new_events = read_events(self.training_data_path)\n    for ev in new_events:\n      r = self.app.send_event(ev)\n      self.assertEqual(201, r.status_code)\n\n    self.log.info(\"Checking the number of events stored on eventserver\")\n    r = self.app.get_events(params={'limit': -1})\n    self.assertEquals(200, r.status_code)\n    stored_events = r.json()\n    self.assertEquals(len(new_events) + 2, len(stored_events))\n\n    self.log.info(\"Building an engine...\")\n    self.app.build()\n    self.log.info(\"Training...\")\n    self.app.train()\n    self.log.info(\"Deploying and waiting 35s for it to start...\")\n    self.app.deploy(wait_time=35)\n\n    self.log.info(\"Testing pio commands outside of engine directory\")\n    self.engine_dir_test()\n\n    self.log.info(\"Sending a single query and checking results\")\n    user_query = { \"user\": 1, \"num\": 4 }\n    r = self.app.query(user_query)\n    self.assertEqual(200, r.status_code)\n    result = r.json()\n    self.assertEqual(4, len(result['itemScores']))\n\n  def tearDown(self):\n    self.log.info(\"Stopping deployed engine\")\n    self.app.stop()\n    
self.log.info(\"Deleting all related data\")\n    self.app.delete_data()\n    self.log.info(\"Removing an app\")\n    self.app.delete()\n"
  },
  {
    "path": "tests/pio_tests/tests.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport os\nimport sys\nimport unittest\nimport argparse\nimport logging\nimport time\nfrom xmlrunner import XMLTestRunner\nimport pio_tests.globals as globals\nfrom utils import srun_bg\nfrom pio_tests.integration import TestContext\nfrom pio_tests.scenarios.quickstart_test import QuickStartTest\nfrom pio_tests.scenarios.basic_app_usecases import BasicAppUsecases\nfrom pio_tests.scenarios.eventserver_test import EventserverTest\n\nparser = argparse.ArgumentParser(description='Integration tests for PredictionIO')\nparser.add_argument('--eventserver-ip', default='0.0.0.0')\nparser.add_argument('--eventserver-port', type=int, default=7070)\nparser.add_argument('--no-shell-stdout', action='store_true',\n    help='Suppress STDOUT output from shell executed commands')\nparser.add_argument('--no-shell-stderr', action='store_true',\n    help='Suppress STDERR output from shell executed commands')\nparser.add_argument('--logging', action='store', choices=['INFO', 'DEBUG', 'NO_LOGGING'],\n    default='INFO', help='Choose the logging level')\nparser.add_argument('--tests', nargs='*', type=str,\n    default=None, help='Names of the tests to execute. 
By default all tests will be checked')\n\nTESTS_DIRECTORY = os.path.abspath(os.path.dirname(__file__))\nENGINE_DIRECTORY = os.path.join(TESTS_DIRECTORY, \"engines\")\nDATA_DIRECTORY = os.path.join(TESTS_DIRECTORY, \"data\")\n\nLOGGING_FORMAT = '[%(levelname)s] %(module)s %(asctime)-15s: %(message)s'\nlogging.basicConfig(format=LOGGING_FORMAT)\n\ndef get_tests(test_context):\n  # ========= ADD TESTS HERE!!! ================================\n  return {'QuickStart': QuickStartTest(test_context),\n          'BasicAppUsecases': BasicAppUsecases(test_context),\n          'EventserverTest': EventserverTest(test_context)}\n\nif __name__ == \"__main__\":\n  args = vars(parser.parse_args())\n\n  if args.get('no_shell_stdout'):\n    globals.SUPPRESS_STDOUT = True\n  if args.get('no_shell_stderr'):\n    globals.SUPPRESS_STDERR = True\n\n  # setting up logging\n  log_opt = args['logging']\n  logger = logging.getLogger(globals.LOGGER_NAME)\n  if log_opt == 'INFO':\n    logger.level = logging.INFO\n  elif log_opt == 'DEBUG':\n    logger.level = logging.DEBUG\n\n  test_context = TestContext(\n      ENGINE_DIRECTORY, DATA_DIRECTORY,\n      args['eventserver_ip'], int(args['eventserver_port']))\n\n  tests_dict = get_tests(test_context)\n  test_names = args['tests']\n  tests = []\n  if test_names is not None:\n    tests = [t for name, t in tests_dict.items() if name in test_names]\n  else:\n    tests = tests_dict.values()\n\n  # Actual tests execution\n  es_wait_time = 25\n  logger.info(\"Starting eventserver and waiting {}s for it to initialize\".format(\n      es_wait_time))\n  event_server_process = srun_bg('pio eventserver --ip {} --port {}'\n      .format(test_context.es_ip, test_context.es_port))\n  time.sleep(es_wait_time)\n  result = XMLTestRunner(verbosity=2, output='test-reports').run(\n                unittest.TestSuite(tests))\n  event_server_process.kill()\n\n  if not result.wasSuccessful():\n    sys.exit(1)\n"
  },
  {
    "path": "tests/pio_tests/utils.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nimport re\nimport time\nimport os\nimport requests\nimport json\nfrom shutil import copyfile\nfrom subprocess import run, Popen, check_output\nfrom os.path import join as pjoin\nimport pio_tests.globals as globals\n\ndef srun(command):\n  \"\"\" Runs a shell command given as a `str`\n  Raises: `subprocess.CalledProcessError` when exit code != 0\n  \"\"\"\n  return run(command, shell=True, stdout=globals.std_out(),\n      stderr=globals.std_err(), check=True)\n\ndef srun_out(command):\n  \"\"\" Runs a shell command given as a `str`\n  Returns: string with command's output\n  Raises: `subprocess.CalledProcessError` when exit code != 0\n  \"\"\"\n  return check_output(command, shell=True, universal_newlines=True,\n      stderr=globals.std_err())\n\ndef srun_bg(command):\n  \"\"\" Runs a shell command given as a `str` in the background\n  Returns: (obj: `subprocess.Popen`) for executed process\n  \"\"\"\n  return Popen(command, shell=True, stdout=globals.std_out(),\n      stderr=globals.std_err())\n\ndef repository_dirname(template):\n  \"\"\" Utility function getting repository name from the link\n  Example: for \"https://github.com/user/SomeRepo\" should return 
\"SomeRepo\"\n  \"\"\"\n  return template.split('/')[-1]\n\ndef obtain_template(engine_dir, template):\n  \"\"\"Given a directory with engines and a template downloads an engine\n  if necessary\n  Args:\n    engine_dir (str): directory where engines are stored\n    template (str): either the name of an engine from the engines directory\n        or a link to repository with the engine\n  Returns: str with the engine's path\n  \"\"\"\n  if re.match('^https?:\\/\\/', template):\n    dest_dir = pjoin(engine_dir, repository_dirname(template))\n    if not os.path.exists(dest_dir):\n      srun('git clone --depth=1 {0} {1}'.format(template, dest_dir))\n    return dest_dir\n  else:\n    # check if exists\n    dest_dir = pjoin(engine_dir, template)\n    if not os.path.exists(dest_dir):\n      raise ValueError('Engine {0} does not exist in {1}'\n          .format(template, engine_dir))\n\n    return dest_dir\n\ndef pio_app_list():\n  \"\"\"Returns: a list of dicts for every application with the following keys:\n      `name`, `id`, `access_key`, `allowed_events`\n  \"\"\"\n  output = srun_out('pio app list').rstrip()\n  return [ { 'name': line[2], 'id': int(line[4]),\n             'access_key': line[6], 'allowed_events': line[8] }\n          for line in [x.split() for x in output.split('\\n')[1:-1]] ]\n\ndef get_app_eventserver_url_json(test_context):\n  return 'http://{}:{}/events.json'.format(\n      test_context.es_ip, test_context.es_port)\n\ndef get_engine_url_json(engine_ip, engine_port):\n  return 'http://{}:{}/queries.json'.format(\n      engine_ip, engine_port)\n\ndef send_event(event, test_context, access_key, channel=None):\n  \"\"\" Sends an event to the eventserver\n  Args:\n    event: json-like dictionary describing an event\n    test_context (obj: `TestContext`):\n    access_key: application's access key\n    channel (str): custom channel for storing event\n  Returns: `requests.Response`\n  \"\"\"\n  url = get_app_eventserver_url_json(test_context)\n  params = { 
'accessKey': access_key }\n  if channel: params['channel'] = channel\n  return requests.post(\n      url,\n      params=params,\n      json=event)\n\ndef send_events_batch(events, test_context, access_key, channel=None):\n  \"\"\" Send events in batch via REST to the eventserver\n  Args:\n    events: a list of json-like dictionaries for events\n    test_context (obj: `TestContext`):\n    access_key: application's access key\n    channel (str): custom channel for storing event\n  Returns: `requests.Response`\n  Requires: Events length must not exceed length of 50\n    http://predictionio.apache.org/datacollection/eventmodel/#3.-batch-events-to-the-eventserver\n  \"\"\"\n  url = 'http://{}:{}/batch/events.json'.format(\n      test_context.es_ip, test_context.es_port)\n  params = { 'accessKey': access_key }\n  if channel: params['channel'] = channel\n  return requests.post(\n      url,\n      params=params,\n      json=events)\n\n\ndef import_events_batch(events, test_context, appid, channel=None):\n  \"\"\" Imports events in batch from file with `pio import`\n  Args:\n    events: a list of json-like dictionaries for events\n    test_context (obj: `TestContext`)\n    appid (int): application's id\n    channel (str): custom channel for storing event\n  \"\"\"\n  # Writing events list to temporary file.\n  # `pio import` requires each line of input file to be a JSON string\n  # representing an event. 
Empty lines are not allowed.\n  contents = ''\n  for ev in events:\n      contents += '{}\\n'.format(json.dumps(ev))\n  contents.rstrip('\\n')\n\n  file_path = pjoin(test_context.data_directory, 'events.json.tmp')\n  try:\n      with open(file_path, 'w') as f:\n          f.write(contents)\n      srun('pio import --appid {} --input {} {} -- {}'.format(\n          appid,\n          file_path,\n          '--channel {}'.format(channel) if channel else '',\n          '--conf spark.sql.warehouse.dir=file:///tmp/spark-warehouse'))\n  finally:\n      os.remove(file_path)\n\ndef get_events(test_context, access_key, params={}):\n  \"\"\" Gets events for some application\n  Args:\n    test_context (obj: `TestContext`)\n    access_key (str):\n    params (dict): special parameters for eventserver's GET, e.g:\n        'limit', 'reversed', 'event'. See the docs\n  Returns: `requests.Response`\n  \"\"\"\n  url = get_app_eventserver_url_json(test_context)\n  return requests.get(url, params=dict({'accessKey': access_key}, **params))\n\ndef query_engine(data, engine_ip='localhost', engine_port=8000):\n  \"\"\" Send a query to deployed engine\n  Args:\n    data (dict): json-like dictionary being an input to an engine\n    access_key (str):\n    engine_ip (str): ip of deployed engine\n    engine_port (int): port of deployed engine\n  Returns: `requests.Response`\n  \"\"\"\n  url = get_engine_url_json(engine_ip, engine_port)\n  return requests.post(url, json=data)\n\nclass AppEngine:\n  \"\"\" This is a utility class simplifying all app related interactions.\n  Basically it is just a wrapper on other utility functions and shell\n  scripts.\n  \"\"\"\n\n  def __init__(self, test_context, app_context, already_created=False):\n    \"\"\" Args:\n        test_context (obj: `TestContext`)\n        app_context (obj: `AppContext`)\n        already_created (bool): True if the given app has been already added\n    \"\"\"\n    self.test_context = test_context\n    self.app_context = app_context\n  
  self.engine_path = obtain_template(\n        self.test_context.engine_directory, app_context.template)\n    self.deployed_process = None\n    if already_created:\n      self.__init_info()\n    else:\n      self.id = None\n      self.access_key = None\n      self.description = None\n\n    if self.app_context.engine_json_path:\n      self.__copy_engine_json()\n\n  def __copy_engine_json(self):\n    to_path = pjoin(self.engine_path, 'engine.json')\n    copyfile(self.app_context.engine_json_path, to_path)\n\n  def __init_info(self):\n    info = self.show()\n    self.id = info['id']\n    self.access_key = info['access_key']\n    self.description = info['description']\n\n  def new(self, id=None, description=None, access_key=None):\n    \"\"\" Creates a new application with given parameters \"\"\"\n    srun('pio app new {} {} {} {}'.format(\n        '--id {}'.format(id) if id else '',\n        '--description \\\"{}\\\"'.format(description) if description else '',\n        '--access-key {}'.format(access_key) if access_key else '',\n        self.app_context.name))\n\n    self.__init_info()\n\n\n  def show(self):\n    \"\"\" Returns: application info in dictionary with the keys:\n         `name`: str, `id`: int, `description`: str,\n         `access_key`: str, `allowed_events`: str\n    \"\"\"\n    output = srun_out('pio app show {}'.format(self.app_context.name)).rstrip()\n    lines = [x.split() for x in output.split('\\n')]\n    return { 'name': lines[0][3],\n             'id': int(lines[1][4]),\n             'description': lines[2][3] if len(lines[2]) >= 4 else '',\n             'access_key': lines[3][4],\n             'allowed_events': lines[3][5] }\n\n\n  # deletes this app from pio\n  def delete(self):\n    srun('pio app delete {0} --force'.format(self.app_context.name))\n\n  def build(self, sbt_extra=None, clean=False, no_asm=True, engine_dir=None):\n    srun('cd {0}; pio build {1} {2} {3} {4}'.format(\n        self.engine_path,\n        '--sbt-extra 
{}'.format(sbt_extra) if sbt_extra else '',\n        '--clean' if clean else '',\n        '--no-asm' if no_asm else '',\n        '--engine-dir {}'.format(engine_dir) if engine_dir else ''))\n\n  def train(self, batch=None, skip_sanity_check=False, stop_after_read=False,\n          stop_after_prepare=False, engine_factory=None,\n          engine_params_key=None, scratch_uri=None, engine_dir=None):\n\n    srun('cd {0}; pio train {1} {2} {3} {4} {5} {6} {7} {8}'.format(\n        self.engine_path,\n        '--batch {}'.format(batch) if batch else '',\n        '--skip-sanity-check' if skip_sanity_check else '',\n        '--stop-after-read' if stop_after_read else '',\n        '--stop-after-prepare' if stop_after_prepare else '',\n        '--engine_factory {}'.format(engine_factory) if engine_factory else '',\n        '--engine-params-key {}'.format(engine_params_key) if engine_params_key else '',\n        '--scratch-uri {}'.format(scratch_uri) if scratch_uri else '',\n        '--engine-dir {}'.format(engine_dir) if engine_dir else ''))\n\n  def deploy(self, wait_time=0, ip=None, port=None, engine_instance_id=None,\n          feedback=False, accesskey=None, event_server_ip=None, event_server_port=None,\n          batch=None, scratch_uri=None, engine_dir=None):\n\n    command = 'cd {0}; pio deploy {1} {2} {3} {4} {5} {6} {7} {8} {9} {10}'.format(\n            self.engine_path,\n            '--ip {}'.format(ip) if ip else '',\n            '--port {}'.format(port) if port else '',\n            '--engine-instance-id {}'.format(engine_instance_id) if engine_instance_id else '',\n            '--feedback' if feedback else '',\n            '--accesskey {}'.format(accesskey) if accesskey else '',\n            '--event-server-ip {}'.format(event_server_ip) if event_server_ip else '',\n            '--event-server-port {}'.format(event_server_port) if event_server_port else '',\n            '--batch {}'.format(batch) if batch else '',\n            '--scratch-uri 
{}'.format(scratch_uri) if scratch_uri else '',\n            '--engine-dir {}'.format(engine_dir) if engine_dir else '')\n\n    self.deployed_process = srun_bg(command)\n    time.sleep(wait_time)\n    if self.deployed_process.poll() is not None:\n      raise Exception('Application engine terminated')\n    self.ip = ip if ip else 'localhost'\n    self.port = port if port else 8000\n\n  def stop(self):\n    \"\"\" Kills deployed engine \"\"\"\n    if self.deployed_process:\n      self.deployed_process.kill()\n\n  def new_channel(self, channel):\n    srun('pio app channel-new {0}'.format(channel))\n\n  def delete_channel(self, channel):\n    srun('pio app channel-delete {0} --force'.format(channel))\n\n  def send_event(self, event):\n    return send_event(event, self.test_context, self.access_key)\n\n  def send_events_batch(self, events):\n    return send_events_batch(events, self.test_context, self.access_key)\n\n  def import_events_batch(self, events):\n    return import_events_batch(events, self.test_context, self.id)\n\n  def get_events(self, params={}):\n    return get_events(self.test_context, self.access_key, params)\n\n  def delete_data(self, delete_all=True, channel=None):\n    srun('pio app data-delete {0} {1} {2} --force'\n        .format(\n            self.app_context.name,\n            '--all' if delete_all else '',\n            '--channel ' + channel if channel is not None else ''))\n\n  def query(self, data):\n    return query_engine(data, self.ip, self.port)\n"
  },
  {
    "path": "tests/run_docker.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nUSAGE=$\"Usage: run_docker <meta> <event> <model> <command>\n  Where:\n    meta         = [PGSQL,ELASTICSEARCH]\n    event        = [PGSQL,HBASE,ELASTICSEARCH]\n    model        = [PGSQL,LOCALFS,HDFS,S3]\n    command      = command to run in the container\"\n\nif ! [[ \"$1\" =~ ^(PGSQL|ELASTICSEARCH)$ ]]; then\n  echo \"$USAGE\"\n  exit 1\nfi\nMETA=\"$1\"\nshift\n\nif ! [[ \"$1\" =~ ^(PGSQL|HBASE|ELASTICSEARCH)$ ]]; then\n  echo \"$USAGE\"\n  exit 1\nfi\nEVENT=\"$1\"\nshift\n\nif ! [[ \"$1\" =~ ^(PGSQL|LOCALFS|HDFS|S3)$ ]]; then\n  echo \"$USAGE\"\n  exit 1\nfi\nMODEL=\"$1\"\nshift\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" && pwd )\"\n\nsource $DIR/../conf/pio-vendors.sh\n\ndocker-compose -f $DIR/docker-compose.yml run \\\n  -e PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$META \\\n  -e PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$EVENT \\\n  -e PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$MODEL \\\n  pio-testing $@\n"
  },
  {
    "path": "tests/script.travis.sh",
    "content": "#!/bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\nif [[ $BUILD_TYPE == Unit ]]; then\n  tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \\\n    \"/PredictionIO/tests/unit.sh\"\nelif [[ $BUILD_TYPE == LicenseCheck ]]; then\n  tests/check_libraries.sh\nelse\n  tests/run_docker.sh $METADATA_REP $EVENTDATA_REP $MODELDATA_REP \\\n    \"python3 /PredictionIO/tests/pio_tests/tests.py\"\nfi\n"
  },
  {
    "path": "tests/unit.sh",
    "content": "#!/bin/bash -ex\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements.  See the NOTICE file distributed with\n# this work for additional information regarding copyright ownership.\n# The ASF licenses this file to You under the Apache License, Version 2.0\n# (the \"License\"); you may not use this file except in compliance with\n# the License.  You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n#\n\npushd /PredictionIO\n\n# Run license check\n./tests/check_license.sh\n\n# Prepare pio environment variables\nset -a\nsource ./conf/pio-env.sh\nset +a\nsource ./conf/pio-vendors.sh\n\n# Run stylecheck\nsbt/sbt scalastyle \\\n    -Dscala.version=$PIO_SCALA_VERSION \\\n    -Dspark.version=$PIO_SPARK_VERSION \\\n    -Dhadoop.version=$PIO_HADOOP_VERSION \\\n    -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION \\\n    -Dhbase.version=$PIO_HBASE_VERSION\n\n# Run all unit tests\nsbt/sbt dataJdbc/compile test storage/test \\\n    -Dscala.version=$PIO_SCALA_VERSION \\\n    -Dspark.version=$PIO_SPARK_VERSION \\\n    -Dhadoop.version=$PIO_HADOOP_VERSION \\\n    -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION \\\n    -Dhbase.version=$PIO_HBASE_VERSION\n\npopd\n"
  },
  {
    "path": "tools/build.sbt",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport PIOBuild._\nimport sbtassembly.AssemblyPlugin.autoImport._\n\nname := \"apache-predictionio-tools\"\n\nlibraryDependencies ++= Seq(\n  \"com.github.zafarkhaja\"  %  \"java-semver\"       % \"0.9.0\",\n  \"org.apache.spark\"       %% \"spark-sql\"         % sparkVersion.value % \"provided\",\n  \"com.typesafe.akka\"      %% \"akka-slf4j\"        % akkaVersion.value,\n  \"com.typesafe.akka\"      %% \"akka-http-testkit\" % \"10.1.5\" % \"test\",\n  \"org.specs2\"             %% \"specs2-core\"       % \"4.2.0\" % \"test\")\n\nassemblyMergeStrategy in assembly := {\n  case PathList(\"META-INF\", \"LICENSE.txt\") => MergeStrategy.concat\n  case PathList(\"META-INF\", \"NOTICE.txt\")  => MergeStrategy.concat\n  case x =>\n    val oldStrategy = (assemblyMergeStrategy in assembly).value\n    oldStrategy(x)\n}\n\nassemblyExcludedJars in assembly := {\n  val cp = (fullClasspath in assembly).value\n  cp filter { _.data.getName match {\n    case \"reflectasm-1.10.1.jar\" => true\n    case \"kryo-3.0.3.jar\" => true\n    case _ => false\n  }}\n}\n\nassemblyShadeRules in assembly := Seq(\n  ShadeRule.rename(\"org.objenesis.**\" -> 
\"shadeio.@1\").inLibrary(\"com.esotericsoftware.kryo\" % \"kryo\" % \"2.21\").inProject,\n  ShadeRule.rename(\"com.esotericsoftware.reflectasm.**\" -> \"shadeio.@1\").inLibrary(\"com.esotericsoftware.kryo\" % \"kryo\" % \"2.21\").inProject,\n  ShadeRule.rename(\"com.esotericsoftware.minlog.**\" -> \"shadeio.@1\").inLibrary(\"com.esotericsoftware.kryo\" % \"kryo\" % \"2.21\").inProject\n)\n\n// skip test in assembly\ntest in assembly := {}\n\nassemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" / s\"pio-assembly-${version.value}.jar\"\n\ncleanFiles += baseDirectory.value.getParentFile /\n  \"assembly\" / \"src\" / \"universal\" / \"lib\" \n\npomExtra := childrenPomExtra.value\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/Common.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools\n\nimport org.apache.predictionio.core.BuildInfo\nimport org.apache.predictionio.tools.ReturnTypes._\n\nimport grizzled.slf4j.Logging\nimport java.io.File\n\n\nobject ReturnTypes {\n  sealed case class Ok()\n\n  type MaybeError = Either[String, Ok]\n  type Expected[T] = Either[String, T]\n\n  val Success: MaybeError = Right(Ok())\n}\n\ntrait EitherLogging extends Logging {\n  import ReturnTypes._\n\n  protected def logAndFail[T](msg: => String): Expected[T] = {\n    error(msg)\n    Left(msg)\n  }\n\n  protected def logOnFail[T](msg: => String, t: => Throwable): Expected[T] = {\n    error(msg, t)\n    Left(msg)\n  }\n\n  protected def logAndReturn[T](value: T, msg: => Any): Expected[T] = {\n    info(msg)\n    Right(value)\n  }\n\n  protected def logAndSucceed(msg: => Any): MaybeError = {\n    info(msg)\n    Success\n  }\n}\n\nobject Common extends EitherLogging {\n\n  def getSparkHome(sparkHome: Option[String]): String = {\n    sparkHome getOrElse {\n      sys.env.getOrElse(\"SPARK_HOME\", \".\")\n    }\n  }\n\n  def getCoreDir(pioHome: String): String = {\n    if (new File(pioHome + File.separator + 
\"RELEASE\").exists) {\n      pioHome + File.separator + \"lib\"\n    } else {\n      Array(pioHome, \"assembly\", \"src\", \"universal\", \"lib\")\n        .mkString(File.separator)\n    }\n  }\n\n  def getEngineDirPath(directory: Option[String]): String = {\n    new File(directory.getOrElse(\".\")).getCanonicalPath\n  }\n\n  def jarFilesForScala(directory: String): Array[File] = {\n    def recursiveListFiles(f: File): Array[File] = {\n      Option(f.listFiles) map { these =>\n        these ++ these.filter(_.isDirectory).flatMap(recursiveListFiles)\n      } getOrElse Array[File]()\n    }\n    def jarFilesForScalaFilter(jars: Array[File]): Array[File] =\n      jars.filterNot { f =>\n        f.getName.toLowerCase.endsWith(\"-javadoc.jar\") ||\n        f.getName.toLowerCase.endsWith(\"-sources.jar\")\n      }\n    def jarFilesAt(path: File): Array[File] = recursiveListFiles(path) filter {\n      _.getName.toLowerCase.endsWith(\".jar\")\n    }\n\n    val engineDir = getEngineDirPath(Some(directory))\n    val libFiles = jarFilesForScalaFilter(\n      jarFilesAt(new File(engineDir, \"lib\")))\n    val targetFiles = jarFilesForScalaFilter(jarFilesAt(new File(engineDir,\n      \"target\" + File.separator + s\"scala-${BuildInfo.scalaBinaryVersion}\")))\n    // Use libFiles is target is empty.\n    if (targetFiles.size > 0) targetFiles else libFiles\n  }\n\n  def jarFilesForSpark(pioHome: String): Array[File] = {\n    def jarFilesAt(path: File): Array[File] = path.listFiles filter {\n      _.getName.toLowerCase.endsWith(\".jar\")\n    }\n    jarFilesAt(new File(getCoreDir(pioHome) + File.separator + \"spark\"))\n  }\n\n  def coreAssembly(pioHome: String): Expected[File] = {\n    val core = s\"pio-assembly-${BuildInfo.version}.jar\"\n    val coreFile = new File(getCoreDir(pioHome), core)\n    if (coreFile.exists) {\n      Right(coreFile)\n    } else {\n      logAndFail(s\"PredictionIO Core Assembly (${coreFile.getCanonicalPath}) does \" +\n        \"not exist. 
Aborting.\")\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/RunBatchPredict.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools\n\nimport org.apache.predictionio.tools.Common._\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.workflow.JsonExtractorOption\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\n\nimport java.io.File\nimport grizzled.slf4j.Logging\n\nimport scala.sys.process._\n\ncase class BatchPredictArgs(\n  inputFilePath: String = \"batchpredict-input.json\",\n  outputFilePath: String = \"batchpredict-output.json\",\n  queryPartitions: Option[Int] = None,\n  variantJson: Option[File] = None,\n  jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\n\nobject RunBatchPredict extends Logging {\n\n  def runBatchPredict(\n    engineInstanceId: String,\n    batchPredictArgs: BatchPredictArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    engineDirPath: String,\n    verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n\n    val jarFiles = jarFilesForScala(engineDirPath).map(_.toURI) ++\n      Option(new File(pioHome, \"plugins\").listFiles())\n        .getOrElse(Array.empty[File]).map(_.toURI)\n    val args = Seq[String](\n      
\"--input\",\n      batchPredictArgs.inputFilePath,\n      \"--output\",\n      batchPredictArgs.outputFilePath,\n      \"--engineInstanceId\",\n      engineInstanceId,\n      \"--engine-variant\",\n      batchPredictArgs.variantJson.getOrElse(\n        new File(engineDirPath, \"engine.json\")).getCanonicalPath) ++\n      (if (batchPredictArgs.queryPartitions.isEmpty) Nil\n        else Seq(\"--query-partitions\",\n                  batchPredictArgs.queryPartitions.get.toString)) ++\n      (if (verbose) Seq(\"--verbose\") else Nil) ++\n      Seq(\"--json-extractor\", batchPredictArgs.jsonExtractor.toString)\n\n    Runner.runOnSpark(\n      \"org.apache.predictionio.workflow.BatchPredict\",\n      args, sparkArgs, jarFiles, pioHome, verbose)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/RunServer.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools\n\nimport org.apache.predictionio.tools.Common._\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.workflow.JsonExtractorOption\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\n\nimport java.io.File\nimport grizzled.slf4j.Logging\n\nimport scala.sys.process._\n\ncase class DeployArgs(\n  ip: String = \"0.0.0.0\",\n  port: Int = 8000,\n  logUrl: Option[String] = None,\n  logPrefix: Option[String] = None)\n\ncase class EventServerArgs(\n  enabled: Boolean = false,\n  ip: String = \"0.0.0.0\",\n  port: Int = 7070,\n  stats: Boolean = false)\n\ncase class ServerArgs(\n  deploy: DeployArgs = DeployArgs(),\n  eventServer: EventServerArgs = EventServerArgs(),\n  batch: String = \"\",\n  accessKey: String = \"\",\n  variantJson: Option[File] = None,\n  jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\n\nobject RunServer extends Logging {\n\n  def runServer(\n    engineInstanceId: String,\n    serverArgs: ServerArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    engineDirPath: String,\n    verbose: Boolean = false): 
Expected[(Process, () => Unit)] = {\n\n    val jarFiles = jarFilesForScala(engineDirPath).map(_.toURI) ++\n      Option(new File(pioHome, \"plugins\").listFiles())\n        .getOrElse(Array.empty[File]).map(_.toURI)\n    val args = Seq(\n      \"--engineInstanceId\",\n      engineInstanceId,\n      \"--engine-variant\",\n      serverArgs.variantJson.getOrElse(\n        new File(engineDirPath, \"engine.json\")).getCanonicalPath,\n      \"--ip\",\n      serverArgs.deploy.ip,\n      \"--port\",\n      serverArgs.deploy.port.toString,\n      \"--event-server-ip\",\n      serverArgs.eventServer.ip,\n      \"--event-server-port\",\n      serverArgs.eventServer.port.toString) ++\n      (if (serverArgs.accessKey != \"\") {\n        Seq(\"--accesskey\", serverArgs.accessKey)\n      } else {\n        Nil\n      }) ++\n      (if (serverArgs.eventServer.enabled) Seq(\"--feedback\") else Nil) ++\n      (if (serverArgs.batch != \"\") Seq(\"--batch\", serverArgs.batch) else Nil) ++\n      (if (verbose) Seq(\"--verbose\") else Nil) ++\n      serverArgs.deploy.logUrl.map(x => Seq(\"--log-url\", x)).getOrElse(Nil) ++\n      serverArgs.deploy.logPrefix.map(x => Seq(\"--log-prefix\", x)).getOrElse(Nil) ++\n      Seq(\"--json-extractor\", serverArgs.jsonExtractor.toString)\n\n    Runner.runOnSpark(\n      \"org.apache.predictionio.workflow.CreateServer\",\n      args, sparkArgs, jarFiles, pioHome, verbose)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/RunWorkflow.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools\n\nimport org.apache.predictionio.tools.console.Console\nimport org.apache.predictionio.tools.Common._\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.workflow.JsonExtractorOption\nimport org.apache.predictionio.workflow.JsonExtractorOption.JsonExtractorOption\n\nimport java.io.File\nimport grizzled.slf4j.Logging\n\nimport scala.sys.process._\n\ncase class WorkflowArgs(\n  batch: String = \"\",\n  variantJson: Option[File] = None,\n  verbosity: Int = 0,\n  engineParamsKey: Option[String] = None,\n  engineFactory: Option[String] = None,\n  evaluation: Option[String] = None,\n  engineParamsGenerator: Option[String] = None,\n  stopAfterRead: Boolean = false,\n  stopAfterPrepare: Boolean = false,\n  skipSanityCheck: Boolean = false,\n  mainPyFile: Option[String] = None,\n  jsonExtractor: JsonExtractorOption = JsonExtractorOption.Both)\n\nobject RunWorkflow extends Logging {\n  def runWorkflow(\n    wa: WorkflowArgs,\n    sa: SparkArgs,\n    pioHome: String,\n    engineDirPath: String,\n    verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n\n    val jarFiles = 
jarFilesForScala(engineDirPath).map(_.toURI)\n    val args =\n      {\n        val variantJson = wa.variantJson.getOrElse(new File(engineDirPath, \"engine.json\"))\n        val ei = Console.getEngineInfo(variantJson, engineDirPath)\n        Seq(\n          \"--engine-id\", ei.engineId,\n          \"--engine-version\", ei.engineVersion,\n          \"--engine-variant\", variantJson.toURI.toString)\n      } ++\n      wa.engineFactory.map(\n        x => Seq(\"--engine-factory\", x)).getOrElse(Nil) ++\n      wa.engineParamsKey.map(\n        x => Seq(\"--engine-params-key\", x)).getOrElse(Nil) ++\n      (if (wa.batch != \"\") Seq(\"--batch\", wa.batch) else Nil) ++\n      (if (verbose) Seq(\"--verbose\") else Nil) ++\n      (if (wa.skipSanityCheck) Seq(\"--skip-sanity-check\") else Nil) ++\n      (if (wa.stopAfterRead) Seq(\"--stop-after-read\") else Nil) ++\n      (if (wa.stopAfterPrepare) Seq(\"--stop-after-prepare\") else Nil) ++\n      wa.evaluation.map(x => Seq(\"--evaluation-class\", x)).\n        getOrElse(Nil) ++\n      // If engineParamsGenerator is specified, it overrides the evaluation.\n      wa.engineParamsGenerator.orElse(wa.evaluation)\n        .map(x => Seq(\"--engine-params-generator-class\", x))\n        .getOrElse(Nil) ++\n      Seq(\"--json-extractor\", wa.jsonExtractor.toString,\n          \"--verbosity\", wa.verbosity.toString)\n\n    val resourceName = wa.mainPyFile match {\n      case Some(x) => x\n      case _ => \"org.apache.predictionio.workflow.CreateWorkflow\"\n    }\n    Runner.runOnSpark(\n      resourceName,\n      args,\n      sa,\n      jarFiles,\n      pioHome,\n      verbose)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/Runner.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools\n\nimport java.io.File\nimport java.net.URI\n\nimport org.apache.hadoop.conf.Configuration\nimport org.apache.hadoop.fs.{FileSystem, Path}\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.workflow.WorkflowUtils\n\nimport scala.collection.mutable\nimport scala.sys.process._\n\ncase class SparkArgs(\n  sparkHome: Option[String] = None,\n  sparkPassThrough: Seq[String] = Nil,\n  sparkKryo: Boolean = false,\n  scratchUri: Option[URI] = None)\n\nobject Runner extends EitherLogging {\n  def envStringToMap(env: String): Map[String, String] =\n    env.split(',').flatMap(p =>\n      p.split('=') match {\n        case Array(k, v) => List(k -> v)\n        case _ => Nil\n      }\n    ).toMap\n\n  def argumentValue(arguments: Seq[String], argumentName: String): Option[String] = {\n    val argumentIndex = arguments.indexOf(argumentName)\n    try {\n      arguments(argumentIndex) // just to make it error out if index is -1\n      Some(arguments(argumentIndex + 1))\n    } catch {\n      case e: IndexOutOfBoundsException => None\n    }\n  }\n\n  def handleScratchFile(\n      fileSystem: 
Option[FileSystem],\n      uri: Option[URI],\n      localFile: File): String = {\n    val localFilePath = localFile.getCanonicalPath\n    (fileSystem, uri) match {\n      case (Some(fs), Some(u)) =>\n        val dest = fs.makeQualified(Path.mergePaths(\n          new Path(u),\n          new Path(localFilePath)))\n        info(s\"Copying $localFile to ${dest.toString}\")\n        fs.copyFromLocalFile(new Path(localFilePath), dest)\n        dest.toUri.toString\n      case _ => localFile.toURI.toString\n    }\n  }\n\n  def cleanup(fs: Option[FileSystem], uri: Option[URI]): Unit = {\n    (fs, uri) match {\n      case (Some(f), Some(u)) =>\n        f.close()\n      case _ => ()\n    }\n  }\n\n  def detectFilePaths(\n      fileSystem: Option[FileSystem],\n      uri: Option[URI],\n      args: Seq[String]): Seq[String] = {\n    args map { arg =>\n      val f = try {\n        new File(new URI(arg))\n      } catch {\n        case e: Throwable => new File(arg)\n      }\n      if (f.exists()) {\n        handleScratchFile(fileSystem, uri, f)\n      } else {\n        arg\n      }\n    }\n  }\n\n  /** Group argument values by argument names\n    *\n    * This only works with long argument names immediately followed by a value\n    *\n    * Input:\n    * Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n    *\n    * Output:\n    * Map(\"--foo\" -> Seq(\"bar\", \"jeez\"), \"--dead\"- > \"beef baz\")\n    *\n    * @param arguments Sequence of argument names and values\n    * @return A map with argument values keyed by the same argument name\n    */\n  def groupByArgumentName(arguments: Seq[String]): Map[String, Seq[String]] = {\n    val argumentMap = mutable.HashMap.empty[String, Seq[String]]\n    arguments.foldLeft(\"\") { (prev, current) =>\n      if (prev.startsWith(\"--\") && !current.startsWith(\"--\")) {\n        if (argumentMap.contains(prev)) {\n          argumentMap(prev) = argumentMap(prev) :+ current\n        } else {\n          
argumentMap(prev) = Seq(current)\n        }\n      }\n      current\n    }\n    argumentMap.toMap\n  }\n\n  /** Remove argument names and values\n    *\n    * This only works with long argument names immediately followed by a value\n    *\n    * Input:\n    * Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n    * Set(\"--flag\", \"--foo\")\n    *\n    * Output:\n    * Seq(\"--flag\", \"--dead\", \"beef baz\", \"n00b\")\n    *\n    * @param arguments Sequence of argument names and values\n    * @param remove Name of argument and associated values to remove\n    * @return Sequence of argument names and values with targets removed\n    */\n  def removeArguments(arguments: Seq[String], remove: Set[String]): Seq[String] = {\n    if (remove.isEmpty) {\n      arguments\n    } else {\n      arguments.foldLeft(Seq.empty[String]) { (ongoing, current) =>\n        if (ongoing.isEmpty) {\n          Seq(current)\n        } else {\n          if (remove.contains(ongoing.last) && !current.startsWith(\"--\")) {\n            ongoing.take(ongoing.length - 1)\n          } else {\n            ongoing :+ current\n          }\n        }\n      }\n    }\n  }\n\n  /** Combine repeated arguments together\n    *\n    * Input:\n    * Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n    * Map(\"--foo\", (_ + _))\n    *\n    * Output:\n    * Seq(\"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"bar jeez\")\n    *\n    * @param arguments Sequence of argument names and values\n    * @param combinators Map of argument name to combinator function\n    * @return Sequence of argument names and values with specific argument values combined\n    */\n  def combineArguments(\n      arguments: Seq[String],\n      combinators: Map[String, (String, String) => String]): Seq[String] = {\n    val argumentsToCombine: Map[String, Seq[String]] =\n      
groupByArgumentName(arguments).filterKeys(combinators.keySet.contains(_))\n    val argumentsMinusToCombine = removeArguments(arguments, combinators.keySet)\n    val combinedArguments = argumentsToCombine flatMap { kv =>\n      Seq(kv._1, kv._2.reduce(combinators(kv._1)))\n    }\n    argumentsMinusToCombine ++ combinedArguments\n  }\n\n  def runOnSpark(\n      resourceName: String,\n      classArgs: Seq[String],\n      sa: SparkArgs,\n      extraJars: Seq[URI],\n      pioHome: String,\n      verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n    // Return error for unsupported cases\n    val deployMode =\n      argumentValue(sa.sparkPassThrough, \"--deploy-mode\").getOrElse(\"client\")\n    val master =\n      argumentValue(sa.sparkPassThrough, \"--master\").getOrElse(\"local\")\n    val isPython = resourceName match {\n      case x if x.endsWith(\".py\") => true\n      case _ => false\n    }\n\n    (sa.scratchUri, deployMode, master) match {\n      case (Some(u), \"client\", m) if m != \"yarn-cluster\" =>\n        return logAndFail(\"--scratch-uri cannot be set when deploy mode is client\")\n      case (_, \"cluster\", m) if m.startsWith(\"spark://\") =>\n        return logAndFail(\n          \"Using cluster deploy mode with Spark standalone cluster is not supported\")\n      case _ => ()\n    }\n\n    // Initialize HDFS API for scratch URI\n    val fs = sa.scratchUri map { uri =>\n      FileSystem.get(uri, new Configuration())\n    }\n\n    // Collect and serialize PIO_* environmental variables\n    val pioEnvVars = sys.env.filter(kv => kv._1.startsWith(\"PIO_\")).map(kv =>\n      s\"${kv._1}=${kv._2}\"\n    ).mkString(\",\")\n\n    // Location of Spark\n    val sparkHome = sa.sparkHome.getOrElse(\n      sys.env.getOrElse(\"SPARK_HOME\", \".\"))\n\n    // Local path to PredictionIO assembly JAR\n    val assemblyJar = Common.coreAssembly(pioHome) fold(\n      errStr => return Left(errStr),\n      assembly => handleScratchFile(fs, sa.scratchUri, 
assembly)\n    )\n    val mainJar = if(isPython) {\n      resourceName\n    } else {\n      assemblyJar\n    }\n\n    // Extra JARs that are needed by the driver\n    val driverClassPathPrefix =\n      argumentValue(sa.sparkPassThrough, \"--driver-class-path\") map { v =>\n        Seq(v)\n      } getOrElse {\n        Nil\n      }\n\n    val extraClasspaths =\n      driverClassPathPrefix ++ WorkflowUtils.thirdPartyClasspaths\n\n    // Extra files that are needed to be passed to --files\n    val extraFiles = WorkflowUtils.thirdPartyConfFiles map { f =>\n      handleScratchFile(fs, sa.scratchUri, new File(f))\n    }\n\n    val deployedJars = extraJars map { j =>\n      handleScratchFile(fs, sa.scratchUri, new File(j))\n    }\n\n    val sparkSubmitCommand =\n      Seq(Seq(sparkHome, \"bin\", \"spark-submit\").mkString(File.separator))\n\n    val sparkSubmitJarsList = if(isPython) {\n      WorkflowUtils.thirdPartyJars ++ deployedJars ++\n        Common.jarFilesForSpark(pioHome).map(_.toURI) ++ Seq(new URI(assemblyJar))\n    } else {\n       WorkflowUtils.thirdPartyJars ++ deployedJars ++\n        Common.jarFilesForSpark(pioHome).map(_.toURI)\n    }\n    val sparkSubmitJars = if (sparkSubmitJarsList.nonEmpty) {\n      Seq(\"--jars\", sparkSubmitJarsList.map(_.toString).mkString(\",\"))\n    } else {\n      Nil\n    }\n\n    val sparkSubmitFiles = if (extraFiles.nonEmpty) {\n      Seq(\"--files\", extraFiles.mkString(\",\"))\n    } else {\n      Nil\n    }\n\n    val sparkSubmitExtraClasspaths = if (extraClasspaths.nonEmpty) {\n      Seq(\"--driver-class-path\", extraClasspaths.mkString(\":\"))\n    } else {\n      Nil\n    }\n\n    val sparkSubmitKryo = if (sa.sparkKryo) {\n      Seq(\n        \"--conf\",\n        \"spark.serializer=org.apache.spark.serializer.KryoSerializer\")\n    } else {\n      Nil\n    }\n\n    val className = if(isPython) {\n      Nil\n    } else {\n      Seq(\"--class\", resourceName)\n    }\n\n    val verboseArg = if (verbose) Seq(\"--verbose\") 
else Nil\n    val pioLogDir = Option(System.getProperty(\"pio.log.dir\")).getOrElse(s\"$pioHome/log\")\n\n    val sparkSubmitArgs = Seq(\n      sa.sparkPassThrough,\n      className,\n      sparkSubmitJars,\n      sparkSubmitFiles,\n      sparkSubmitExtraClasspaths,\n      sparkSubmitKryo,\n      Seq(\"--driver-java-options\", s\"-Dpio.log.dir=$pioLogDir\")).flatten\n\n    val whitespaceCombinator = (a: String, b: String) => s\"$a $b\"\n    val combinators = Map(\"--driver-java-options\" -> whitespaceCombinator)\n\n    val sparkSubmit = Seq(\n      sparkSubmitCommand,\n      combineArguments(sparkSubmitArgs, combinators),\n      Seq(mainJar),\n      detectFilePaths(fs, sa.scratchUri, classArgs),\n      Seq(\"--env\", pioEnvVars),\n      verboseArg).flatten.filter(_ != \"\")\n    info(s\"Submission command: ${sparkSubmit.mkString(\" \")}\")\n    val extraEnv: Seq[(String, String)] = if(isPython) {\n      Seq(\"CLASSPATH\" -> \"\",\n        \"SPARK_YARN_USER_ENV\" -> pioEnvVars,\n        \"PYTHONPATH\" -> s\"$pioHome/python\")\n    } else {\n      Seq(\"CLASSPATH\" -> \"\",\n        \"SPARK_YARN_USER_ENV\" -> pioEnvVars)\n    }\n    val proc = Process(\n      sparkSubmit,\n      None,\n      extraEnv:_*).run()\n    Right((proc, () => cleanup(fs, sa.scratchUri)))\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/admin/AdminAPI.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage org.apache.predictionio.tools.admin\n\nimport java.util.concurrent.TimeUnit\n\nimport akka.http.scaladsl.server._\nimport org.apache.predictionio.data.storage._\n\nimport scala.concurrent.duration.Duration\nimport scala.concurrent.{Await, ExecutionContext}\nimport akka.actor.ActorSystem\nimport akka.stream.ActorMaterializer\nimport akka.http.scaladsl.Http\nimport akka.http.scaladsl.model._\nimport akka.http.scaladsl.server.Directives._\nimport akka.util.Timeout\nimport org.apache.predictionio.akkahttpjson4s.Json4sSupport._\nimport org.json4s.{DefaultFormats, Formats}\n\nobject Json4sProtocol {\n  implicit val serialization = org.json4s.jackson.Serialization\n  implicit def json4sFormats: Formats = DefaultFormats\n}\n\ncase class AdminServerConfig(\n  ip: String = \"localhost\",\n  port: Int = 7071\n)\n\nobject AdminServer {\n  import Json4sProtocol._\n\n  private implicit val timeout: Timeout = Timeout(5, TimeUnit.SECONDS)\n\n  // for better message response\n  private val rejectionHandler = RejectionHandler.newBuilder().handle {\n    case MalformedRequestContentRejection(msg, _) =>\n      complete(StatusCodes.BadRequest, 
Map(\"message\" -> msg))\n    case MissingQueryParamRejection(msg) =>\n      complete(StatusCodes.NotFound,\n        Map(\"message\" -> s\"missing required query parameter ${msg}.\"))\n    case AuthenticationFailedRejection(cause, challengeHeaders) =>\n      complete(StatusCodes.Unauthorized, challengeHeaders,\n        Map(\"message\" -> s\"Invalid accessKey.\"))\n  }.result()\n\n  def createRoute()(implicit executionContext: ExecutionContext): Route = {\n\n    val commandClient = new CommandClient(\n      appClient = Storage.getMetaDataApps,\n      accessKeyClient = Storage.getMetaDataAccessKeys,\n      eventClient = Storage.getLEvents()\n    )\n\n    val route =\n      pathSingleSlash {\n        get {\n          complete(Map(\"status\" -> \"alive\"))\n        }\n      } ~\n      path(\"cmd\" / \"app\" / Segment / \"data\") {\n        appName => {\n          delete {\n            complete(commandClient.futureAppDataDelete(appName))\n          }\n        }\n      } ~\n      path(\"cmd\" / \"app\" / Segment) {\n        appName => {\n          delete {\n            complete(commandClient.futureAppDelete(appName))\n          }\n        }\n      } ~\n      path(\"cmd\" / \"app\") {\n        get {\n          complete(commandClient.futureAppList())\n        } ~\n        post {\n          entity(as[AppRequest]) {\n            appArgs =>\n              onSuccess(commandClient.futureAppNew(appArgs)){\n                case res: GeneralResponse => complete(res)\n                case res: AppNewResponse  => complete(res)\n              }\n          }\n        }\n      }\n\n    route\n  }\n\n\n  def createAdminServer(config: AdminServerConfig): ActorSystem = {\n    implicit val system = ActorSystem(\"AdminServerSystem\")\n    implicit val materializer = ActorMaterializer()\n    implicit val executionContext = system.dispatcher\n\n    val route = createRoute()\n    Http().bindAndHandle(route, config.ip, config.port)\n    system\n  }\n}\n\nobject AdminRun {\n  def main (args: 
Array[String]) : Unit = {\n    val f = AdminServer.createAdminServer(AdminServerConfig(\n      ip = \"localhost\",\n      port = 7071))\n    .whenTerminated\n\n    Await.ready(f, Duration.Inf)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/admin/CommandClient.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.admin\n\nimport org.apache.predictionio.data.storage._\n\nimport scala.concurrent.{ExecutionContext, Future}\n\nabstract class BaseResponse()\n\ncase class GeneralResponse(\n  status: Int = 0,\n  message: String = \"\"\n) extends BaseResponse()\n\ncase class AppRequest(\n  id: Int = 0,\n  name: String = \"\",\n  description: String = \"\"\n)\n\ncase class TrainRequest(\n  enginePath: String = \"\"\n)\ncase class AppResponse(\n  id: Int = 0,\n  name: String = \"\",\n  keys: Seq[AccessKey]\n) extends BaseResponse()\n\ncase class AppNewResponse(\n  status: Int = 0,\n  message: String = \"\",\n  id: Int = 0,\n  name: String = \"\",\n  key: String\n) extends BaseResponse()\n\ncase class AppListResponse(\n  status: Int = 0,\n  message: String = \"\",\n  apps: Seq[AppResponse]\n) extends BaseResponse()\n\nclass CommandClient(\n  val appClient: Apps,\n  val accessKeyClient: AccessKeys,\n  val eventClient: LEvents\n) {\n\n  def futureAppNew(req: AppRequest)(implicit ec: ExecutionContext): Future[BaseResponse] = Future {\n    val response = appClient.getByName(req.name) map { app =>\n      GeneralResponse(0, 
s\"App ${req.name} already exists. Aborting.\")\n    } getOrElse {\n      appClient.get(req.id) map {\n        app2 =>\n          GeneralResponse(0,\n              s\"App ID ${app2.id} already exists and maps to the app '${app2.name}'. \" +\n              \"Aborting.\")\n      } getOrElse {\n        val appid = appClient.insert(App(\n          id = Option(req.id).getOrElse(0),\n          name = req.name,\n          description = Option(req.description)))\n        appid map { id =>\n          val dbInit = eventClient.init(id)\n          val r = if (dbInit) {\n            val accessKey = AccessKey(\n              key = \"\",\n              appid = id,\n              events = Nil)\n            val accessKey2 = accessKeyClient.insert(AccessKey(\n              key = \"\",\n              appid = id,\n              events = Nil))\n            accessKey2 map { k =>\n              new AppNewResponse(1,\"App created successfully.\",id, req.name, k)\n            } getOrElse {\n              GeneralResponse(0, s\"Unable to create new access key.\")\n            }\n          } else {\n            GeneralResponse(0, s\"Unable to initialize Event Store for this app ID: ${id}.\")\n          }\n          r\n        } getOrElse {\n          GeneralResponse(0, s\"Unable to create new app.\")\n        }\n      }\n    }\n    response\n  }\n\n  def futureAppList()(implicit ec: ExecutionContext): Future[AppListResponse] = Future {\n    val apps = appClient.getAll().sortBy(_.name)\n    val appsRes = apps.map {\n      app => {\n        new AppResponse(app.id, app.name, accessKeyClient.getByAppid(app.id))\n      }\n    }\n    new AppListResponse(1, \"Successful retrieved app list.\", appsRes)\n  }\n\n  def futureAppDataDelete(appName: String)\n      (implicit ec: ExecutionContext): Future[GeneralResponse] = Future {\n    val response = appClient.getByName(appName) map { app =>\n      val data = if (eventClient.remove(app.id)) {\n        GeneralResponse(1, s\"Removed Event Store for this app 
ID: ${app.id}\")\n      } else {\n        GeneralResponse(0, s\"Error removing Event Store for this app.\")\n      }\n\n      val dbInit = eventClient.init(app.id)\n      val data2 = if (dbInit) {\n        GeneralResponse(1, s\"Initialized Event Store for this app ID: ${app.id}.\")\n      } else {\n        GeneralResponse(0, s\"Unable to initialize Event Store for this appId:\" +\n          s\" ${app.id}.\")\n      }\n      GeneralResponse(data.status * data2.status, data.message + data2.message)\n    } getOrElse {\n      GeneralResponse(0, s\"App ${appName} does not exist.\")\n    }\n    response\n  }\n\n  def futureAppDelete(appName: String)\n      (implicit ec: ExecutionContext): Future[GeneralResponse] = Future {\n\n    val response = appClient.getByName(appName) map { app =>\n      val data = if (eventClient.remove(app.id)) {\n        Storage.getMetaDataApps.delete(app.id)\n        GeneralResponse(1, s\"App successfully deleted\")\n      } else {\n        GeneralResponse(0, s\"Error removing Event Store for app ${app.name}.\");\n      }\n      data\n    } getOrElse {\n      GeneralResponse(0, s\"App ${appName} does not exist.\")\n    }\n    response\n  }\n\n  def futureTrain(req: TrainRequest)\n      (implicit ec: ExecutionContext): Future[GeneralResponse] = Future {\n    null\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/admin/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one or more\ncontributor license agreements.  See the NOTICE file distributed with\nthis work for additional information regarding copyright ownership.\nThe ASF licenses this file to You under the Apache License, Version 2.0\n(the \"License\"); you may not use this file except in compliance with\nthe License.  You may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n-->\n\n## Admin API (under development)\n\n### Start Admin HTTP Server without bin/pio (for development)\n\nNOTE: elasticsearch and hbase should be running first.\n\n```\n$ sbt/sbt \"tools/compile\"\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"tools/runMain org.apache.predictionio.tools.admin.AdminRun\"\n```\n\n### Unit test (Very minimal)\n\n```\n$ set -a\n$ source conf/pio-env.sh\n$ set +a\n$ sbt/sbt \"tools/testOnly org.apache.predictionio.tools.admin.AdminAPISpec\"\n```\n\n### Start with pio command adminserver\n\n```\n$ pio adminserver\n```\n\nAdmin Server url defaults to `http://localhost:7071`\n\nThe host and port can be specified by using the 'ip' and 'port' parameters\n\n```\n$ pio adminserver --ip 127.0.0.1 --port 7080\n```\n\n### Current Supported Commands\n\n#### Check status\n\n```\n$ curl -i http://localhost:7071/\n\n{\"status\":\"alive\"}\n```\n\n#### Get list of apps\n\n```\n$ curl -i -X GET http://localhost:7071/cmd/app\n\n{\"status\":1,\"message\":\"Successful retrieved app 
list.\",\"apps\":[{\"id\":12,\"name\":\"scratch\",\"keys\":[{\"key\":\"gtPgVMIr3uthus1QJWFBcIjNf6d1SNuhaOWQAgdLbOBP1eRWMNIJWl6SkHgI1OoN\",\"appid\":12,\"events\":[]}]},{\"id\":17,\"name\":\"test-ecommercerec\",\"keys\":[{\"key\":\"zPkr6sBwQoBwBjVHK2hsF9u26L38ARSe19QzkdYentuomCtYSuH0vXP5fq7advo4\",\"appid\":17,\"events\":[]}]}]}\n```\n\n#### Create a new app\n\n```\n$ curl -i -X POST http://localhost:7071/cmd/app \\\n-H \"Content-Type: application/json\" \\\n-d '{ \"name\" : \"my_new_app\" }'\n\n{\"status\":1,\"message\":\"App created successfully.\",\"id\":19,\"name\":\"my_new_app\",\"keys\":[{\"key\":\"\",\"appid\":19,\"events\":[]}]}\n```\n\n#### Delete data of app\n\n```\n$ curl -i -X DELETE http://localhost:7071/cmd/app/my_new_app/data\n```\n\n#### Delete app\n\n```\n$ curl -i -X DELETE http://localhost:7071/cmd/app/my_new_app\n\n{\"status\":1,\"message\":\"App successfully deleted\"}\n```\n\n\n## API Doc (To be updated)\n\n### app list:\nGET http://localhost:7071/cmd/app\n\nOK Response:\n{\n  “status”: <STATUS>,\n  “message”: <MESSAGE>,\n  “apps” : [\n    { “name”: “<APP_NAME>”,\n      “id”: <APP_ID>,\n      “accessKey” : “<ACCESS_KEY>” },\n    { “name”: “<APP_NAME>”,\n      “id”: <APP_ID>,\n      “accessKey” : “<ACCESS_KEY>” }, ... 
]\n}\n\nError Response:\n{“status”: <STATUS>, “message” : “<MESSAGE>”}\n\n### app new\nPOST http://localhost:7071/cmd/app\nRequest Body:\n{ “name”: “<APP_NAME>”, // required\n  “id”: <APP_ID>, // optional\n  “description”: “<DESCRIPTION>” } // optional\n\nOK Response:\n{ “status”: <STATUS>,\n  “message”: <MESSAGE>,\n  “app” : {\n    “name”: “<APP_NAME>”,\n    “id”: <APP_ID>,\n    “accessKey” : “<ACCESS_KEY>” }\n}\n\nError Response:\n{ “status”: <STATUS>, “message” : “<MESSAGE>”}\n\n### app delete\nDELETE http://localhost:7071/cmd/app/{appName}\n\nOK Response:\n{ \"status\": <STATUS>, \"message\" : “<MESSAGE>”}\n\nError Response:\n{ “status”: <STATUS>, “message” : “<MESSAGE>”}\n\n### app data-delete\nDELETE http://localhost:7071/cmd/app/{appName}/data\n\nOK Response:\n{ \"status\": <STATUS>, \"message\" : “<MESSAGE>”}\n\nError Response:\n{ “status”: <STATUS>, “message” : “<MESSAGE>” }\n\n\n### train TBD\n\n#### Training request:\nPOST http://localhost:7071/cmd/train\nRequest body: TBD\n\nOK Response: TBD\n\nError Response: TBD\n\n#### Get training status:\nGET http://localhost:7071/cmd/train/{engineInstanceId}\n\nOK Response: TBD\nINIT\nTRAINING\nDONE\nERROR\n\nError Response: TBD\n\n### deploy TBD\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/AccessKey.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.data.storage\nimport org.apache.predictionio.tools.EitherLogging\nimport org.apache.predictionio.tools.ReturnTypes._\n\nobject AccessKey extends EitherLogging {\n\n  def create(\n    appName: String,\n    key: String,\n    events: Seq[String]): Expected[storage.AccessKey] = {\n\n    val apps = storage.Storage.getMetaDataApps\n    apps.getByName(appName) map { app =>\n      val accessKeys = storage.Storage.getMetaDataAccessKeys\n      val newKey = storage.AccessKey(\n        key = key,\n        appid = app.id,\n        events = events)\n      accessKeys.insert(newKey) map { k =>\n        info(s\"Created new access key: ${k}\")\n        Right(newKey.copy(key = k))\n      } getOrElse {\n        logAndFail(s\"Unable to create new access key.\")\n      }\n    } getOrElse {\n      logAndFail(s\"App ${appName} does not exist. 
Aborting.\")\n    }\n  }\n\n  def list(app: Option[String]): Expected[Seq[storage.AccessKey]] =\n    app map { appName =>\n      App.show(appName).right map { appChansPair => appChansPair._1.keys }\n    } getOrElse {\n      Right(storage.Storage.getMetaDataAccessKeys.getAll)\n    }\n\n  def delete(key: String): MaybeError = {\n    try {\n      storage.Storage.getMetaDataAccessKeys.delete(key)\n      logAndSucceed(s\"Deleted access key ${key}.\")\n    } catch {\n      case e: Exception =>\n        error(s\"Error deleting access key ${key}.\", e)\n        Left(s\"Error deleting access key ${key}.\")\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/App.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.data.storage\nimport org.apache.predictionio.data.storage.Channel\nimport org.apache.predictionio.tools.EitherLogging\nimport org.apache.predictionio.tools.ReturnTypes._\n\nsealed case class AppDescription(\n  app: storage.App,\n  keys: Seq[storage.AccessKey])\n\nobject App extends EitherLogging {\n\n  def create(\n    name: String,\n    id: Option[Int] = None,\n    description: Option[String] = None,\n    accessKey: String = \"\") : Expected[AppDescription] = {\n\n    val apps = storage.Storage.getMetaDataApps()\n    // get the client in the beginning so error exit right away if can't access client\n    val events = storage.Storage.getLEvents()\n    var errStr = \"\"\n\n    apps.getByName(name) map { app =>\n      errStr = s\"App ${name} already exists. Aborting.\"\n      error(errStr)\n      errStr\n    } orElse {\n      id.flatMap { id =>\n        apps.get(id) map { app =>\n          errStr = s\"App ID ${id} already exists and maps to the app '${app.name}'. 
\" +\n            \"Aborting.\"\n          error(errStr)\n          errStr\n        }\n      }\n    } map {err => Left(err)} getOrElse {\n      val newApp = storage.App(\n        id = id.getOrElse(0),\n        name = name,\n        description = description)\n      val appid = apps.insert(newApp)\n\n      appid map { id =>\n        val dbInit = events.init(id)\n        val r = if (dbInit) {\n          info(s\"Initialized Event Store for this app ID: ${id}.\")\n          val accessKeys = storage.Storage.getMetaDataAccessKeys\n          val newKey = storage.AccessKey(\n            key = accessKey,\n            appid = id,\n            events = Nil)\n          accessKeys.insert(newKey)\n          .map { k =>\n            Right(AppDescription(\n              app = newApp.copy(id = id),\n              keys = Seq(newKey.copy(key = k))))\n          } getOrElse {\n            logAndFail(s\"Unable to create new access key.\")\n          }\n        } else {\n          errStr = s\"Unable to initialize Event Store for this app ID: ${id}.\"\n          try {\n            apps.delete(id)\n          } catch {\n            case e: Exception =>\n              errStr += s\"\"\"\n                |Failed to revert back the App meta-data change.\n                |The app ${name} CANNOT be used!\n                |Please run 'pio app delete ${name}' to delete this app!\"\"\".stripMargin\n          }\n          logAndFail(errStr)\n        }\n        events.close()\n        r\n      } getOrElse {\n        logAndFail(s\"Unable to create new app.\")\n      }\n    }\n  }\n\n  def list: Seq[AppDescription] = {\n    val apps = storage.Storage.getMetaDataApps.getAll().sortBy(_.name)\n    val accessKeys = storage.Storage.getMetaDataAccessKeys\n\n    apps map { app =>\n      AppDescription(\n        app = app,\n        keys = accessKeys.getByAppid(app.id))\n    }\n  }\n\n  def show(appName: String): Expected[(AppDescription, Seq[Channel])] = {\n    val apps = storage.Storage.getMetaDataApps\n    
val accessKeys = storage.Storage.getMetaDataAccessKeys\n    val channels = storage.Storage.getMetaDataChannels\n\n    apps.getByName(appName) map { app =>\n      Right(\n        (AppDescription(\n          app = app,\n          keys = accessKeys.getByAppid(app.id)),\n        channels.getByAppid(app.id))\n      )\n    } getOrElse {\n      logAndFail(s\"App ${appName} does not exist. Aborting.\")\n    }\n  }\n\n  def delete(name: String): MaybeError = {\n    val events = storage.Storage.getLEvents()\n    try {\n      show(name).right.flatMap { case (appDesc: AppDescription, channels: Seq[Channel]) =>\n\n        val delChannelStatus: MaybeError =\n          channels.map { ch =>\n            if (events.remove(appDesc.app.id, Some(ch.id))) {\n              info(s\"Removed Event Store of the channel ID: ${ch.id}\")\n              try {\n                storage.Storage.getMetaDataChannels.delete(ch.id)\n                info(s\"Deleted channel ${ch.name}\")\n                None\n              } catch {\n                case e: Exception =>\n                  val errStr = s\"Error deleting channel ${ch.name}.\"\n                  error(errStr, e)\n                  Some(errStr)\n              }\n              } else {\n                val errStr = s\"Error removing Event Store of the channel ID: ${ch.id}.\"\n                error(errStr)\n                Some(errStr)\n              }\n          }\n          .flatten\n          .reduceOption(_ + \"\\n\" + _)\n          .map(Left(_)) getOrElse Success\n\n          if (delChannelStatus.isLeft) {\n            return delChannelStatus\n          }\n\n          try {\n            events.remove(appDesc.app.id)\n            info(s\"Removed Event Store for this app ID: ${appDesc.app.id}\")\n          } catch {\n            case e: Exception =>\n              logAndFail(s\"Error removing Event Store for this app. 
Aborting.\")\n          }\n\n          appDesc.keys foreach { key =>\n            try {\n              storage.Storage.getMetaDataAccessKeys.delete(key.key)\n              info(s\"Removed access key ${key.key}\")\n            } catch {\n              case e: Exception =>\n                logAndFail(s\"Error removing access key ${key.key}. Aborting.\")\n            }\n          }\n\n          try {\n            storage.Storage.getMetaDataApps.delete(appDesc.app.id)\n            info(s\"Deleted app ${appDesc.app.name}.\")\n          } catch {\n            case e: Exception =>\n              logAndFail(s\"Error deleting app ${appDesc.app.name}. Aborting.\")\n          }\n          logAndSucceed(\"Done.\")\n      }\n\n    } finally {\n      events.close()\n    }\n  }\n\n  def dataDelete(\n    name: String,\n    channel: Option[String] = None,\n    all: Boolean = false): MaybeError = {\n\n    var errStr = \"\"\n    val events = storage.Storage.getLEvents()\n    try {\n      show(name).right.flatMap { case (appDesc: AppDescription, channels: Seq[Channel]) =>\n\n        val chanIdsToRemove: Seq[Option[Int]] =\n          if (all) {\n            channels.map(ch => Some(ch.id)) :+ None // remove default channel too\n          } else {\n            channel.map { chName =>\n              channels.find(ch => ch.name == chName) match {\n                case None =>\n                  return logAndFail(s\"\"\"Unable to delete data for channel.\n                              |Channel ${chName} doesn't exist.\"\"\".stripMargin)\n                case Some(ch) => Seq(Some(ch.id))\n              }\n            } getOrElse {\n              Seq(None) // for default channel\n            }\n          }\n\n        chanIdsToRemove.map { chId: Option[Int] =>\n\n          val r1 = if (events.remove(appDesc.app.id, chId)) {\n            if (chId.isDefined) {\n              info(s\"Removed Event Store for the channel ID: ${chId.get}\")\n            } else {\n              info(s\"Removed Event 
Store for the app ID: ${appDesc.app.id}\")\n            }\n            None\n          } else {\n            errStr =\n              if (chId.isDefined) s\"Error removing Event Store for the channel ID: ${chId.get}.\"\n              else s\"Error removing Event Store for the app ID: ${appDesc.app.id}.\"\n            error(errStr)\n            Some(errStr)\n          }\n          // re-create table\n          val dbInit = events.init(appDesc.app.id, chId)\n          val r2 = if (dbInit) {\n            if (chId.isDefined) {\n              info(s\"Initialized Event Store for the channel ID: ${chId.get}\")\n            } else {\n              info(s\"Initialized Event Store for the app ID: ${appDesc.app.id}\")\n            }\n            None\n          } else {\n            errStr =\n              if (chId.isDefined) {\n                s\"Unable to initialize Event Store for the channel ID: ${chId.get}.\"\n              } else {\n                s\"Unable to initialize Event Store for the app ID: ${appDesc.app.id}.\"\n              }\n            error(errStr)\n            Some(errStr)\n          }\n          Seq(r1, r2)\n        }\n        .flatten.flatten\n        .reduceOption(_ + \"\\n\" + _)\n        .toLeft(Ok())\n      }\n    } finally {\n      events.close()\n    }\n  }\n\n  def channelNew(appName: String, newChannel: String): Expected[Channel] = {\n    val events = storage.Storage.getLEvents()\n    val chanStorage = storage.Storage.getMetaDataChannels\n    var errStr = \"\"\n    try {\n      show(appName).right flatMap { case (appDesc: AppDescription, channels: Seq[Channel]) =>\n        if (channels.find(ch => ch.name == newChannel).isDefined) {\n          logAndFail(s\"\"\"Channel ${newChannel} already exists.\n                      |Unable to create new channel.\"\"\".stripMargin)\n        } else if (!storage.Channel.isValidName(newChannel)) {\n          logAndFail(s\"\"\"Unable to create new channel.\n                      |The channel name ${newChannel} is 
invalid.\n                      |${storage.Channel.nameConstraint}\"\"\".stripMargin)\n        } else {\n\n          val channel = Channel(\n            id = 0,\n            appid = appDesc.app.id,\n            name = newChannel)\n\n          chanStorage.insert(channel) map { chanId =>\n\n            info(s\"Updated Channel meta-data.\")\n\n            // initialize storage\n            val dbInit = events.init(appDesc.app.id, Some(chanId))\n            if (dbInit) {\n              info(s\"Initialized Event Store for the channel: ${newChannel}.\")\n              info(s\"Created new channel:\")\n              info(s\"    Channel Name: ${newChannel}\")\n              info(s\"      Channel ID: ${chanId}\")\n              info(s\"          App ID: ${appDesc.app.id}\")\n              Right(channel.copy(id = chanId))\n            } else {\n              errStr = s\"\"\"Unable to create new channel.\n                          |Failed to initialize Event Store.\"\"\".stripMargin\n              error(errStr)\n              // reverted back the meta data\n              try {\n                chanStorage.delete(chanId)\n                Left(errStr)\n              } catch {\n                case e: Exception =>\n                  val nextErrStr = (s\"\"\"\n                    |Failed to revert back the Channel meta-data change.\n                    |The channel ${newChannel} CANNOT be used!\n                    |Please run 'pio app channel-delete ${appName} ${newChannel}'\"\"\" +\n                    \" to delete this channel!\").stripMargin\n                  logAndFail(errStr + nextErrStr)\n              }\n            }\n          } getOrElse {\n            logAndFail(s\"\"\"Unable to create new channel.\n                          |Failed to update Channel meta-data.\"\"\".stripMargin)\n          }\n        }\n      }\n    } finally {\n      events.close()\n    }\n  }\n\n  def channelDelete(appName: String, deleteChannel: String): MaybeError = {\n    val chanStorage = 
storage.Storage.getMetaDataChannels\n    val events = storage.Storage.getLEvents()\n    try {\n      show(appName).right.flatMap { case (appDesc: AppDescription, channels: Seq[Channel]) =>\n        val foundChannel = channels.find(ch => ch.name == deleteChannel)\n        foundChannel match {\n          case None =>\n            logAndFail(s\"\"\"Unable to delete channel\n                          |Channel ${deleteChannel} doesn't exists.\"\"\".stripMargin)\n          case Some(channel) =>\n            val dbRemoved = events.remove(appDesc.app.id, Some(channel.id))\n            if (dbRemoved) {\n              info(s\"Removed Event Store for this channel: ${deleteChannel}\")\n              try {\n                chanStorage.delete(channel.id)\n                logAndSucceed(s\"Deleted channel: ${deleteChannel}.\")\n              } catch {\n                case e: Exception =>\n                  logAndFail((s\"\"\"Unable to delete channel.\n                   |Failed to update Channel meta-data.\n                   |The channel ${deleteChannel} CANNOT be used!\n                   |Please run 'pio app channel-delete ${appDesc.app.name} ${deleteChannel}'\"\"\" +\n                    \" to delete this channel again!\").stripMargin)\n              }\n            } else {\n              logAndFail(s\"\"\"Unable to delete channel.\n                            |Error removing Event Store for this channel.\"\"\".stripMargin)\n            }\n        }\n      }\n    } finally {\n      events.close()\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.core.BuildInfo\nimport org.apache.predictionio.controller.Utils\nimport org.apache.predictionio.data.storage\nimport org.apache.predictionio.tools.EitherLogging\nimport org.apache.predictionio.tools.{RunWorkflow, RunServer, RunBatchPredict}\nimport org.apache.predictionio.tools.{\n  DeployArgs, WorkflowArgs, SparkArgs, ServerArgs, BatchPredictArgs}\nimport org.apache.predictionio.tools.console.Console\nimport org.apache.predictionio.tools.Common._\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.workflow.WorkflowUtils\n\nimport org.apache.commons.io.FileUtils\nimport scala.collection.JavaConversions._\nimport scala.sys.process._\nimport scalaj.http.Http\nimport java.io.File\n\ncase class BuildArgs(\n  sbt: Option[File] = None,\n  sbtExtra: Option[String] = None,\n  sbtAssemblyPackageDependency: Boolean = true,\n  sbtClean: Boolean = false,\n  uberJar: Boolean = false,\n  forceGeneratePIOSbt: Boolean = false)\n\ncase class EngineArgs(\n  engineId: Option[String] = None,\n  engineVersion: Option[String] = None,\n  engineDir: 
Option[String] = None)\n\nobject Engine extends EitherLogging {\n\n  private def detectSbt(sbt: Option[File], pioHome: String): String = {\n    sbt map {\n      _.getCanonicalPath\n    } getOrElse {\n      val f = new File(Seq(pioHome, \"sbt\", \"sbt\").mkString(File.separator))\n      if (f.exists) f.getCanonicalPath else \"sbt\"\n    }\n  }\n\n  private def outputSbtError(line: String): Unit = {\n    \"\"\"\\[.*error.*\\]\"\"\".r findFirstIn line foreach { _ => error(line) }\n  }\n\n  private def compile(\n    buildArgs: BuildArgs,\n    pioHome: String,\n    engineDirPath: String,\n    verbose: Boolean): MaybeError = {\n\n    val f = new File(\n      Seq(engineDirPath, \"project\", \"pio-build.sbt\").mkString(File.separator))\n    if (f.exists || buildArgs.forceGeneratePIOSbt) {\n      FileUtils.writeLines(\n        new File(engineDirPath, \"pio.sbt\"),\n        Seq(\n          \"// Generated automatically by pio build.\",\n          \"// Changes in this file will be overridden.\",\n          \"\",\n          \"pioVersion := \\\"\" + BuildInfo.version + \"\\\"\"))\n    }\n    implicit val formats = Utils.json4sDefaultFormats\n\n    val sbt = detectSbt(buildArgs.sbt, pioHome)\n    info(s\"Using command '${sbt}' at ${engineDirPath} to build.\")\n    info(\"If the path above is incorrect, this process will fail.\")\n    val asm =\n      if (buildArgs.sbtAssemblyPackageDependency) {\n        \" assemblyPackageDependency\"\n      } else {\n        \"\"\n      }\n    val clean = if (buildArgs.sbtClean) \" clean\" else \"\"\n    val buildCmd = s\"${sbt} ${buildArgs.sbtExtra.getOrElse(\"\")}${clean} \" +\n      (if (buildArgs.uberJar) \"assembly\" else s\"package${asm}\")\n    val core = new File(engineDirPath, s\"pio-assembly-${BuildInfo.version}.jar\")\n    if (buildArgs.uberJar) {\n      info(s\"Uber JAR enabled. 
Putting ${core.getName} in lib.\")\n      val dst = new File(engineDirPath, \"lib\")\n      dst.mkdir()\n      coreAssembly(pioHome) match {\n        case Right(coreFile) =>\n          FileUtils.copyFileToDirectory(\n            coreFile,\n            dst,\n            true)\n        case Left(errStr) => return Left(errStr)\n      }\n    } else {\n      if (new File(engineDirPath, \"engine.json\").exists()) {\n        info(s\"Uber JAR disabled. Making sure lib/${core.getName} is absent.\")\n        new File(\"lib\", core.getName).delete()\n      } else {\n        info(\"Uber JAR disabled, but current working directory does not look \" +\n          s\"like an engine project directory. Please delete lib/${core.getName} manually.\")\n      }\n    }\n    info(s\"Going to run: ${buildCmd} in ${engineDirPath}\")\n    try {\n      val p = Process(s\"${buildCmd}\", new File(engineDirPath))\n      val r =\n        if (verbose) {\n          p.!(ProcessLogger(line => info(line), line => error(line)))\n        } else {\n          p.!(ProcessLogger(\n            line => outputSbtError(line),\n            line => outputSbtError(line)))\n        }\n      if (r != 0) {\n        logAndFail(s\"Return code of build command: ${buildCmd} is ${r}. 
Aborting.\")\n      } else {\n        logAndSucceed(\"Compilation finished successfully.\")\n      }\n    } catch {\n      case e: java.io.IOException =>\n        logAndFail(s\"Exception during compilation: ${e.getMessage}\")\n    }\n  }\n\n  def build(\n    ea: EngineArgs,\n    buildArgs: BuildArgs,\n    pioHome: String,\n    verbose: Boolean): MaybeError = {\n\n    val engineDirPath = getEngineDirPath(ea.engineDir)\n    Template.verifyTemplateMinVersion(\n      new File(engineDirPath, \"template.json\")) match {\n\n      case Left(err) => return Left(err)\n      case Right(_) =>\n        compile(buildArgs, pioHome, engineDirPath, verbose) match {\n          case Left(err) => return Left(err)\n          case Right(_) =>\n            info(\"Looking for an engine...\")\n            val jarFiles = jarFilesForScala(engineDirPath)\n            if (jarFiles.isEmpty) {\n              return logAndFail(\"No engine found. Your build might have failed. Aborting.\")\n            }\n            jarFiles foreach { f => info(s\"Found ${f.getName}\") }\n        }\n    }\n    logAndSucceed(\"Build finished successfully.\")\n  }\n\n  /** Training an engine.\n    *  The function starts a training process to be run concurrently.\n    *\n    * @param ea An instance of [[EngineArgs]]\n    * @param wa An instance of [[WorkflowArgs]] for running a single training.\n    * @param sa An instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @param verbose A [[Boolean]]\n    * @return An instance of [[Expected]] containing either [[Left]]\n    *         with an error message or [[Right]] with a handle to a process\n    *         responsible for training and a function () => Unit,\n    *         that must be called when the process is complete\n    */\n  def train(\n    ea: EngineArgs,\n    wa: WorkflowArgs,\n    sa: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n\n    val engineDirPath = 
getEngineDirPath(ea.engineDir)\n    Template.verifyTemplateMinVersion(\n      new File(engineDirPath, \"template.json\"))\n    RunWorkflow.runWorkflow(wa, sa, pioHome, engineDirPath, verbose)\n  }\n\n  /** Deploying an engine.\n    *  The function starts a new process to be run concurrently.\n    *\n    * @param ea An instance of [[EngineArgs]]\n    * @param engineInstanceId An instance of [[engineInstanceId]]\n    * @param serverArgs An instance of [[ServerArgs]]\n    * @param sparkArgs An instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @param verbose A [[Boolean]]\n    * @return An instance of [[Expected]] containing either [[Left]]\n    *         with an error message or [[Right]] with a handle to process\n    *         of a running engine and a function () => Unit,\n    *         that must be called when the process is complete\n    */\n  def deploy(\n    ea: EngineArgs,\n    engineInstanceId: Option[String],\n    serverArgs: ServerArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n\n    val engineDirPath = getEngineDirPath(ea.engineDir)\n    val verifyResult = Template.verifyTemplateMinVersion(\n      new File(engineDirPath, \"template.json\"))\n    if (verifyResult.isLeft) {\n      return Left(verifyResult.left.get)\n    }\n    val engineInstances = storage.Storage.getMetaDataEngineInstances\n    engineInstanceId map { eid =>\n      engineInstances.get(eid).map { r =>\n        RunServer.runServer(\n          r.id, serverArgs, sparkArgs, pioHome, engineDirPath, verbose)\n      } getOrElse {\n        logAndFail(s\"Invalid engine instance ID ${eid}. 
Aborting.\")\n      }\n    } getOrElse {\n      val ei = Console.getEngineInfo(\n        serverArgs.variantJson.getOrElse(new File(engineDirPath, \"engine.json\")),\n        engineDirPath)\n\n      engineInstances.getLatestCompleted(\n        ei.engineId, ei.engineVersion, ei.variantId).map { r =>\n        RunServer.runServer(\n          r.id, serverArgs, sparkArgs, pioHome, engineDirPath, verbose)\n      } getOrElse {\n        logAndFail(s\"No valid engine instance found for engine ${ei.engineId} \" +\n          s\"${ei.engineVersion}.\\nTry running 'train' before 'deploy'. Aborting.\")\n      }\n    }\n  }\n\n  def undeploy(da: DeployArgs): MaybeError = {\n\n    val serverUrl = s\"http://${da.ip}:${da.port}\"\n    info(\n      s\"Undeploying any existing engine instance at ${serverUrl}\")\n    try {\n      val code = Http(s\"${serverUrl}/stop\").asString.code\n      code match {\n        case 200 => Success\n        case 404 =>\n          logAndFail(s\"Another process is using ${serverUrl}. Unable to undeploy.\")\n        case _ =>\n          logAndFail(s\"Another process is using ${serverUrl}, or an existing \" +\n            s\"engine server is not responding properly (HTTP ${code}). \" +\n            \"Unable to undeploy.\")\n      }\n    } catch {\n      case e: java.net.ConnectException =>\n        logAndFail(s\"Nothing at ${serverUrl}\")\n      case _: Throwable =>\n        logAndFail(\"Another process might be occupying \" +\n          s\"${da.ip}:${da.port}. 
Unable to undeploy.\")\n    }\n  }\n\n  /** Batch predict with an engine.\n    *\n    * @param ea An instance of [[EngineArgs]]\n    * @param engineInstanceId An instance of [[engineInstanceId]]\n    * @param batchPredictArgs An instance of [[BatchPredictArgs]]\n    * @param sparkArgs An instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @param verbose A [[Boolean]]\n    * @return An instance of [[Expected]] containing either [[Left]]\n    *         with an error message or [[Right]] with a handle to process\n    *         of a running engine and a function () => Unit,\n    *         that must be called when the process is complete\n    */\n  def batchPredict(\n    ea: EngineArgs,\n    engineInstanceId: Option[String],\n    batchPredictArgs: BatchPredictArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Expected[(Process, () => Unit)] = {\n\n    val engineDirPath = getEngineDirPath(ea.engineDir)\n    val verifyResult = Template.verifyTemplateMinVersion(\n      new File(engineDirPath, \"template.json\"))\n    if (verifyResult.isLeft) {\n      return Left(verifyResult.left.get)\n    }\n    val ei = Console.getEngineInfo(\n      batchPredictArgs.variantJson.getOrElse(new File(engineDirPath, \"engine.json\")),\n      engineDirPath)\n    val engineInstances = storage.Storage.getMetaDataEngineInstances\n    val engineInstance = engineInstanceId map { eid =>\n      engineInstances.get(eid)\n    } getOrElse {\n      engineInstances.getLatestCompleted(\n        ei.engineId, ei.engineVersion, ei.variantId)\n    }\n    engineInstance map { r =>\n      RunBatchPredict.runBatchPredict(\n        r.id, batchPredictArgs, sparkArgs, pioHome, engineDirPath, verbose)\n    } getOrElse {\n      engineInstanceId map { eid =>\n        logAndFail(s\"Invalid engine instance ID ${eid}. 
Aborting.\")\n      } getOrElse {\n        logAndFail(s\"No valid engine instance found for engine ${ei.engineId} \" +\n          s\"${ei.engineVersion}.\\nTry running 'train' before 'batchpredict'. Aborting.\")\n      }\n    }\n  }\n\n  /** Running a driver on spark.\n    *  The function starts a process and returns immediately\n    *\n    * @param mainClass A [[String]] with the class containing a main function to run\n    * @param driverArguments Arguments to be passed to the main function\n    * @param buildArgs An instance of [[BuildArgs]]\n    * @param sparkArgs an instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @param verbose A [[Boolean]]\n    * @return An instance of [[Expected]] containing either [[Left]]\n    *         with an error message or [[Right]] with a handle to a process\n    *         of a running driver\n    */\n  def run(\n    ea: EngineArgs,\n    mainClass: String,\n    driverArguments: Seq[String],\n    buildArgs: BuildArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean): Expected[Process] = {\n\n    val engineDirPath = getEngineDirPath(ea.engineDir)\n\n    compile(buildArgs, pioHome, engineDirPath, verbose)\n\n    val extraFiles = WorkflowUtils.thirdPartyConfFiles\n    val jarFiles = jarFilesForScala(engineDirPath)\n    jarFiles foreach { f => info(s\"Found JAR: ${f.getName}\") }\n    val jarPluginFiles = jarFilesForSpark(pioHome)\n    jarPluginFiles foreach { f => info(s\"Found JAR: ${f.getName}\") }\n    val allJarFiles = jarFiles.map(_.getCanonicalPath) ++ jarPluginFiles.map(_.getCanonicalPath)\n    val pioLogDir = Option(System.getProperty(\"pio.log.dir\")).getOrElse(s\"${pioHome}/log\")\n\n    val cmd = s\"${getSparkHome(sparkArgs.sparkHome)}/bin/spark-submit --jars \" +\n      s\"${allJarFiles.mkString(\",\")} \" +\n      (if (extraFiles.size > 0) {\n        s\"--files ${extraFiles.mkString(\",\")} \"\n      } else {\n        \"\"\n      }) +\n      
s\"--driver-java-options -Dpio.log.dir=${pioLogDir} \" +\n      \"--class \" +\n      s\"${mainClass} ${sparkArgs.sparkPassThrough.mkString(\" \")} \" +\n      coreAssembly(pioHome) + \" \" +\n      driverArguments.mkString(\" \")\n    info(s\"Submission command: ${cmd}\")\n    Right(Process(\n      cmd,\n      None,\n      \"SPARK_YARN_USER_ENV\" -> sys.env.filter(kv => kv._1.startsWith(\"PIO_\")).\n        map(kv => s\"${kv._1}=${kv._2}\").mkString(\",\")).run())\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/Export.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.tools.Runner\nimport org.apache.predictionio.tools.SparkArgs\nimport org.apache.predictionio.tools.ReturnTypes._\n\nimport scala.sys.process._\n\n/** Arguments of the event export command.\n  *\n  * @param appId Value passed to the Spark job as `--appid`\n  * @param channel Optional channel name; `--channel` is passed only when defined\n  * @param outputPath Value passed to the Spark job as `--output`\n  * @param format Value passed to the Spark job as `--format`\n  */\ncase class ExportArgs(\n  appId: Int = 0,\n  channel: Option[String] = None,\n  outputPath: String = \"\",\n  format: String = \"json\")\n\nobject Export {\n\n  /** Delegates to `Runner.runOnSpark` with the `EventsToFile` main class.\n    *\n    * @param ea An instance of [[ExportArgs]]\n    * @param sa An instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @return The result of `Runner.runOnSpark`, unchanged\n    */\n  def eventsToFile(\n    ea: ExportArgs,\n    sa: SparkArgs,\n    pioHome: String): Expected[(Process, () => Unit)] = {\n\n    val requiredArgs = Seq(\n      \"--appid\", ea.appId.toString,\n      \"--output\", ea.outputPath,\n      \"--format\", ea.format)\n    // The channel flag is appended last and only when a channel was given.\n    val optionalChannel = ea.channel match {\n      case Some(name) => Seq(\"--channel\", name)\n      case None => Nil\n    }\n    Runner.runOnSpark(\n      \"org.apache.predictionio.tools.export.EventsToFile\",\n      requiredArgs ++ optionalChannel,\n      sa,\n      Nil,\n      pioHome)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/Import.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.tools.Runner\nimport org.apache.predictionio.tools.SparkArgs\nimport org.apache.predictionio.tools.ReturnTypes._\n\nimport scala.sys.process._\n\n/** Arguments of the event import command.\n  *\n  * @param appId Value passed to the Spark job as `--appid`\n  * @param channel Optional channel name; `--channel` is passed only when defined\n  * @param inputPath Value passed to the Spark job as `--input`\n  */\ncase class ImportArgs(\n  appId: Int = 0,\n  channel: Option[String] = None,\n  inputPath: String = \"\")\n\nobject Import {\n\n  /** Delegates to `Runner.runOnSpark` with the `FileToEvents` main class.\n    *\n    * @param ia An instance of [[ImportArgs]]\n    * @param sa An instance of [[SparkArgs]]\n    * @param pioHome [[String]] with a path to PIO installation\n    * @return The result of `Runner.runOnSpark`, unchanged\n    */\n  def fileToEvents(\n    ia: ImportArgs,\n    sa: SparkArgs,\n    pioHome: String): Expected[(Process, () => Unit)] = {\n\n    val requiredArgs = Seq(\n      \"--appid\", ia.appId.toString,\n      \"--input\", ia.inputPath)\n    // The channel flag is appended last and only when a channel was given.\n    val optionalChannel = ia.channel match {\n      case Some(name) => Seq(\"--channel\", name)\n      case None => Nil\n    }\n    Runner.runOnSpark(\n      \"org.apache.predictionio.tools.imprt.FileToEvents\",\n      requiredArgs ++ optionalChannel,\n      sa,\n      Nil,\n      pioHome)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/Management.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.commands\n\nimport org.apache.predictionio.core.BuildInfo\nimport org.apache.predictionio.data.storage\nimport org.apache.predictionio.data.api.EventServer\nimport org.apache.predictionio.data.api.EventServerConfig\nimport org.apache.predictionio.tools.EventServerArgs\nimport org.apache.predictionio.tools.EitherLogging\nimport org.apache.predictionio.tools.Common\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.apache.predictionio.tools.dashboard.DashboardServer\nimport org.apache.predictionio.tools.dashboard.DashboardConfig\nimport org.apache.predictionio.tools.admin.AdminServer\nimport org.apache.predictionio.tools.admin.AdminServerConfig\n\nimport akka.actor.ActorSystem\nimport java.io.File\nimport scala.io.Source\nimport com.github.zafarkhaja.semver.Version\n\ncase class DashboardArgs(\n  ip: String = \"127.0.0.1\",\n  port: Int = 9000)\n\ncase class AdminServerArgs(\n  ip: String = \"127.0.0.1\",\n  port: Int = 7071)\n\ncase class PioStatus(\n  version: String = \"\",\n  pioHome: String = \"\",\n  sparkHome: String = \"\",\n  sparkVersion: String = \"\",\n  sparkMinVersion: String = \"\",\n  warnings: Seq[String] = Nil)\n\nobject Management extends EitherLogging {\n\n  def version(): String = BuildInfo.version\n\n  /** Starts a dashboard server and returns immediately\n    *\n    * @param da An instance of [[DashboardArgs]]\n    * @return An instance of [[ActorSystem]] in which the server is being executed\n    */\n  def dashboard(da: DashboardArgs): ActorSystem = {\n    info(s\"Creating dashboard at ${da.ip}:${da.port}\")\n    DashboardServer.createDashboard(DashboardConfig(\n      ip = da.ip,\n      port = da.port))\n  }\n\n  /** Starts an eventserver server and returns immediately\n    *\n    * @param ea An instance of [[EventServerArgs]]\n    * @return An instance of [[ActorSystem]] in which the server is being executed\n    */\n  def eventserver(ea: EventServerArgs): ActorSystem = {\n    info(s\"Creating Event Server at ${ea.ip}:${ea.port}\")\n    EventServer.createEventServer(EventServerConfig(\n      ip = ea.ip,\n      port = ea.port,\n      stats = ea.stats))\n  }\n\n  /** Starts an adminserver server and returns immediately\n    *\n    * @param aa An instance of [[AdminServerArgs]]\n    * @return An instance of [[ActorSystem]] in which the server is being executed\n    */\n  def adminserver(aa: AdminServerArgs): ActorSystem = {\n    info(s\"Creating Admin Server at ${aa.ip}:${aa.port}\")\n    AdminServer.createAdminServer(AdminServerConfig(\n      ip = aa.ip,\n      port = aa.port\n    ))\n  }\n\n  private def stripMarginAndNewlines(string: String): String =\n    string.stripMargin.replaceAll(\"\\n\", \" \")\n\n  /** Inspects the PredictionIO installation, the Apache Spark installation\n    * and the storage backend connections.\n    *\n    * @param pioHome Path to the PredictionIO installation; the check fails\n    *                when it is not defined\n    * @param sparkHome Optional path to an Apache Spark installation, resolved\n    *                  through `Common.getSparkHome`\n    * @return An instance of [[Expected]] containing either [[Left]] with an\n    *         error message or [[Right]] with the collected [[PioStatus]],\n    *         including any non-fatal warnings\n    */\n  def status(pioHome: Option[String], sparkHome: Option[String]): Expected[PioStatus] = {\n    var pioStatus = PioStatus()\n    info(\"Inspecting PredictionIO...\")\n    pioHome map { pioHome =>\n      info(s\"PredictionIO ${BuildInfo.version} is installed at $pioHome\")\n      pioStatus = pioStatus.copy(version = version(), pioHome = pioHome)\n    } getOrElse {\n      return logAndFail(\"Unable to locate PredictionIO installation. Aborting.\")\n    }\n    info(\"Inspecting Apache Spark...\")\n    val sparkHomePath = Common.getSparkHome(sparkHome)\n    if (new File(s\"$sparkHomePath/bin/spark-submit\").exists) {\n      info(s\"Apache Spark is installed at $sparkHomePath\")\n      val sparkMinVersion = \"2.0.2\"\n      pioStatus = pioStatus.copy(\n        sparkHome = sparkHomePath,\n        sparkMinVersion = sparkMinVersion)\n      val sparkReleaseFile = new File(s\"$sparkHomePath/RELEASE\")\n      if (sparkReleaseFile.exists) {\n        // Close the file handle once the RELEASE file has been read.\n        val sparkReleaseSource = Source.fromFile(sparkReleaseFile)\n        val sparkReleaseStrings =\n          try {\n            sparkReleaseSource.mkString.split(' ')\n          } finally {\n            sparkReleaseSource.close()\n          }\n        if (sparkReleaseStrings.length < 2) {\n          val warning = (stripMarginAndNewlines(\n            s\"\"\"|Apache Spark version information cannot be found (RELEASE file\n                |is empty). This is a known issue for certain vendors (e.g.\n                |Cloudera). Please make sure you are using a version of at least\n                |$sparkMinVersion.\"\"\"))\n          warn(warning)\n          pioStatus = pioStatus.copy(warnings = pioStatus.warnings :+ warning)\n        } else {\n          // The version is taken from the second space-separated token.\n          val sparkReleaseVersion = sparkReleaseStrings(1)\n          val parsedMinVersion = Version.valueOf(sparkMinVersion)\n          val parsedCurrentVersion = Version.valueOf(sparkReleaseVersion)\n          if (parsedCurrentVersion.greaterThanOrEqualTo(parsedMinVersion)) {\n            info(stripMarginAndNewlines(\n              s\"\"\"|Apache Spark $sparkReleaseVersion detected (meets minimum\n                  |requirement of $sparkMinVersion)\"\"\"))\n            pioStatus = pioStatus.copy(sparkVersion = sparkReleaseVersion)\n          } else {\n            return logAndFail(stripMarginAndNewlines(\n              s\"\"\"|Apache Spark $sparkReleaseVersion detected (does not meet\n                  |minimum requirement of $sparkMinVersion). Aborting.\"\"\"))\n          }\n        }\n      } else {\n        val warning = (stripMarginAndNewlines(\n          s\"\"\"|Apache Spark version information cannot be found. If you are\n              |using a developmental tree, please make sure you are using a\n              |version of at least $sparkMinVersion.\"\"\"))\n        warn(warning)\n        pioStatus = pioStatus.copy(warnings = pioStatus.warnings :+ warning)\n      }\n    } else {\n      return logAndFail(\"Unable to locate a proper Apache Spark installation. Aborting.\")\n    }\n    info(\"Inspecting storage backend connections...\")\n    try {\n      storage.Storage.verifyAllDataObjects()\n    } catch {\n      case e: Throwable =>\n        val errStr = s\"\"\"Unable to connect to all storage backends successfully.\n            |The following shows the error message from the storage backend.\n            |\n            |${e.getMessage} (${e.getClass.getName})\n            |\n            |Dumping configuration of initialized storage backend sources.\n            |Please make sure they are correct.\n            |\n            |\"\"\".stripMargin\n        val sources = storage.Storage.config.get(\"sources\") map { src =>\n          src map { case (s, p) =>\n            s\"Source Name: $s; Type: ${p.getOrElse(\"type\", \"(error)\")}; \" +\n              s\"Configuration: ${p.getOrElse(\"config\", \"(error)\")}\"\n          } mkString(\"\\n\")\n        } getOrElse {\n          \"No properly configured storage backend sources.\"\n        }\n        return logOnFail(errStr + sources, e)\n    }\n    info(\"Your system is all ready to go.\")\n    Right(pioStatus)\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/commands/Template.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.commands\n\nimport java.io.File\n\nimport scala.io.Source\n\nimport org.apache.predictionio.core.BuildInfo\nimport org.apache.predictionio.tools.EitherLogging\nimport org.apache.predictionio.tools.ReturnTypes._\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\nimport com.github.zafarkhaja.semver.Version\n\ncase class TemplateMetaData(\n  pioVersionMin: Option[String] = None)\n\nobject Template extends EitherLogging {\n\n  /** Reads the minimum PredictionIO version declared under\n    * `pio.version.min` in a template metadata JSON file.\n    *\n    * @param templateJson The template metadata file\n    * @return An instance of [[TemplateMetaData]]; it carries no minimum\n    *         version when the file does not exist, cannot be parsed, or\n    *         does not hold a string at `pio.version.min`\n    */\n  def templateMetaData(templateJson: File): TemplateMetaData = {\n    if (!templateJson.exists) {\n      warn(s\"$templateJson does not exist. Template metadata will not be available. \" +\n        \"(This is safe to ignore if you are not working on a template.)\")\n      TemplateMetaData()\n    } else {\n      // Close the file handle as soon as its content has been read.\n      val source = Source.fromFile(templateJson)(scala.io.Codec.ISO8859)\n      val jsonString = try {\n        source.mkString\n      } finally {\n        source.close()\n      }\n      val json = try {\n        parse(jsonString)\n      } catch {\n        case e: org.json4s.ParserUtil.ParseException =>\n          warn(s\"$templateJson cannot be parsed. Template metadata will not be available.\")\n          return TemplateMetaData()\n      }\n      val pioVersionMin = json \\ \"pio\" \\ \"version\" \\ \"min\"\n      pioVersionMin match {\n        case JString(s) => TemplateMetaData(pioVersionMin = Some(s))\n        case _ => TemplateMetaData()\n      }\n    }\n  }\n\n  /** Checks the running PredictionIO version against the minimum version\n    * declared by a template, if any.\n    *\n    * @param templateJsonFile The template metadata file\n    * @return The result of `logAndFail` when `BuildInfo.version` is lower\n    *         than the declared minimum, `Success` otherwise\n    */\n  def verifyTemplateMinVersion(templateJsonFile: File): MaybeError = {\n    val metadata = templateMetaData(templateJsonFile)\n\n    for (pvm <- metadata.pioVersionMin) {\n      if (Version.valueOf(BuildInfo.version).lessThan(Version.valueOf(pvm))) {\n        return logAndFail(s\"This engine template requires at least PredictionIO $pvm. \" +\n          s\"The template may not work with PredictionIO ${BuildInfo.version}.\")\n      }\n    }\n    Success\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/console/Console.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.console\n\nimport java.io.File\nimport java.net.URI\n\nimport grizzled.slf4j.Logging\nimport org.apache.predictionio.core.BuildInfo\nimport org.apache.predictionio.tools.commands.{\n  DashboardArgs, AdminServerArgs, ImportArgs, ExportArgs,\n  BuildArgs, EngineArgs}\nimport org.apache.predictionio.tools.{\n  EventServerArgs, SparkArgs, WorkflowArgs, ServerArgs,\n  DeployArgs, BatchPredictArgs}\nimport org.apache.predictionio.workflow.{JsonExtractorOption, WorkflowUtils}\nimport org.json4s._\nimport org.json4s.native.JsonMethods._\n\nimport scala.io.Source\n\ncase class ConsoleArgs(\n  build: BuildArgs = BuildArgs(),\n  app: AppArgs = AppArgs(),\n  spark: SparkArgs = SparkArgs(),\n  engine: EngineArgs = EngineArgs(),\n  workflow: WorkflowArgs = WorkflowArgs(),\n  accessKey: AccessKeyArgs = AccessKeyArgs(),\n  deploy: DeployArgs = DeployArgs(),\n  batchPredict: BatchPredictArgs = BatchPredictArgs(),\n  eventServer: EventServerArgs = EventServerArgs(),\n  adminServer: AdminServerArgs = AdminServerArgs(),\n  dashboard: DashboardArgs = DashboardArgs(),\n  export: ExportArgs = ExportArgs(),\n  imprt: 
ImportArgs = ImportArgs(),\n  commands: Seq[String] = Nil,\n  metricsParamsJsonPath: Option[String] = None,\n  paramsPath: String = \"params\",\n  engineInstanceId: Option[String] = None,\n  mainClass: Option[String] = None,\n  driverPassThrough: Seq[String] = Nil,\n  pioHome: Option[String] = None,\n  verbose: Boolean = false)\n\ncase class AppArgs(\n  id: Option[Int] = None,\n  name: String = \"\",\n  channel: String = \"\",\n  dataDeleteChannel: Option[String] = None,\n  all: Boolean = false,\n  force: Boolean = false,\n  description: Option[String] = None)\n\ncase class AccessKeyArgs(\n  accessKey: String = \"\",\n  events: Seq[String] = Nil)\n\ncase class EngineInfo(\n  engineId: String,\n  engineVersion: String,\n  variantId: String)\n\nobject Console extends Logging {\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[ConsoleArgs](\"pio\") {\n      override def showUsageOnError: Boolean = false\n      head(\"PredictionIO Command Line Interface Console\", BuildInfo.version)\n      help(\"\")\n      note(\"Note that it is possible to supply pass-through arguments at\\n\" +\n        \"the end of the command by using a '--' separator, e.g.\\n\\n\" +\n        \"pio train --params-path params -- --master spark://mycluster:7077\\n\" +\n        \"\\nIn the example above, the '--master' argument will be passed to\\n\" +\n        \"underlying spark-submit command. 
Please refer to the usage section\\n\" +\n        \"for each command for more information.\\n\\n\" +\n        \"The following options are common to all commands:\\n\")\n      opt[String](\"pio-home\") action { (x, c) =>\n        c.copy(pioHome = Some(x))\n      } text(\"Root directory of a PredictionIO installation.\\n\" +\n        \"        Specify this if automatic discovery fail.\")\n      opt[String](\"spark-home\") action { (x, c) =>\n        c.copy(spark = c.spark.copy(sparkHome = Some(x)))\n      } text(\"Root directory of an Apache Spark installation.\\n\" +\n        \"        If not specified, will try to use the SPARK_HOME\\n\" +\n        \"        environmental variable. If this fails as well, default to\\n\" +\n        \"        current directory.\")\n      opt[String](\"engine-id\") abbr(\"ei\") action { (x, c) =>\n        c.copy(engine = c.engine.copy(engineId = Some(x)))\n      } text(\"Specify an engine ID. Usually used by distributed deployment.\")\n      opt[String](\"engine-version\") abbr(\"ev\") action { (x, c) =>\n        c.copy(engine = c.engine.copy(engineVersion = Some(x)))\n      } text(\"Specify an engine version. Usually used by distributed \" +\n        \"deployment.\")\n      opt[String](\"engine-dir\") abbr(\"ed\") action { (x, c) =>\n        c.copy(engine = c.engine.copy(engineDir = Some(x)))\n      } text(\"Specify path for engine directory, default to current directory.\")\n      opt[File](\"variant\") abbr(\"v\") action { (x, c) =>\n        c.copy(workflow = c.workflow.copy(variantJson = Some(x)))\n      }\n      opt[File](\"sbt\") action { (x, c) =>\n        c.copy(build = c.build.copy(sbt = Some(x)))\n      } validate { x =>\n        if (x.exists) {\n          success\n        } else {\n          failure(s\"${x.getCanonicalPath} does not exist.\")\n        }\n      } text(\"Path to sbt. 
Default: sbt\")\n      opt[Unit](\"verbose\") action { (x, c) =>\n        c.copy(verbose = true)\n      }\n      opt[Unit](\"spark-kryo\") abbr(\"sk\") action { (x, c) =>\n        c.copy(spark = c.spark.copy(sparkKryo = true))\n      }\n      opt[String](\"scratch-uri\") action { (x, c) =>\n        c.copy(spark = c.spark.copy(scratchUri = Some(new URI(x))))\n      }\n      note(\"\")\n      cmd(\"version\").\n        text(\"Displays the version of this command line console.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"version\")\n        }\n      note(\"\")\n      cmd(\"help\").action { (_, c) =>\n        c.copy(commands = c.commands :+ \"help\")\n      } children(\n        arg[String](\"<command>\") optional()\n          action { (x, c) =>\n            c.copy(commands = c.commands :+ x)\n          }\n        )\n      note(\"\")\n      cmd(\"build\").\n        text(\"Build an engine at the specific directory, or current \" +\n          \"directory by default.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"build\")\n        } children(\n          opt[String](\"sbt-extra\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtExtra = Some(x)))\n          } text(\"Extra command to pass to SBT when it builds your engine.\"),\n          opt[Unit](\"clean\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtClean = true))\n          } text(\"Clean build.\"),\n          opt[Unit](\"no-asm\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtAssemblyPackageDependency = false))\n          } text(\"Skip building external dependencies assembly.\"),\n          opt[Unit](\"uber-jar\") action { (x, c) =>\n            c.copy(build = c.build.copy(uberJar = true))\n          },\n          opt[Unit](\"generate-pio-sbt\") action { (x, c) =>\n            c.copy(build = c.build.copy(forceGeneratePIOSbt = true))\n          }\n        )\n      note(\"\")\n      cmd(\"unregister\").\n        
text(\"Unregister an engine at the specific directory, or current \" +\n          \"directory by default.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"unregister\")\n        }\n      note(\"\")\n      cmd(\"train\").\n        text(\"Kick off a training using an engine. This will produce an\\n\" +\n          \"engine instance. This command will pass all pass-through\\n\" +\n          \"arguments to its underlying spark-submit command.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"train\")\n        } children(\n          opt[String](\"batch\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(batch = x))\n          } text(\"Batch label of the run.\"),\n          opt[String](\"params-path\") action { (x, c) =>\n            c.copy(paramsPath = x)\n          } text(\"Directory to lookup parameters JSON files. Default: params\"),\n          opt[String](\"metrics-params\") abbr(\"mp\") action { (x, c) =>\n            c.copy(metricsParamsJsonPath = Some(x))\n          } text(\"Metrics parameters JSON file. 
Will try to use\\n\" +\n            \"        metrics.json in the base path.\"),\n          opt[Unit](\"skip-sanity-check\") abbr(\"ssc\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(skipSanityCheck = true))\n          },\n          opt[Unit](\"stop-after-read\") abbr(\"sar\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(stopAfterRead = true))\n          },\n          opt[Unit](\"stop-after-prepare\") abbr(\"sap\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(stopAfterPrepare = true))\n          },\n          opt[Unit](\"uber-jar\") action { (x, c) =>\n            c.copy(build = c.build.copy(uberJar = true))\n          },\n          opt[Int](\"verbosity\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(verbosity = x))\n          },\n          opt[String](\"engine-factory\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(engineFactory = Some(x)))\n          },\n          opt[String](\"engine-params-key\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(engineParamsKey = Some(x)))\n          },\n          opt[String](\"main-py-file\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(mainPyFile = Some(x)))\n          },\n          opt[String](\"json-extractor\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(jsonExtractor = JsonExtractorOption.withName(x)))\n          } validate { x =>\n              if (JsonExtractorOption.values.map(_.toString).contains(x)) {\n                success\n              } else {\n                val validOptions = JsonExtractorOption.values.mkString(\"|\")\n                failure(s\"$x is not a valid json-extractor option [$validOptions]\")\n              }\n          }\n        )\n      note(\"\")\n      cmd(\"eval\").\n        text(\"Kick off an evaluation using an engine. This will produce an\\n\" +\n          \"engine instance. 
This command will pass all pass-through\\n\" +\n          \"arguments to its underlying spark-submit command.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"eval\")\n        } children(\n          arg[String](\"<evaluation-class>\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(evaluation = Some(x)))\n          },\n          arg[String](\"[<engine-parameters-generator-class>]\") optional() action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(engineParamsGenerator = Some(x)))\n          } text(\"Optional engine parameters generator class, overriding the first argument\"),\n          opt[String](\"batch\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(batch = x))\n          } text(\"Batch label of the run.\"),\n          opt[String](\"json-extractor\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(jsonExtractor = JsonExtractorOption.withName(x)))\n          } validate { x =>\n            if (JsonExtractorOption.values.map(_.toString).contains(x)) {\n              success\n            } else {\n              val validOptions = JsonExtractorOption.values.mkString(\"|\")\n              failure(s\"$x is not a valid json-extractor option [$validOptions]\")\n            }\n          }\n        )\n      note(\"\")\n      cmd(\"deploy\").\n        text(\"Deploy an engine instance as a prediction server. 
This\\n\" +\n          \"command will pass all pass-through arguments to its underlying\\n\" +\n          \"spark-submit command.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"deploy\")\n        } children(\n          opt[String](\"batch\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(batch = x))\n          } text(\"Batch label of the deployment.\"),\n          opt[String](\"engine-instance-id\") action { (x, c) =>\n            c.copy(engineInstanceId = Some(x))\n          } text(\"Engine instance ID.\"),\n          opt[String](\"ip\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(ip = x))\n          },\n          opt[Int](\"port\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(port = x))\n          } text(\"Port to bind to. Default: 8000\"),\n          opt[Unit](\"feedback\") action { (_, c) =>\n            c.copy(eventServer = c.eventServer.copy(enabled = true))\n          } text(\"Enable feedback loop to event server.\"),\n          opt[String](\"event-server-ip\") action { (x, c) =>\n            c.copy(eventServer = c.eventServer.copy(ip = x))\n          },\n          opt[Int](\"event-server-port\") action { (x, c) =>\n            c.copy(eventServer = c.eventServer.copy(port = x))\n          } text(\"Event server port. Default: 7070\"),\n          opt[Int](\"admin-server-port\") action { (x, c) =>\n            c.copy(adminServer = c.adminServer.copy(port = x))\n          } text(\"Admin server port. Default: 7071\"),\n          opt[String](\"admin-server-ip\") action { (x, c) =>\n          c.copy(adminServer = c.adminServer.copy(ip = x))\n          } text(\"Admin server IP. 
Default: localhost\"),\n          opt[String](\"accesskey\") action { (x, c) =>\n            c.copy(accessKey = c.accessKey.copy(accessKey = x))\n          } text(\"Access key of the App where feedback data will be stored.\"),\n          opt[Unit](\"uber-jar\") action { (x, c) =>\n            c.copy(build = c.build.copy(uberJar = true))\n          },\n          opt[String](\"log-url\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(logUrl = Some(x)))\n          },\n          opt[String](\"log-prefix\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(logPrefix = Some(x)))\n          },\n          opt[String](\"json-extractor\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(jsonExtractor = JsonExtractorOption.withName(x)))\n          } validate { x =>\n            if (JsonExtractorOption.values.map(_.toString).contains(x)) {\n              success\n            } else {\n              val validOptions = JsonExtractorOption.values.mkString(\"|\")\n              failure(s\"$x is not a valid json-extractor option [$validOptions]\")\n            }\n          }\n        )\n      note(\"\")\n      cmd(\"undeploy\").\n        text(\"Undeploy an engine instance as a prediction server.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"undeploy\")\n        } children(\n          opt[String](\"ip\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(ip = x))\n          },\n          opt[Int](\"port\") action { (x, c) =>\n            c.copy(deploy = c.deploy.copy(port = x))\n          } text(\"Port to unbind from. Default: 8000\")\n        )\n      note(\"\")\n      cmd(\"batchpredict\").\n        text(\"Use an engine instance to process batch predictions. This\\n\" +\n              \"command will pass all pass-through arguments to its underlying\\n\" +\n              \"spark-submit command. 
All algorithm classes used in the engine\\n\" +\n              \"must be serializable.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"batchpredict\")\n        } children(\n          opt[String](\"input\") action { (x, c) =>\n            c.copy(batchPredict = c.batchPredict.copy(inputFilePath = x))\n          } text(\"Path to file containing queries; a multi-object JSON file\\n\" +\n                  \"with one query object per line. Accepts any valid Hadoop\\n\" +\n                  \"file URL. Default: batchpredict-input.json\"),\n          opt[String](\"output\") action { (x, c) =>\n            c.copy(batchPredict = c.batchPredict.copy(outputFilePath = x))\n          } text(\"Path to file to receive results; a multi-object JSON file\\n\" +\n                  \"with one object per line, the prediction + original query.\\n\" +\n                  \"Accepts any valid Hadoop file URL. Actual output will be\\n\" +\n                  \"written as Hadoop partition files in a directory with the\\n\" +\n                  \"output name. 
Default: batchpredict-output.json\"),\n          opt[Int](\"query-partitions\") action { (x, c) =>\n            c.copy(batchPredict = c.batchPredict.copy(queryPartitions = Some(x)))\n          } text(\"Limit concurrency of predictions by setting the number\\n\" +\n                  \"of partitions used internally for the RDD of queries.\\n\" +\n                  \"Default: number created by Spark context's `textFile`\"),\n          opt[String](\"engine-instance-id\") action { (x, c) =>\n            c.copy(engineInstanceId = Some(x))\n          } text(\"Engine instance ID.\"),\n          opt[String](\"json-extractor\") action { (x, c) =>\n            c.copy(workflow = c.workflow.copy(jsonExtractor = JsonExtractorOption.withName(x)))\n          } validate { x =>\n            if (JsonExtractorOption.values.map(_.toString).contains(x)) {\n              success\n            } else {\n              val validOptions = JsonExtractorOption.values.mkString(\"|\")\n              failure(s\"$x is not a valid json-extractor option [$validOptions]\")\n            }\n          }\n        )\n      note(\"\")\n      cmd(\"dashboard\").\n        text(\"Launch a dashboard at the specific IP and port.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"dashboard\")\n        } children(\n          opt[String](\"ip\") action { (x, c) =>\n            c.copy(dashboard = c.dashboard.copy(ip = x))\n          },\n          opt[Int](\"port\") action { (x, c) =>\n            c.copy(dashboard = c.dashboard.copy(port = x))\n          } text(\"Port to bind to. 
Default: 9000\")\n        )\n      note(\"\")\n      cmd(\"eventserver\").\n        text(\"Launch an Event Server at the specific IP and port.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"eventserver\")\n        } children(\n          opt[String](\"ip\") action { (x, c) =>\n            c.copy(eventServer = c.eventServer.copy(ip = x))\n          },\n          opt[Int](\"port\") action { (x, c) =>\n            c.copy(eventServer = c.eventServer.copy(port = x))\n          } text(\"Port to bind to. Default: 7070\"),\n          opt[Unit](\"stats\") action { (x, c) =>\n            c.copy(eventServer = c.eventServer.copy(stats = true))\n          }\n        )\n      cmd(\"adminserver\").\n        text(\"Launch an Admin Server at the specific IP and port.\").\n        action { (_, c) =>\n        c.copy(commands = c.commands :+ \"adminserver\")\n      } children(\n        opt[String](\"ip\") action { (x, c) =>\n          c.copy(adminServer = c.adminServer.copy(ip = x))\n        } text(\"IP to bind to. Default: localhost\"),\n        opt[Int](\"port\") action { (x, c) =>\n          c.copy(adminServer = c.adminServer.copy(port = x))\n        } text(\"Port to bind to. Default: 7071\")\n        )\n      note(\"\")\n      cmd(\"run\").\n        text(\"Launch a driver program. 
This command will pass all\\n\" +\n          \"pass-through arguments to its underlying spark-submit command.\\n\" +\n          \"In addition, it also supports a second level of pass-through\\n\" +\n          \"arguments to the driver program, e.g.\\n\" +\n          \"pio run -- --master spark://localhost:7077 -- --driver-arg foo\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"run\")\n        } children(\n          arg[String](\"<main class>\") action { (x, c) =>\n            c.copy(mainClass = Some(x))\n          } text(\"Main class name of the driver program.\"),\n          opt[String](\"sbt-extra\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtExtra = Some(x)))\n          } text(\"Extra command to pass to SBT when it builds your engine.\"),\n          opt[Unit](\"clean\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtClean = true))\n          } text(\"Clean build.\"),\n          opt[Unit](\"no-asm\") action { (x, c) =>\n            c.copy(build = c.build.copy(sbtAssemblyPackageDependency = false))\n          } text(\"Skip building external dependencies assembly.\")\n        )\n      note(\"\")\n      cmd(\"status\").\n        text(\"Displays status information about the PredictionIO system.\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"status\")\n        }\n      note(\"\")\n      cmd(\"upgrade\").\n        text(\"No longer supported!\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"upgrade\")\n        }\n      note(\"\")\n      cmd(\"app\").\n        text(\"Manage apps.\\n\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"app\")\n        } children(\n          cmd(\"new\").\n            text(\"Create a new app key to app ID mapping.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"new\")\n            } children(\n              opt[Int](\"id\") action { (x, c) =>\n                
c.copy(app = c.app.copy(id = Some(x)))\n              },\n              opt[String](\"description\") action { (x, c) =>\n                c.copy(app = c.app.copy(description = Some(x)))\n              },\n              opt[String](\"access-key\") action { (x, c) =>\n                c.copy(accessKey = c.accessKey.copy(accessKey = x))\n              },\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              }\n            ),\n          note(\"\"),\n          cmd(\"list\").\n            text(\"List all apps.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"list\")\n            },\n          note(\"\"),\n          cmd(\"show\").\n            text(\"Show details of an app.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"show\")\n            } children (\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              } text(\"Name of the app to be shown.\")\n            ),\n          note(\"\"),\n          cmd(\"delete\").\n            text(\"Delete an app.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"delete\")\n            } children(\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              } text(\"Name of the app to be deleted.\"),\n              opt[Unit](\"force\") abbr(\"f\") action { (x, c) =>\n                c.copy(app = c.app.copy(force = true))\n              } text(\"Delete an app without prompting for confirmation\")\n            ),\n          note(\"\"),\n          cmd(\"data-delete\").\n            text(\"Delete data of an app\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"data-delete\")\n            } children(\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name 
= x))\n              } text(\"Name of the app whose data to be deleted.\"),\n              opt[String](\"channel\") action { (x, c) =>\n                c.copy(app = c.app.copy(dataDeleteChannel = Some(x)))\n              } text(\"Name of channel whose data to be deleted.\"),\n              opt[Unit](\"all\") action { (x, c) =>\n                c.copy(app = c.app.copy(all = true))\n              } text(\"Delete data of all channels including default\"),\n              opt[Unit](\"force\") abbr(\"f\") action { (x, c) =>\n                c.copy(app = c.app.copy(force = true))\n              } text(\"Delete data of an app without prompting for confirmation\")\n            ),\n          note(\"\"),\n          cmd(\"channel-new\").\n            text(\"Create a new channel for the app.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"channel-new\")\n            } children (\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              } text(\"App name.\"),\n              arg[String](\"<channel>\") action { (x, c) =>\n                c.copy(app = c.app.copy(channel = x))\n              } text (\"Channel name to be created.\")\n            ),\n          note(\"\"),\n          cmd(\"channel-delete\").\n            text(\"Delete a channel of the app.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"channel-delete\")\n            } children (\n              arg[String](\"<name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              } text(\"App name.\"),\n              arg[String](\"<channel>\") action { (x, c) =>\n                c.copy(app = c.app.copy(channel = x))\n              } text (\"Channel name to be deleted.\"),\n              opt[Unit](\"force\") abbr(\"f\") action { (x, c) =>\n                c.copy(app = c.app.copy(force = true))\n              } text(\"Delete a channel of the app without 
prompting for confirmation\")\n            )\n        )\n      note(\"\")\n      cmd(\"accesskey\").\n        text(\"Manage app access keys.\\n\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"accesskey\")\n        } children(\n          cmd(\"new\").\n            text(\"Add allowed event(s) to an access key.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"new\")\n            } children(\n              opt[String](\"key\") action { (x, c) =>\n                c.copy(accessKey = c.accessKey.copy(accessKey = x))\n              },\n              arg[String](\"<app name>\") action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              },\n              arg[String](\"[<event1> <event2> ...]\") unbounded() optional()\n                action { (x, c) =>\n                  c.copy(accessKey = c.accessKey.copy(\n                    events = c.accessKey.events :+ x))\n                }\n            ),\n          cmd(\"list\").\n            text(\"List all access keys of an app.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"list\")\n            } children(\n              arg[String](\"<app name>\") optional() action { (x, c) =>\n                c.copy(app = c.app.copy(name = x))\n              } text(\"App name.\")\n            ),\n          note(\"\"),\n          cmd(\"delete\").\n            text(\"Delete an access key.\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"delete\")\n            } children(\n              arg[String](\"<access key>\") action { (x, c) =>\n                c.copy(accessKey = c.accessKey.copy(accessKey = x))\n              } text(\"The access key to be deleted.\")\n            )\n        )\n      cmd(\"template\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"template\")\n        } children(\n          cmd(\"get\").\n            text(\"No longer supported! 
Use git clone to download a template\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"get\")\n            },\n          cmd(\"list\").\n            text(\"No longer supported! Use git to manage your templates\").\n            action { (_, c) =>\n              c.copy(commands = c.commands :+ \"list\")\n            }\n        )\n      cmd(\"export\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"export\")\n        } children(\n          opt[Int](\"appid\") required() action { (x, c) =>\n            c.copy(export = c.export.copy(appId = x))\n          },\n          opt[String](\"output\") required() action { (x, c) =>\n            c.copy(export = c.export.copy(outputPath = x))\n          },\n          opt[String](\"format\") action { (x, c) =>\n            c.copy(export = c.export.copy(format = x))\n          },\n          opt[String](\"channel\") action { (x, c) =>\n            c.copy(export = c.export.copy(channel = Some(x)))\n          }\n        )\n      cmd(\"import\").\n        action { (_, c) =>\n          c.copy(commands = c.commands :+ \"import\")\n        } children(\n          opt[Int](\"appid\") required() action { (x, c) =>\n            c.copy(imprt = c.imprt.copy(appId = x))\n          },\n          opt[String](\"input\") required() action { (x, c) =>\n            c.copy(imprt = c.imprt.copy(inputPath = x))\n          },\n          opt[String](\"channel\") action { (x, c) =>\n            c.copy(imprt = c.imprt.copy(channel = Some(x)))\n          }\n        )\n    }\n\n    val separatorIndex = args.indexWhere(_ == \"--\")\n    val (consoleArgs, theRest) =\n      if (separatorIndex == -1) {\n        (args, Array[String]())\n      } else {\n        args.splitAt(separatorIndex)\n      }\n    val allPassThroughArgs = theRest.drop(1)\n    val secondSepIdx = allPassThroughArgs.indexWhere(_ == \"--\")\n    val (sparkPassThroughArgs, driverPassThroughArgs) =\n      if (secondSepIdx == -1) {\n        
(allPassThroughArgs, Array[String]())\n      } else {\n        val t = allPassThroughArgs.splitAt(secondSepIdx)\n        (t._1, t._2.drop(1))\n      }\n\n    parser.parse(consoleArgs, ConsoleArgs()) map { pca =>\n      val ca = pca.copy(\n        spark = pca.spark.copy(sparkPassThrough = sparkPassThroughArgs),\n        driverPassThrough = driverPassThroughArgs)\n      WorkflowUtils.modifyLogging(ca.verbose)\n      val rv: Int = ca.commands match {\n        case Seq(\"\") =>\n          System.err.println(help())\n          1\n        case Seq(\"version\") =>\n          Pio.version()\n        case Seq(\"build\") =>\n          Pio.build(\n            ca.engine, ca.build, ca.pioHome.get, ca.verbose)\n        case Seq(\"train\") =>\n          Pio.train(\n            ca.engine, ca.workflow, ca.spark, ca.pioHome.get, ca.verbose)\n        case Seq(\"eval\") =>\n          Pio.eval(\n            ca.engine, ca.workflow, ca.spark, ca.pioHome.get, ca.verbose)\n        case Seq(\"deploy\") =>\n          Pio.deploy(\n            ca.engine,\n            ca.engineInstanceId,\n            ServerArgs(\n              ca.deploy,\n              ca.eventServer,\n              ca.workflow.batch,\n              ca.accessKey.accessKey,\n              ca.workflow.variantJson,\n              ca.workflow.jsonExtractor),\n            ca.spark,\n            ca.pioHome.get,\n            ca.verbose)\n        case Seq(\"undeploy\") =>\n          Pio.undeploy(ca.deploy)\n        case Seq(\"batchpredict\") =>\n          Pio.batchPredict(\n            ca.engine,\n            ca.engineInstanceId,\n            BatchPredictArgs(\n              ca.batchPredict.inputFilePath,\n              ca.batchPredict.outputFilePath,\n              ca.batchPredict.queryPartitions,\n              ca.workflow.variantJson,\n              ca.workflow.jsonExtractor),\n            ca.spark,\n            ca.pioHome.get,\n            ca.verbose)\n        case Seq(\"dashboard\") =>\n          Pio.dashboard(ca.dashboard)\n      
  case Seq(\"eventserver\") =>\n          Pio.eventserver(ca.eventServer)\n        case Seq(\"adminserver\") =>\n          Pio.adminserver(ca.adminServer)\n        case Seq(\"run\") =>\n          Pio.run(\n            ca.engine,\n            ca.mainClass.get,\n            ca.driverPassThrough,\n            ca.build,\n            ca.spark,\n            ca.pioHome.get,\n            ca.verbose)\n        case Seq(\"status\") =>\n          Pio.status(ca.pioHome, ca.spark.sparkHome)\n        case Seq(\"upgrade\") =>\n          error(\"Upgrade is no longer supported\")\n          1\n        case Seq(\"app\", \"new\") =>\n          Pio.App.create(\n            ca.app.name, ca.app.id, ca.app.description, ca.accessKey.accessKey)\n        case Seq(\"app\", \"list\") =>\n          Pio.App.list()\n        case Seq(\"app\", \"show\") =>\n          Pio.App.show(ca.app.name)\n        case Seq(\"app\", \"delete\") =>\n          Pio.App.delete(ca.app.name, ca.app.force)\n        case Seq(\"app\", \"data-delete\") =>\n          Pio.App.dataDelete(\n            ca.app.name, ca.app.dataDeleteChannel, ca.app.all, ca.app.force)\n        case Seq(\"app\", \"channel-new\") =>\n          Pio.App.channelNew(ca.app.name, ca.app.channel)\n        case Seq(\"app\", \"channel-delete\") =>\n          Pio.App.channelDelete(ca.app.name, ca.app.channel, ca.app.force)\n        case Seq(\"accesskey\", \"new\") =>\n          Pio.AccessKey.create(\n            ca.app.name, ca.accessKey.accessKey, ca.accessKey.events)\n        case Seq(\"accesskey\", \"list\") =>\n         Pio.AccessKey.list(\n           if (ca.app.name == \"\") None else Some(ca.app.name))\n        case Seq(\"accesskey\", \"delete\") =>\n          Pio.AccessKey.delete(ca.accessKey.accessKey)\n        case Seq(\"template\", _) =>\n          error(\"template commands are no longer supported.\")\n          error(\"Please use git to get and manage your templates.\")\n          1\n        case Seq(\"export\") =>\n          
Pio.export(ca.export, ca.spark, ca.pioHome.get)\n        case Seq(\"import\") =>\n          Pio.imprt(ca.imprt, ca.spark, ca.pioHome.get)\n        case _ =>\n          System.err.println(help(ca.commands))\n          1\n      }\n      sys.exit(rv)\n    } getOrElse {\n      val command = args.toSeq.filterNot(_.startsWith(\"--\")).head\n      System.err.println(help(Seq(command)))\n      sys.exit(1)\n    }\n  }\n\n  def help(commands: Seq[String] = Nil): String = {\n    if (commands.isEmpty) {\n      mainHelp\n    } else {\n      val stripped =\n        (if (commands.head == \"help\") commands.drop(1) else commands).\n          mkString(\"-\")\n      helpText.getOrElse(stripped, s\"Help is unavailable for ${stripped}.\")\n    }\n  }\n\n  def getEngineInfo(jsonFile: File, engineDir: String): EngineInfo = {\n    // Use engineFactory as engineId\n    val variantJson = parse(Source.fromFile(jsonFile).mkString)\n    val engineId = variantJson \\ \"engineFactory\" match {\n      case JString(s) => s\n      case _ =>\n        error(\"unable to read engine factory from \" +\n          s\"${jsonFile.getCanonicalPath}. Aborting.\")\n        sys.exit(1)\n    }\n\n    val variantId = variantJson \\ \"id\" match {\n      case JString(s) => s\n      case _ =>\n        error(\"Unable to read engine variant ID from \" +\n          s\"${jsonFile.getCanonicalPath}. 
Aborting.\")\n        sys.exit(1)\n    }\n\n    // Use hash of engine directory as engineVersion\n    val engineVersion = java.security.MessageDigest.getInstance(\"SHA-1\").\n      digest(engineDir.getBytes).map(\"%02x\".format(_)).mkString\n\n    EngineInfo(engineId, engineVersion, variantId)\n  }\n\n  val mainHelp = txt.main().toString\n\n  val helpText = Map(\n    \"\" -> mainHelp,\n    \"status\" -> txt.status().toString,\n    \"upgrade\" -> txt.upgrade().toString,\n    \"version\" -> txt.version().toString,\n    \"template\" -> txt.template().toString,\n    \"build\" -> txt.build().toString,\n    \"train\" -> txt.train().toString,\n    \"deploy\" -> txt.deploy().toString,\n    \"batchpredict\" -> txt.batchpredict().toString,\n    \"eventserver\" -> txt.eventserver().toString,\n    \"adminserver\" -> txt.adminserver().toString,\n    \"app\" -> txt.app().toString,\n    \"accesskey\" -> txt.accesskey().toString,\n    \"import\" -> txt.imprt().toString,\n    \"export\" -> txt.export().toString,\n    \"run\" -> txt.run().toString,\n    \"eval\" -> txt.eval().toString,\n    \"dashboard\" -> txt.dashboard().toString)\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.console\n\nimport org.apache.predictionio.tools.{BatchPredictArgs, DeployArgs, EventServerArgs, ServerArgs, SparkArgs, WorkflowArgs}\nimport org.apache.predictionio.tools.commands.{AdminServerArgs, BuildArgs, DashboardArgs, Engine, EngineArgs, Export, ExportArgs, Import, ImportArgs, Management, AccessKey => AccessKeysCmd, App => AppCmd}\nimport org.apache.predictionio.tools.ReturnTypes._\nimport grizzled.slf4j.Logging\n\nimport scala.concurrent.Await\nimport scala.concurrent.duration.Duration\nimport scala.io.StdIn\nimport scala.language.implicitConversions\nimport scala.sys.process._\n\nobject Pio extends Logging {\n\n  private implicit def eitherToInt[A, B](result: Either[A, B]): Int = {\n    result fold (_ => 1, _ => 0)\n  }\n\n  private def doOnSuccess[A, B](result: Either[A, B])(f: B => Int): Int = {\n    result match {\n      case Left(_) => 1\n      case Right(res) => f(res)\n    }\n  }\n\n  private def processAwaitAndClean(maybeProc: Expected[(Process, () => Unit)]) = {\n    maybeProc match {\n      case Left(_) => 1\n\n      case Right((proc, cleanup)) =>\n        
Runtime.getRuntime.addShutdownHook(new Thread(new Runnable {\n          def run(): Unit = {\n            cleanup()\n            proc.destroy()\n          }\n        }))\n        val returnVal = proc.exitValue()\n        cleanup()\n        returnVal\n    }\n  }\n\n  def version(): Int = {\n    println(Management.version)\n    0\n  }\n\n  def build(\n    ea: EngineArgs,\n    buildArgs: BuildArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int = {\n\n    doOnSuccess(Engine.build(ea, buildArgs, pioHome, verbose)) {\n      _ => info(\"Your engine is ready for training.\")\n      0\n    }\n  }\n\n  def train(\n    ea: EngineArgs,\n    wa: WorkflowArgs,\n    sa: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int =\n      processAwaitAndClean(Engine.train(ea, wa, sa, pioHome, verbose))\n\n  def eval(\n    ea: EngineArgs,\n    wa: WorkflowArgs,\n    sa: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int =\n      processAwaitAndClean(Engine.train(ea, wa, sa, pioHome, verbose))\n\n  def deploy(\n    ea: EngineArgs,\n    engineInstanceId: Option[String],\n    serverArgs: ServerArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int =\n      processAwaitAndClean(Engine.deploy(\n        ea, engineInstanceId, serverArgs, sparkArgs, pioHome, verbose))\n\n  def undeploy(da: DeployArgs): Int = Engine.undeploy(da)\n\n  def batchPredict(\n    ea: EngineArgs,\n    engineInstanceId: Option[String],\n    batchPredictArgs: BatchPredictArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int =\n      processAwaitAndClean(Engine.batchPredict(\n        ea, engineInstanceId, batchPredictArgs, sparkArgs, pioHome, verbose))\n\n  def dashboard(da: DashboardArgs): Int = {\n    Await.ready(Management.dashboard(da).whenTerminated, Duration.Inf)\n    0\n  }\n\n  def eventserver(ea: EventServerArgs): Int = {\n    Await.ready(Management.eventserver(ea).whenTerminated, Duration.Inf)\n    
0\n  }\n\n  def adminserver(aa: AdminServerArgs): Int = {\n    Await.ready(Management.adminserver(aa).whenTerminated, Duration.Inf)\n    0\n  }\n\n  def run(\n    ea: EngineArgs,\n    mainClass: String,\n    driverArguments: Seq[String],\n    buildArgs: BuildArgs,\n    sparkArgs: SparkArgs,\n    pioHome: String,\n    verbose: Boolean = false): Int =\n      doOnSuccess(Engine.run(\n        ea, mainClass, driverArguments, buildArgs,\n        sparkArgs, pioHome, verbose)) { proc =>\n\n          val r = proc.exitValue()\n          if (r != 0) {\n            error(s\"Return code of previous step is ${r}. Aborting.\")\n            return 1\n          }\n          r\n        }\n\n\n  def status(pioHome: Option[String], sparkHome: Option[String]): Int = {\n    Management.status(pioHome, sparkHome)\n  }\n\n  def imprt(ia: ImportArgs, sa: SparkArgs, pioHome: String): Int = {\n    processAwaitAndClean(Import.fileToEvents(ia, sa, pioHome))\n  }\n\n  def export(ea: ExportArgs, sa: SparkArgs, pioHome: String): Int = {\n    processAwaitAndClean(Export.eventsToFile(ea, sa, pioHome))\n  }\n\n  object App {\n\n    def create(\n      name: String,\n      id: Option[Int] = None,\n      description: Option[String] = None,\n      accessKey: String = \"\"): Int =\n        doOnSuccess(AppCmd.create(name, id, description, accessKey)) { appDesc =>\n            info(\"Created a new app:\")\n            info(s\"      Name: ${appDesc.app.name}\")\n            info(s\"        ID: ${appDesc.app.id}\")\n            info(s\"Access Key: ${appDesc.keys.head.key}\")\n            0\n        }\n\n    def list(): Int = {\n      val title = \"Name\"\n      val ak = \"Access Key\"\n      val apps = AppCmd.list\n      info(f\"$title%20s |   ID | $ak%64s | Allowed Event(s)\")\n      apps foreach { appDesc =>\n        appDesc.keys foreach { k =>\n          val events =\n            if (k.events.size > 0) k.events.sorted.mkString(\",\") else \"(all)\"\n          info(f\"${appDesc.app.name}%20s | 
${appDesc.app.id}%4d | ${k.key}%64s | $events%s\")\n        }\n      }\n      info(s\"Finished listing ${apps.size} app(s).\")\n      0\n    }\n\n    def show(appName: String): Int =\n      doOnSuccess(AppCmd.show(appName)) { case (appDesc, chans) =>\n        info(s\"    App Name: ${appDesc.app.name}\")\n        info(s\"      App ID: ${appDesc.app.id}\")\n        info(s\" Description: ${appDesc.app.description.getOrElse(\"\")}\")\n\n        var firstKey = true\n        appDesc.keys foreach { k =>\n          val events =\n            if (k.events.size > 0) k.events.sorted.mkString(\",\") else \"(all)\"\n          if (firstKey) {\n            info(f\"  Access Key: ${k.key}%s | ${events}%s\")\n            firstKey = false\n          } else {\n            info(f\"              ${k.key}%s | ${events}%s\")\n          }\n        }\n        var firstChan = true\n        val titleName = \"Channel Name\"\n        val titleID = \"Channel ID\"\n        chans.foreach { ch =>\n          if (firstChan) {\n            info(f\"    Channels: ${titleName}%16s | ${titleID}%10s \")\n            firstChan = false\n          }\n          info(f\"              ${ch.name}%16s | ${ch.id}%10s\")\n        }\n        0\n      }\n\n    def delete(name: String, force: Boolean = false): Int =\n      doOnSuccess(AppCmd.show(name)) { case (appDesc, chans) =>\n        info(s\"The following app (including all channels) will be deleted. 
Are you sure?\")\n        info(s\"    App Name: ${appDesc.app.name}\")\n        info(s\"      App ID: ${appDesc.app.id}\")\n        info(s\" Description: ${appDesc.app.description.getOrElse(\"\")}\")\n        var firstChan = true\n        val titleName = \"Channel Name\"\n        val titleID = \"Channel ID\"\n        chans.foreach { ch =>\n          if (firstChan) {\n            info(f\"    Channels: ${titleName}%16s | ${titleID}%10s \")\n            firstChan = false\n          }\n          info(f\"              ${ch.name}%16s | ${ch.id}%10s\")\n        }\n\n        val choice = if(force) \"YES\" else StdIn.readLine(\"Enter 'YES' to proceed: \")\n        choice match {\n          case \"YES\" =>\n            AppCmd.delete(name)\n          case _ =>\n            info(\"Aborted.\")\n            0\n        }\n      }\n\n    def dataDelete(\n      name: String,\n      channel: Option[String] = None,\n      all: Boolean = false,\n      force: Boolean = false): Int =\n        doOnSuccess(AppCmd.show(name)) { case (appDesc, chans) =>\n\n          val channelId = channel.map { ch =>\n            val channelMap = chans.map(c => (c.name, c.id)).toMap\n            if (!channelMap.contains(ch)) {\n              error(s\"Unable to delete data for channel.\")\n              error(s\"Channel ${ch} doesn't exist.\")\n              return 1\n            }\n            channelMap(ch)\n          }\n          if (all) {\n            info(s\"All data of the app (including default and all channels) will be deleted.\" +\n              \" Are you sure?\")\n          } else if (channelId.isDefined) {\n            info(s\"Data of the following channel will be deleted. Are you sure?\")\n            info(s\"Channel Name: ${channel.get}\")\n            info(s\"  Channel ID: ${channelId.get}\")\n          } else {\n            info(s\"Data of the following app (default channel only) will be deleted. 
Are you sure?\")\n          }\n          info(s\"    App Name: ${appDesc.app.name}\")\n          info(s\"      App ID: ${appDesc.app.id}\")\n          info(s\" Description: ${appDesc.app.description}\")\n\n          val choice = if(force) \"YES\" else StdIn.readLine(\"Enter 'YES' to proceed: \")\n          choice match {\n            case \"YES\" =>\n              AppCmd.dataDelete(name, channel, all)\n            case _ =>\n              info(\"Aborted.\")\n              0\n          }\n        }\n\n    def channelNew(appName: String, newChannel: String): Int =\n      AppCmd.channelNew(appName, newChannel)\n\n    def channelDelete(\n      appName: String,\n      deleteChannel: String,\n      force: Boolean = false): Int =\n        doOnSuccess(AppCmd.show(appName)) { case (appDesc, chans) =>\n          chans.find(chan => chan.name == deleteChannel) match {\n            case None =>\n              error(s\"Unable to delete channel.\")\n              error(s\"Channel ${deleteChannel} doesn't exist.\")\n              1\n            case Some(chan) =>\n              info(s\"The following channel will be deleted. 
Are you sure?\")\n              info(s\"    Channel Name: ${deleteChannel}\")\n              info(s\"      Channel ID: ${chan.id}\")\n              info(s\"        App Name: ${appDesc.app.name}\")\n              info(s\"          App ID: ${appDesc.app.id}\")\n              val choice = if(force) \"YES\" else StdIn.readLine(\"Enter 'YES' to proceed: \")\n              choice match {\n                case \"YES\" =>\n                  AppCmd.channelDelete(appName, deleteChannel)\n                case _ =>\n                  info(\"Aborted.\")\n                  0\n              }\n          }\n        }\n\n  }\n\n  object AccessKey {\n\n    def create(\n      appName: String,\n      key: String,\n      events: Seq[String]): Int =\n        AccessKeysCmd.create(appName, key, events)\n\n    def list(app: Option[String]): Int =\n      doOnSuccess(AccessKeysCmd.list(app)) { keys =>\n        val title = \"Access Key(s)\"\n        info(f\"$title%64s | App ID | Allowed Event(s)\")\n        keys.sortBy(k => k.appid) foreach { k =>\n          val events =\n            if (k.events.size > 0) k.events.sorted.mkString(\",\") else \"(all)\"\n          info(f\"${k.key}%64s | ${k.appid}%6d | $events%s\")\n        }\n        info(s\"Finished listing ${keys.size} access key(s).\")\n        0\n      }\n\n    def delete(key: String): Int = AccessKeysCmd.delete(key)\n  }\n\n}\n\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/dashboard/CorsSupport.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.dashboard\n\n// Reference from: https://gist.github.com/jeroenr/5261fa041d592f37cd80\n\nimport akka.http.scaladsl.model.HttpMethods._\nimport akka.http.scaladsl.model.{StatusCodes, HttpResponse}\nimport akka.http.scaladsl.model.headers._\nimport akka.http.scaladsl.server.Directives._\nimport akka.http.scaladsl.server.{Directive0, Route}\nimport com.typesafe.config.ConfigFactory\n\ntrait CorsSupport {\n\n  // this directive adds access control headers to normal responses\n  private def addAccessControlHeaders: Directive0 = {\n    respondWithHeaders(\n      `Access-Control-Allow-Origin`.forRange(HttpOriginRange.`*`),\n      `Access-Control-Allow-Credentials`(true),\n      `Access-Control-Allow-Headers`(\"Authorization\", \"Content-Type\", \"X-Requested-With\")\n    )\n  }\n\n  // this handles preflight OPTIONS requests.\n  private def preflightRequestHandler: Route = options {\n    complete(HttpResponse(StatusCodes.OK)\n      .withHeaders(`Access-Control-Allow-Methods`(OPTIONS, POST, PUT, GET, DELETE)))\n  }\n\n  def corsHandler(r: Route): Route = addAccessControlHeaders {\n    preflightRequestHandler ~ 
r\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/dashboard/Dashboard.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.dashboard\n\nimport org.apache.predictionio.authentication.KeyAuthentication\nimport org.apache.predictionio.data.storage.Storage\n\nimport scala.concurrent.{Await, ExecutionContext, Future}\nimport akka.actor.ActorSystem\nimport akka.http.scaladsl.server.directives.FutureDirectives.onSuccess\nimport com.github.nscala_time.time.Imports.DateTime\nimport grizzled.slf4j.Logging\nimport akka.http.scaladsl.{ConnectionContext, Http, HttpsConnectionContext}\nimport akka.http.scaladsl.model._\nimport akka.http.scaladsl.server.Directives._\nimport akka.http.scaladsl.server.directives._\nimport akka.http.scaladsl.server._\nimport akka.stream.ActorMaterializer\nimport akka.http.scaladsl.model.ContentTypes._\nimport com.typesafe.config.ConfigFactory\nimport org.apache.predictionio.configuration.SSLConfiguration\n\nimport scala.concurrent.duration._\n\ncase class DashboardConfig(\n  ip: String = \"localhost\",\n  port: Int = 9000)\n\nobject Dashboard extends Logging {\n\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[DashboardConfig](\"Dashboard\") {\n      opt[String](\"ip\") 
action { (x, c) =>\n        c.copy(ip = x)\n      } text(\"IP to bind to (default: localhost).\")\n      opt[Int](\"port\") action { (x, c) =>\n        c.copy(port = x)\n      } text(\"Port to bind to (default: 9000).\")\n    }\n\n    parser.parse(args, DashboardConfig()) map { dc =>\n      val f = DashboardServer.createDashboard(dc).whenTerminated\n      Await.result(f, Duration.Inf)\n    }\n  }\n\n}\n\nobject DashboardServer extends KeyAuthentication with CorsSupport with SSLConfiguration {\n\n  def createDashboard(dc: DashboardConfig): ActorSystem = {\n    val systemName = \"pio-dashboard\"\n    implicit val system = ActorSystem(systemName)\n    implicit val materializer = ActorMaterializer()\n    implicit val executionContext = system.dispatcher\n    val serverConfig = ConfigFactory.load(\"server.conf\")\n    val sslEnforced = serverConfig.getBoolean(\"org.apache.predictionio.server.ssl-enforced\")\n    val route = createRoute(DateTime.now, dc)\n    if(sslEnforced){\n      val https: HttpsConnectionContext = ConnectionContext.https(sslContext)\n      Http().setDefaultServerHttpContext(https)\n      Http().bindAndHandle(route, dc.ip, dc.port, connectionContext = https)\n    } else {\n      Http().bindAndHandle(route, dc.ip, dc.port)\n    }\n    system\n  }\n\n  def createRoute(serverStartTime: DateTime, dc: DashboardConfig)\n                 (implicit executionContext: ExecutionContext): Route = {\n    val evaluationInstances = Storage.getMetaDataEvaluationInstances\n    val pioEnvVars = sys.env.filter(kv => kv._1.startsWith(\"PIO_\"))\n\n    def authenticate[T](authenticator: RequestContext => Future[Either[Rejection, T]]):\n        AuthenticationDirective[T] = {\n      extractRequestContext.flatMap { requestContext =>\n        onSuccess(authenticator(requestContext)).flatMap {\n          case Right(x) => provide(x)\n          case Left(x)  => reject(x): Directive1[T]\n        }\n      }\n    }\n\n    val route: Route =\n      path(\"\") {\n        
authenticate(withAccessKeyFromFile) { request =>\n          get {\n            val completedInstances = evaluationInstances.getCompleted\n            complete(HttpResponse(entity = HttpEntity(\n                `text/html(UTF-8)`,\n                 html.index(dc, serverStartTime, pioEnvVars, completedInstances).toString\n            )))\n          }\n        }\n      } ~\n      pathPrefix(\"engine_instances\" / Segment) { instanceId =>\n        path(\"evaluator_results.txt\") {\n          get {\n            evaluationInstances.get(instanceId).map { i =>\n              complete(i.evaluatorResults)\n            } getOrElse {\n              complete(StatusCodes.NotFound)\n            }\n          }\n        } ~\n        path(\"evaluator_results.html\") {\n          get {\n            evaluationInstances.get(instanceId).map { i =>\n              complete(HttpResponse(\n                entity = HttpEntity(`text/html(UTF-8)`, i.evaluatorResultsHTML)))\n            } getOrElse {\n              complete(StatusCodes.NotFound)\n            }\n          }\n        } ~\n        path(\"evaluator_results.json\") {\n          get {\n            evaluationInstances.get(instanceId).map { i =>\n              complete(HttpResponse(\n                entity = HttpEntity(`application/json`, i.evaluatorResultsJSON)))\n            } getOrElse {\n              complete(StatusCodes.NotFound)\n            }\n          }\n        } ~\n        corsHandler {\n          path(\"local_evaluator_results.json\") {\n            get {\n              evaluationInstances.get(instanceId).map { i =>\n                complete(HttpResponse(\n                  entity = HttpEntity(`application/json`, i.evaluatorResultsJSON)))\n              } getOrElse {\n                complete(StatusCodes.NotFound)\n              }\n            }\n          }\n        } ~\n        pathPrefix(\"assets\") {\n          getFromResourceDirectory(\"assets\")\n        }\n      }\n\n    route\n  }\n\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/export/EventsToFile.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.export\n\nimport org.apache.predictionio.controller.Utils\nimport org.apache.predictionio.data.storage.EventJson4sSupport\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.tools.Runner\nimport org.apache.predictionio.workflow.WorkflowContext\nimport org.apache.predictionio.workflow.WorkflowUtils\nimport org.apache.predictionio.workflow.CleanupFunctions\nimport grizzled.slf4j.Logging\nimport org.apache.spark.sql.{SaveMode, SparkSession}\nimport org.json4s.native.Serialization._\n\ncase class EventsToFileArgs(\n  env: String = \"\",\n  logFile: String = \"\",\n  appId: Int = 0,\n  channel: Option[String] = None,\n  outputPath: String = \"\",\n  format: String = \"parquet\",\n  verbose: Boolean = false,\n  debug: Boolean = false)\n\nobject EventsToFile extends Logging {\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[EventsToFileArgs](\"EventsToFile\") {\n      opt[String](\"env\") action { (x, c) =>\n        c.copy(env = x)\n      }\n      opt[String](\"log-file\") action { (x, c) =>\n        c.copy(logFile = x)\n      }\n      
opt[Int](\"appid\") action { (x, c) =>\n        c.copy(appId = x)\n      }\n      opt[String](\"channel\") action { (x, c) =>\n        c.copy(channel = Some(x))\n      }\n      opt[String](\"format\") action { (x, c) =>\n        c.copy(format = x)\n      }\n      opt[String](\"output\") action { (x, c) =>\n        c.copy(outputPath = x)\n      }\n      opt[Unit](\"verbose\") action { (x, c) =>\n        c.copy(verbose = true)\n      }\n      opt[Unit](\"debug\") action { (x, c) =>\n        c.copy(debug = true)\n      }\n    }\n    parser.parse(args, EventsToFileArgs()) map { args =>\n      try {\n        // get channelId\n        val channels = Storage.getMetaDataChannels\n        val channelMap = channels.getByAppid(args.appId).map(c => (c.name, c.id)).toMap\n\n        val channelId: Option[Int] = args.channel.map { ch =>\n          if (!channelMap.contains(ch)) {\n            error(s\"Channel ${ch} doesn't exist in this app.\")\n            sys.exit(1)\n          }\n\n          channelMap(ch)\n        }\n\n        val channelStr = args.channel.map(n => \" Channel \" + n).getOrElse(\"\")\n\n        WorkflowUtils.modifyLogging(verbose = args.verbose)\n        @transient lazy implicit val formats = Utils.json4sDefaultFormats +\n          new EventJson4sSupport.APISerializer\n        val sc = WorkflowContext(\n          mode = \"Export\",\n          batch = \"App ID \" + args.appId + channelStr,\n          executorEnv = Runner.envStringToMap(args.env))\n        val sqlSession = SparkSession.builder().getOrCreate()\n        val events = Storage.getPEvents()\n        val eventsRdd = events.find(appId = args.appId, channelId = channelId)(sc)\n        val jsonStringRdd = eventsRdd.map(write(_))\n        if (args.format == \"json\") {\n          jsonStringRdd.saveAsTextFile(args.outputPath)\n        } else {\n          val jsonDf = sqlSession.read.json(jsonStringRdd)\n          jsonDf.write.mode(SaveMode.ErrorIfExists).parquet(args.outputPath)\n        }\n        
info(s\"Events are exported to ${args.outputPath}/.\")\n        info(\"Done.\")\n\n      } finally {\n        CleanupFunctions.run()\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/main/scala/org/apache/predictionio/tools/imprt/FileToEvents.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n\npackage org.apache.predictionio.tools.imprt\n\nimport org.apache.predictionio.controller.Utils\nimport org.apache.predictionio.data.storage.Event\nimport org.apache.predictionio.data.storage.EventJson4sSupport\nimport org.apache.predictionio.data.storage.Storage\nimport org.apache.predictionio.tools.Runner\nimport org.apache.predictionio.workflow.WorkflowContext\nimport org.apache.predictionio.workflow.WorkflowUtils\nimport org.apache.predictionio.workflow.CleanupFunctions\n\nimport grizzled.slf4j.Logging\nimport org.json4s.native.Serialization._\n\nimport scala.util.{Failure, Try}\n\ncase class FileToEventsArgs(\n  env: String = \"\",\n  logFile: String = \"\",\n  appId: Int = 0,\n  channel: Option[String] = None,\n  inputPath: String = \"\",\n  verbose: Boolean = false,\n  debug: Boolean = false)\n\nobject FileToEvents extends Logging {\n  def main(args: Array[String]): Unit = {\n    val parser = new scopt.OptionParser[FileToEventsArgs](\"FileToEvents\") {\n      opt[String](\"env\") action { (x, c) =>\n        c.copy(env = x)\n      }\n      opt[String](\"log-file\") action { (x, c) =>\n        c.copy(logFile = x)\n      }\n      
opt[Int](\"appid\") action { (x, c) =>\n        c.copy(appId = x)\n      }\n      opt[String](\"channel\") action { (x, c) =>\n        c.copy(channel = Some(x))\n      }\n      opt[String](\"input\") action { (x, c) =>\n        c.copy(inputPath = x)\n      }\n      opt[Unit](\"verbose\") action { (x, c) =>\n        c.copy(verbose = true)\n      }\n      opt[Unit](\"debug\") action { (x, c) =>\n        c.copy(debug = true)\n      }\n    }\n    parser.parse(args, FileToEventsArgs()) map { args =>\n      try {\n        // get channelId\n        val channels = Storage.getMetaDataChannels\n        val channelMap = channels.getByAppid(args.appId).map(c => (c.name, c.id)).toMap\n\n        val channelId: Option[Int] = args.channel.map { ch =>\n          if (!channelMap.contains(ch)) {\n            error(s\"Channel ${ch} doesn't exist in this app.\")\n            sys.exit(1)\n          }\n\n          channelMap(ch)\n        }\n\n        val channelStr = args.channel.map(n => \" Channel \" + n).getOrElse(\"\")\n\n        WorkflowUtils.modifyLogging(verbose = args.verbose)\n        @transient lazy implicit val formats = Utils.json4sDefaultFormats +\n          new EventJson4sSupport.APISerializer\n        val sc = WorkflowContext(\n          mode = \"Import\",\n          batch = \"App ID \" + args.appId + channelStr,\n          executorEnv = Runner.envStringToMap(args.env))\n        val rdd = sc.textFile(args.inputPath).filter(_.trim.nonEmpty).map { json =>\n          Try(read[Event](json)).recoverWith {\n            case e: Throwable =>\n              error(s\"\\nmalformed json => $json\")\n              Failure(e)\n          }.get\n        }\n        val events = Storage.getPEvents()\n        events.write(events = rdd,\n          appId = args.appId,\n          channelId = channelId)(sc)\n        info(\"Events are imported.\")\n        info(\"Done.\")\n\n      } finally {\n        CleanupFunctions.run()\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/accesskey.scala.txt",
    "content": "Usage: pio accesskey new [--key] <app name> [<event1> <event2>...]\n\nAdd allowed event(s) to an access key.\n\n  --key <value>\n      Specify a custom key.\n  <app name>\n      App to be associated with the new access key.\n  <event1> <event2>...\n      Allowed event name(s) to be added to the access key.\n\nUsage: pio accesskey list [<app name>]\n\n  <app name>\n      App name.\n\nUsage: pio accesskey delete <access key>\n\n  <access key>\n      The access key to be deleted.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/adminserver.scala.txt",
    "content": "(Experimental Only!) Usage: pio adminserver [--ip <value>] [--port <value>]\n\n  --ip <value>\n      IP to bind to. Default: localhost\n  --port <value>\n      Port to bind to. Default: 7071\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/app.scala.txt",
    "content": "Usage: pio app new [--id <value>] [--description <value>] [--access-key <value>]\n                   <name>\n\nCreate a new app key to app ID mapping.\n\n  --id <value>\n      Specify this if you already have data under an app ID.\n  --description <value>\n      Description of the new app.\n  --access-key <value>\n      Specify a custom default access key.\n  <name>\n      App name.\n\n\nUsage: pio app list\n\nList all apps.\n\n\nUsage: pio app show <name>\n\nShow details of an app.\n\n  <name>\n      App name.\n\n\nUsage: pio app delete <name> [--force]\n\nName of the app to be deleted.\n\n  <name>\n      App name.\n  --force, -f\n      Delete data without prompting for confirmation\n\n\nUsage: pio app data-delete <name> [--channel <name>] [--all] [--force]\n\nDelete data of an app.\n\n  <name>\n      App name.\n  --channel <name>\n      Delete data of the specified channel (default channel if not specified)\n  --all\n      Delete all data of this app (including both default and all channels)\n  --force, -f\n      Delete data without prompting for confirmation\n\n\nUsage: pio app channel-new <name> <channel>\n\nCreate a new channel for the app.\n\n  <name>\n      App name.\n\n  <channel>\n      Channel name to be created.\n\n\nUsage: pio app channel-delete <name> <channel> [--force]\n\nDelete a channel for the app.\n\n  <name>\n      App name.\n\n  <channel>\n      Channel name to be deleted.\n  --force, -f\n      Delete data without prompting for confirmation\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/batchpredict.scala.txt",
    "content": "Usage: pio batchpredict [--input <value>]\n                        [--output <value>]\n                        [--query-partitions <value>]\n                        [--engine-instance-id <value>]\n\nUse an engine instance to process batch predictions. This command will pass all\npass-through arguments to its underlying spark-submit command. All algorithm\nclasses used in the engine must be serializable.\n\n  --input <value>\n      Path to file containing queries; a multi-object JSON file with one\n      query object per line. Accepts any valid Hadoop file URL.\n      Default: batchpredict-input.json\n  --output <value>\n      Path to file to receive results; a multi-object JSON file with one\n      object per line, the prediction + original query. Accepts any\n      valid Hadoop file URL. Actual output will be written as Hadoop\n      partition files in a directory with the output name.\n      Default: batchpredict-output.json\n  --query-partitions <value>\n      Limit concurrency of predictions by setting the number of partitions\n      used internally for the RDD of queries.\n      Default: number created by Spark context's `textFile`\n  --engine-instance-id <value>\n      Engine instance ID. Default: the latest trained instance.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/build.scala.txt",
    "content": "Usage: pio build [--sbt-extra <value>] [--clean] [--no-asm]\n                 [common options...]\n                 \nBuild an engine at the current directory.\n\n  --sbt-extra <value>\n      Extra command to pass to SBT when it builds your engine.\n  --clean\n      Clean build.\n  --no-asm\n      Skip building external dependencies assembly.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/dashboard.scala.txt",
    "content": "Usage: pio dashboard [--ip <value>] [--port <value>]\n\n  --ip <value>\n      IP to bind to. Default: localhost\n  --port <value>\n      Port to bind to. Default: 9000\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/deploy.scala.txt",
    "content": "Usage: pio deploy [--ip <value>] [--port <value>]\n                  [--engine-instance-id <value>]\n                  [--feedback] [--accesskey <value>]\n                  [--event-server-ip <value>] [--event-server-port <value>]\n                  [--batch <value>] [--scratch-uri <value>]\n\nDeploy an engine instance as a prediction server. This command will pass all\npass-through arguments to its underlying spark-submit command.\n\n  --ip <value>\n      IP to bind to. Default: 0.0.0.0\n  --port <value>\n      Port to bind to. Default: 8000\n  --engine-instance-id <value>\n      Engine instance ID.\n  --feedback\n      Enable feedback loop to event server.\n  --accesskey <value>\n      Access key of the App where feedback data will be stored.\n  --event-server-ip <value>\n      Event server IP. Default: 0.0.0.0\n  --event-server-port <value>\n      Event server port. Default: 7070\n  --batch <value>\n      Batch label of the deployment.\n  --scratch-uri\n      URI of the working scratch space. Specify this when you want to have all\n      necessary files transferred to a remote location. You will usually want to\n      specify this when you use --deploy-mode cluster.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/eval.scala.txt",
    "content": "Usage: pio eval <evaluation-class> <engine-parameters-generator-class>\n                [--batch <value>]\n                [common options...]\n\nKick off an evaluation using specified evaluation and engine parameters\ngenerator class. This command will pass all pass-through arguments to its\nunderlying spark-submit command.\n\n  --batch <value>\n      Batch label of the run.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/eventserver.scala.txt",
    "content": "Usage: pio eventserver [--ip <value>] [--port <value>] [--stats]\n\n  --ip <value>\n      IP to bind to. Default: 0.0.0.0\n  --port <value>\n      Port to bind to. Default: 7070\n  --stats\n      Enable Event Server internal statistics and its API endpoint.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/export.scala.txt",
    "content": "Usage: pio export --appid <value> --output <value> [--format <value>] [--channel <value>]\n\nExports all events of an app to a file. If Hadoop configuration is present, the\nfile will be exported to HDFS instead of local filesystem.\n\n  --appid <value>\n      App ID of events to be exported.\n  --channel <value>\n      Channel Name (default if this is not specified)\n  --output <value>\n      Output path of the exported file.\n  --format <value>\n      The format of the exported file. Valid values are \"json\" and \"parquet\".\n      The default format is \"json\".\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/imprt.scala.txt",
    "content": "Usage: pio import --appid <value> --input <value> [--channel <value>]\n\nImports all events from a file to an app. Each line of the file should be a JSON\nobject that represent a single event. If Hadoop configuration is present, the\nfile will be imported from HDFS instead of local filesystem.\n\n  --appid <value>\n      App ID of events to be imported.\n  --channel <value>\n      Channel Name (default if this is not specified)\n  --input <value>\n      Input path of the import file.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/main.scala.txt",
    "content": "Usage: pio <command> [options] <args>...\n\nOptions common to all commands:\n  [--pio-home <value>] [--spark-home <value>] [--sbt <value>]\n  [-ei <value>] [-ev <value>] [-ed <value>] [-v <value>]\n  [-sk | --spark-kryo] [--verbose]\n  [<args>] [-- [<args passed to Spark>] [-- [<args passed to runner]]]\n\n  --sbt <value>\n      Full path of sbt. Default: sbt\n  -ei <value> | --engine-id <value>\n      Specify an engine ID. Usually used by distributed deployment.\n  -ev <value> | --engine-version <value>\n      Specify an engine version. Usually used by distributed deployment.\n  -ed <value> | --engine-dir <value>\n      Specify path of engine directory to run build/train/deploy commands.\n  -v <value> | --variant <value>\n      Path to an engine variant JSON file. Default: engine.json\n  -sk | --spark-kryo\n      Shorthand for setting the spark.serializer property to\n      org.apache.spark.serializer.KryoSerializer.\n  --verbose\n      Enable third-party informational messages.\n\nNote that it is possible to supply pass-through arguments at the en\nof the command by using a '--' separator, e.g.\n\n  pio train -v my-variant -- --master spark://mycluster:7077\n\nIn the example above, the '--master' argument will be passed to the underlying\nspark-submit command. 
Please refer to the usage section for each command for\nmore information.\n\nThe most commonly used pio commands are:\n    status        Displays status information about PredictionIO\n    version       Displays the version of this command line console\n    template      Creates a new engine based on an engine template\n    build         Build an engine at the current directory\n    train         Kick off a training using an engine\n    deploy        Deploy an engine as an engine server\n    batchpredict  Process bulk predictions with an engine\n    eventserver   Launch an Event Server\n    app           Manage apps that are used by the Event Server\n    accesskey     Manage app access keys\n    export        Export events from the Event Server\n\nThe following are experimental development commands:\n    run           Launch a driver program\n    eval          Kick off an evaluation using an engine\n    dashboard     Launch an evaluation dashboard\n    adminserver   Launch an Admin Server\n\nSee 'pio help <command>' to read about a specific subcommand.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/run.scala.txt",
    "content": "Usage: pio run [--sbt-extra <value>] [--clean] [--no-asm]\n               [common options...] <main class>\n\nLaunch a driver program. This command will pass all pass-through arguments to\nits underlying spark-submit command. In addition, it also supports a second\nlevel of pass-through arguments to the driver program, e.g.\n\n  pio run -- --master spark://localhost:7077 -- --driver-arg foo\n\n  <main class>\n      Main class name of the driver program.\n  --sbt-extra <value>\n      Extra command to pass to SBT when it builds your engine.\n  --clean\n      Clean build.\n  --no-asm\n      Skip building external dependencies assembly.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/status.scala.txt",
    "content": "Usage: pio status\n\nDisplays status information about the PredictionIO system.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/template.scala.txt",
    "content": "Usage: pio template list\n\nNo longer supported! Please use git to download and manage your templates.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/train.scala.txt",
    "content": "Usage: pio train [--batch <value>] [--skip-sanity-check]\n                 [--stop-after-read] [--stop-after-prepare]\n                 [--engine-factory <value>] [--engine-params-key <value>]\n                 [--scratch-uri <value>]\n                 [common options...]\n\nKick off a training using an engine (variant) to produce an engine instance.\nThis command will pass all pass-through arguments to its underlying spark-submit\ncommand.\n\n  --batch <value>\n      Batch label of the run.\n  --skip-sanity-check\n      Disable all data sanity check. Useful for speeding up training in\n      production.\n  --stop-after-read\n      Stop the training process after DataSource.read(). Useful for debugging.\n  --stop-after-prepare\n      Stop the training process after Preparator.prepare(). Useful for\n      debugging.\n  --engine-factory\n      Override engine factory class.\n  --engine-params-key\n      Retrieve engine parameters programmatically from the engine factory class.\n  --scratch-uri\n      URI of the working scratch space. Specify this when you want to have all\n      necessary files transferred to a remote location. You will usually want to\n      specify this when you use --deploy-mode cluster.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/upgrade.scala.txt",
    "content": "Usage: pio upgrade <from version> <to version> <old App ID> <new app ID>\n\nNo longer supported!\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/console/version.scala.txt",
    "content": "Usage: pio version\n\nDisplays the version of this command line console.\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/dashboard/index.scala.html",
    "content": "@import org.apache.predictionio.data.storage.EvaluationInstance\n@import org.apache.predictionio.tools.dashboard.DashboardConfig\n@import org.joda.time.DateTime\n@import org.joda.time.format.DateTimeFormat\n@(dc: DashboardConfig,\n  dashboardStartTime: DateTime,\n  env: Map[String, String],\n  completedInstances: Seq[EvaluationInstance])\n<!DOCTYPE html>\n<!--\n  Licensed to the Apache Software Foundation (ASF) under one or more\n  contributor license agreements.  See the NOTICE file distributed with\n  this work for additional information regarding copyright ownership.\n  The ASF licenses this file to You under the Apache License, Version 2.0\n  (the \"License\"); you may not use this file except in compliance with\n  the License.  You may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n  See the License for the specific language governing permissions and\n  limitations under the License.\n-->\n<html lang=\"en\">\n  <head>\n    <title>PredictionIO Dashboard at @{dc.ip}:@{dc.port}</title>\n    <link href=\"/assets/favicon.png\" rel=\"shortcut icon\" />\n    <link rel=\"stylesheet\" href=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css\" integrity=\"sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u\" crossorigin=\"anonymous\">\n    <style type=\"text/css\">\n    .string { color: green; }\n    .number { color: darkorange; }\n    .boolean { color: blue; }\n    .null { color: magenta; }\n    .key { color: red; }\n    </style>\n    <script type=\"text/javascript\">\n      function syntaxHighlight(json) {\n        if (typeof json != 'string') {\n          json = JSON.stringify(json, undefined, 2);\n        }\n        json = json.replace(/&/g, 
'&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');\n        return json.replace(/(\"(\\\\u[a-zA-Z0-9]{4}|\\\\[^u]|[^\\\\\"])*\"(\\s*:)?|\\b(true|false|null)\\b|-?\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d+)?)/g, function (match) {\n          var cls = 'number';\n          if (/^\"/.test(match)) {\n            if (/:$/.test(match)) {\n              cls = 'key';\n            } else {\n              cls = 'string';\n            }\n          } else if (/true|false/.test(match)) {\n            cls = 'boolean';\n          } else if (/null/.test(match)) {\n            cls = 'null';\n          }\n          return '<span class=\"' + cls + '\">' + match + '</span>';\n        });\n      }\n      function shorternClassName(className) {\n        return className.replace(/(\\w)\\w*\\./g, \"$1.\"); \n      }\n    </script>\n  </head>\n  <body>\n    <div class=\"container-fluid\">\n      <div class=\"page-header\">\n        <h1>PredictionIO Dashboard at @{dc.ip}:@{dc.port}</h1>\n        <p class=\"lead\">Started on: @{DateTimeFormat.forStyle(\"FF\").print(dashboardStartTime)}</p>\n      </div>\n      <h2>Completed Evaluations</h2>\n      <table class=\"table table-bordered table-striped\">\n        <tr>\n          <th></th>\n          <th>Evaluator Result (One-liner)</th>\n          <th>Evaluation Class</th>\n          <th>Engine Parameters Generator Class</th>\n          <th>Start Time</th>\n          <th>End Time</th>\n          <th>Batch</th>\n        </tr>\n        @for(i <- completedInstances) {\n        <tr>\n          <td>\n            <div>\n              <a href=\"/engine_instances/@{i.id}/evaluator_results.html\" class=\"btn btn-primary\">HTML</a>\n            </div>\n          </td>\n          <td>\n            <div>@{i.evaluatorResults}</div>\n          </td>\n          <td>\n            <span title=\"@{i.evaluationClass}\"><script type=\"text/javascript\">document.write(shorternClassName(\"@{i.evaluationClass}\"));</script></span>\n          </td>\n          <td>\n          
  <span title=\"@{i.engineParamsGeneratorClass}\"><script type=\"text/javascript\">document.write(shorternClassName(\"@{i.engineParamsGeneratorClass}\"));</script></span>\n          </td>\n          <td>@{DateTimeFormat.forStyle(\"MM\").print(i.startTime)}</td>\n          <td>@{DateTimeFormat.forStyle(\"MM\").print(i.endTime)}</td>\n          <td>@{i.batch}</td>\n        </tr>\n        }\n      </table>\n      <h2>Environment</h2>\n      <p>The following values are accurate up to the time when the dashboard was launched.</p>\n      <table class=\"table table-bordered table-striped\">\n        @for(k <- env.keys.toSeq.sorted) {\n        <tr><th>@{k}</th><td>@{env(k)}</td></tr>\n        }\n      </table>\n    </div>\n    <script src=\"https://code.jquery.com/jquery-1.12.4.min.js\" integrity=\"sha256-ZosEbRLbNQzLpnKIkEdrPv7lOy9C27hHQ+Xp8a4MxAQ=\" crossorigin=\"anonymous\"></script>\n    <script src=\"https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js\" integrity=\"sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa\" crossorigin=\"anonymous\"></script>\n  </body>\n</html>\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrank/params/algorithmsJson.scala.txt",
    "content": "[\n  {\n    \"name\": \"mahoutItemBased\",\n    \"params\": {\n      \"booleanData\": true,\n      \"itemSimilarity\": \"LogLikelihoodSimilarity\",\n      \"weighted\": false,\n      \"nearestN\": 10,\n      \"threshold\": 4.9E-324,\n      \"numSimilarItems\": 50,\n      \"numUserActions\": 50,\n      \"freshness\" : 0,\n      \"freshnessTimeUnit\" : 86400\n    }\n  }\n]\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrank/params/datasourceJson.scala.txt",
    "content": "{\n  \"appId\": 1,\n  \"actions\": [\n    \"view\",\n    \"like\",\n    \"dislike\",\n    \"conversion\",\n    \"rate\"\n  ],\n  \"attributeNames\": {\n    \"user\" : \"pio_user\",\n    \"item\" : \"pio_item\",\n    \"u2iActions\": [\n      \"view\",\n      \"like\",\n      \"dislike\",\n      \"conversion\",\n      \"rate\"\n    ],\n    \"itypes\" : \"pio_itypes\",\n    \"starttime\" : \"pio_starttime\",\n    \"endtime\" : \"pio_endtime\",\n    \"inactive\" : \"pio_inactive\",\n    \"rating\" : \"pio_rating\"\n  }\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrank/params/preparatorJson.scala.txt",
    "content": "{\n  \"actions\": {\n    \"view\": 3,\n    \"like\": 5,\n    \"dislike\": 1,\n    \"conversion\": 4,\n    \"rate\": null\n  },\n  \"conflict\": \"latest\"\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrank/params/servingJson.scala.txt",
    "content": "{}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrec/params/algorithmsJson.scala.txt",
    "content": "[\n  {\n    \"name\": \"ncMahoutItemBased\",\n    \"params\": {\n      \"booleanData\": true,\n      \"itemSimilarity\": \"LogLikelihoodSimilarity\",\n      \"weighted\": false,\n      \"threshold\": 4.9E-324,\n      \"nearestN\": 10,\n      \"unseenOnly\": false,\n      \"freshness\" : 0,\n      \"freshnessTimeUnit\" : 86400\n    }\n  }\n]\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrec/params/datasourceJson.scala.txt",
    "content": "{\n  \"appId\": 2,\n  \"actions\": [\n    \"view\",\n    \"like\",\n    \"dislike\",\n    \"conversion\",\n    \"rate\"\n  ],\n  \"attributeNames\": {\n    \"user\" : \"pio_user\",\n    \"item\" : \"pio_item\",\n    \"u2iActions\": [\n      \"view\",\n      \"like\",\n      \"dislike\",\n      \"conversion\",\n      \"rate\"\n    ],\n    \"itypes\" : \"pio_itypes\",\n    \"starttime\" : \"pio_starttime\",\n    \"endtime\" : \"pio_endtime\",\n    \"inactive\" : \"pio_inactive\",\n    \"rating\" : \"pio_rating\"\n  }\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrec/params/preparatorJson.scala.txt",
    "content": "{\n  \"actions\": {\n    \"view\": 3,\n    \"like\": 5,\n    \"dislike\": 1,\n    \"conversion\": 4,\n    \"rate\": null\n  },\n  \"conflict\": \"latest\"\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemrec/params/servingJson.scala.txt",
    "content": "{}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemsim/params/algorithmsJson.scala.txt",
    "content": "[\n  {\n    \"name\": \"ncMahoutItemBased\",\n    \"params\": {\n      \"booleanData\": true,\n      \"itemSimilarity\": \"LogLikelihoodSimilarity\",\n      \"weighted\": false,\n      \"threshold\": 4.9E-324,\n      \"freshness\" : 0,\n      \"freshnessTimeUnit\" : 86400\n    }\n  }\n]\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemsim/params/datasourceJson.scala.txt",
    "content": "{\n  \"appId\": 2,\n  \"actions\": [\n    \"view\",\n    \"like\",\n    \"dislike\",\n    \"conversion\",\n    \"rate\"\n  ],\n  \"attributeNames\": {\n    \"user\" : \"pio_user\",\n    \"item\" : \"pio_item\",\n    \"u2iActions\": [\n      \"view\",\n      \"like\",\n      \"dislike\",\n      \"conversion\",\n      \"rate\"\n    ],\n    \"itypes\" : \"pio_itypes\",\n    \"starttime\" : \"pio_starttime\",\n    \"endtime\" : \"pio_endtime\",\n    \"inactive\" : \"pio_inactive\",\n    \"rating\" : \"pio_rating\"\n  }\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemsim/params/preparatorJson.scala.txt",
    "content": "{\n  \"actions\": {\n    \"view\": 3,\n    \"like\": 5,\n    \"dislike\": 1,\n    \"conversion\": 4,\n    \"rate\": null\n  },\n  \"conflict\": \"latest\"\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/itemsim/params/servingJson.scala.txt",
    "content": "{}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/scala/buildSbt.scala.txt",
    "content": "@(name: String, pioVersion: String, sparkVersion: String)\nimport AssemblyKeys._\n\nassemblySettings\n\nname := \"@{name}\"\n\norganization := \"myorg\"\n\nversion := \"0.0.1-SNAPSHOT\"\n\nlibraryDependencies ++= Seq(\n  \"org.apache.predictionio\"    %% \"core\"          % \"@{pioVersion}\" % \"provided\",\n  \"org.apache.spark\" %% \"spark-core\"    % \"@{sparkVersion}\" % \"provided\")\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/scala/engineJson.scala.txt",
    "content": "@(name: String, engineFactory: String)\n{\n  \"id\": \"default\",\n  \"description\": \"@{name}\",\n  \"engineFactory\": \"@{engineFactory}\",\n  \"datasource\": {\n    \"multiplier\": 2\n  }\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/scala/manifestJson.scala.txt",
    "content": "@(id: String, version: String, name: String)\n{\n  \"id\": \"@{id}\",\n  \"version\": \"@{version}\",\n  \"name\": \"@{name}\",\n  \"description\": \"@{name}\"\n  \"engineFactory\": \"\",\n  \"files\": []\n}\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/scala/project/assemblySbt.scala.txt",
    "content": "addSbtPlugin(\"com.eed3si9n\" % \"sbt-assembly\" % \"0.11.2\")\n"
  },
  {
    "path": "tools/src/main/twirl/org/apache/predictionio/tools/templates/scala/src/main/scala/engine.scala.txt",
    "content": "package myorg\n\n// Pulls in necessary PredictionIO controller components\nimport org.apache.predictionio.controller.EmptyActualResult\nimport org.apache.predictionio.controller.EmptyEvaluationInfo\nimport org.apache.predictionio.controller.IEngineFactory\nimport org.apache.predictionio.controller.LAlgorithm\nimport org.apache.predictionio.controller.LDataSource\nimport org.apache.predictionio.controller.Params\nimport org.apache.predictionio.controller.SimpleEngine\n\n// All data classes must be an instance of Serializable\nclass MyTrainingData(\n  val multiplier: Int\n) extends Serializable\n\nclass MyQuery(\n  val multiplicand: Int\n) extends Serializable\n\nclass MyModel(\n  val multiplier: Int\n) extends Serializable {\n  override def toString = s\"MyModel's multiplier: ${multiplier.toString}\"\n}\n\nclass MyPredictedResult(\n  val product: Int\n) extends Serializable\n\ncase class MyDataSourceParams(\n  val multiplier: Int\n) extends Params\n\n// Your controller components\nclass MyDataSource(val dsp: MyDataSourceParams) extends LDataSource[\n    MyTrainingData,\n    EmptyEvaluationInfo,\n    MyQuery,\n    EmptyActualResult] {\n\n  /** Implement readTraining() when you are not concerned about evaluation.\n    *\n    */\n  override def readTraining(): MyTrainingData = {\n    new MyTrainingData(dsp.multiplier)\n  }\n}\n\nclass MyAlgorithm extends LAlgorithm[\n    MyTrainingData,\n    MyModel,\n    MyQuery,\n    MyPredictedResult] {\n\n  override def train(pd: MyTrainingData): MyModel = {\n    // Our model is simply one integer...\n    new MyModel(pd.multiplier)\n  }\n\n  override def predict(model: MyModel, query: MyQuery): MyPredictedResult = {\n    new MyPredictedResult(query.multiplicand * model.multiplier)\n  }\n}\n\n/** Engine factory that pieces everything together. SimpleEngine only requires\n  * one DataSource and one Algorithm. 
Preparator is an identity function, and\n  * Serving simply outputs Algorithm's prediction without further processing.\n  */\nobject MyEngineFactory extends IEngineFactory {\n  override def apply() = {\n    new SimpleEngine(\n      classOf[MyDataSource],\n      classOf[MyAlgorithm])\n  }\n}\n"
  },
  {
    "path": "tools/src/test/scala/org/apache/predictionio/tools/RunnerSpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools\n\nimport org.specs2.mutable.Specification\n\nclass RunnerSpec extends Specification {\n  \"groupByArgumentName\" >> {\n    \"test1\" >> {\n      val test = Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n      Runner.groupByArgumentName(test) must havePairs(\n        \"--foo\" -> Seq(\"bar\", \"jeez\"),\n        \"--dead\" -> Seq(\"beef baz\"))\n    }\n\n    \"test2\" >> {\n      val test =\n        Seq(\"--foo\", \"--bar\", \"flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\", \"--flag\")\n      Runner.groupByArgumentName(test) must havePairs(\n        \"--foo\" -> Seq(\"jeez\"),\n        \"--bar\" -> Seq(\"flag\"),\n        \"--dead\" -> Seq(\"beef baz\"))\n    }\n  }\n\n  \"removeArguments\" >> {\n    \"test1\" >> {\n      val test = Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n      val remove = Set(\"--flag\", \"--foo\")\n      Runner.removeArguments(test, remove) === Seq(\"--flag\", \"--dead\", \"beef baz\", \"n00b\")\n    }\n\n    \"test2\" >> {\n      val test =\n        Seq(\"--foo\", 
\"--bar\", \"flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\", \"--flag\")\n      val remove = Set(\"--flag\", \"--foo\")\n      Runner.removeArguments(test, remove) ===\n        Seq(\"--foo\", \"--bar\", \"flag\", \"--dead\", \"beef baz\", \"n00b\", \"--flag\")\n    }\n  }\n\n  \"combineArguments\" >> {\n    \"test1\" >> {\n      val test = Seq(\"--foo\", \"bar\", \"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\")\n      val combinators = Map(\"--foo\" -> ((a: String, b: String) => s\"$a $b\"))\n      Runner.combineArguments(test, combinators) ===\n        Seq(\"--flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"bar jeez\")\n    }\n\n    \"test2\" >> {\n      val test =\n        Seq(\"--foo\", \"--bar\", \"flag\", \"--dead\", \"beef baz\", \"n00b\", \"--foo\", \"jeez\", \"--flag\")\n      val combinators = Map(\"--foo\" -> ((a: String, b: String) => s\"$a $b\"))\n      Runner.combineArguments(test, combinators) ===\n        Seq(\"--foo\", \"--bar\", \"flag\", \"--dead\", \"beef baz\", \"n00b\", \"--flag\", \"--foo\", \"jeez\")\n    }\n  }\n}\n"
  },
  {
    "path": "tools/src/test/scala/org/apache/predictionio/tools/admin/AdminAPISpec.scala",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one or more\n * contributor license agreements.  See the NOTICE file distributed with\n * this work for additional information regarding copyright ownership.\n * The ASF licenses this file to You under the Apache License, Version 2.0\n * (the \"License\"); you may not use this file except in compliance with\n * the License.  You may obtain a copy of the License at\n *\n *    http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage org.apache.predictionio.tools.admin\n\nimport org.specs2.mutable.Specification\nimport akka.http.scaladsl.testkit.Specs2RouteTest\n\nclass AdminAPISpec extends Specification with Specs2RouteTest {\n  val route = AdminServer.createRoute()\n\n  \"GET / request\" should {\n    \"properly produce OK HttpResponses\" in {\n      Get() ~> route ~> check {\n        response.status.intValue() shouldEqual 200\n        responseAs[String] shouldEqual \"\"\"{\"status\":\"alive\"}\"\"\"\n      }\n    }\n  }\n\n}\n"
  }
]