[
  {
    "path": ".dir-locals.el",
    "content": ";;; Directory Local Variables\n;;; See Info node `(emacs) Directory Variables' for more information.\n\n((python-mode\n  (flycheck-flake8rc . \"setup.cfg\")))\n"
  },
  {
    "path": ".dockerignore",
    "content": ".*\nDockerfile*\ndocker\n"
  },
  {
    "path": ".gitignore",
    "content": "*.pyc\n*~\nbuild\ndocs/_static/favicon.ico\ndocs/_static/logo.png\npydoop/config.py\npydoop/version.py\nsrc/hadoop*/libhdfs/config.h\nsrc/hdfs/hdfs.xcodeproj\nsrc/hdfs/hdfs/*\n\ndist\n\nexamples/**/*.class\nexamples/**/*.jar\n\ntest/timings/dataset\n\npydoop.egg-info\n\n.DS_Store\n.idea\n*.xcodeproj\n"
  },
  {
    "path": ".travis/check_script_template.py",
    "content": "\"\"\"\\\nPerform full substitution on the Pydoop script template and check\nit with flake8.\n\nAny options (i.e., arguments starting with at least a dash) are passed\nthrough to flake8.\n\"\"\"\n\nimport sys\nimport os\nimport tempfile\n\nfrom flake8.main.cli import main as flake8_main\n\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.join(THIS_DIR, os.pardir, \"pydoop\", \"app\"))\nfrom script_template import DRIVER_TEMPLATE\n\n\ndef main(argv):\n    code = DRIVER_TEMPLATE.substitute(\n        module=\"module\",\n        map_fn=\"map_fn\",\n        reduce_fn=\"reduce_fn\",\n        combine_fn=\"combine_fn\",\n        combiner_wp=\"None\",\n    )\n    fd = None\n    try:\n        fd, fn = tempfile.mkstemp(suffix=\".py\", text=True)\n        os.write(fd, code.encode(\"utf-8\"))\n    finally:\n        if fd is not None:\n            os.close(fd)\n    flake8_argv = [fn] + [_ for _ in argv if _.startswith(\"-\")]\n    try:\n        flake8_main(flake8_argv)\n    finally:\n        os.remove(fn)\n\n\nif __name__ == \"__main__\":\n    argv = sys.argv[1:]\n    if set(argv).intersection([\"-h\", \"--help\"]):\n        print(__doc__)\n    else:\n        main(argv)\n"
  },
  {
    "path": ".travis/cmd/hadoop_localfs.sh",
    "content": "#!/bin/bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\n\nfunction onshutdown {\n    mr-jobhistory-daemon.sh stop historyserver\n    yarn-daemon.sh stop nodemanager\n    yarn-daemon.sh stop resourcemanager\n}\n\ntrap onshutdown SIGTERM\ntrap onshutdown SIGINT\n\nconf_dir=$(dirname $(dirname $(command -v hadoop)))/etc/hadoop\ncat >\"${conf_dir}\"/core-site.xml <<EOF\n<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n<configuration>\n</configuration>\nEOF\ncat >\"${conf_dir}\"/hdfs-site.xml <<EOF\n<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>\n<configuration>\n</configuration>\nEOF\n\nyarn-daemon.sh start resourcemanager\nyarn-daemon.sh start nodemanager\nmr-jobhistory-daemon.sh start historyserver\n\ntail -f /dev/null\n\nonshutdown\n"
  },
  {
    "path": ".travis/run_checks",
    "content": "#!/bin/bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\n\ndocker exec pydoop bash -c 'cd test && ${PYTHON} all_tests.py'\ndocker exec pydoop bash -c 'cd test/avro && ${PYTHON} all_tests.py'\ndocker exec -e DEBUG=\"${DEBUG:-}\" pydoop bash -c 'cd int_test && ./run_all'\ndocker exec -e DEBUG=\"${DEBUG:-}\" pydoop bash -c 'cd examples && ./run_all'\ndocker exec -e DEBUG=\"${DEBUG:-}\" pydoop bash -c 'cd examples/avro && ./run'\n"
  },
  {
    "path": ".travis/start_container",
    "content": "#!/bin/bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\nimg=crs4/pydoop:${HADOOP_VERSION}-${TRAVIS_PYTHON_VERSION}\n\npushd \"${this_dir}\"\ncmd_dir=$(readlink -e \"cmd\")\npushd ..\ndocker build . \\\n  --build-arg hadoop_version=${HADOOP_VERSION} \\\n  --build-arg python_version=${TRAVIS_PYTHON_VERSION} \\\n  -t ${img}\nif [ -n \"${LOCAL_FS:-}\" ]; then\n    docker run --rm --name pydoop -v \"${cmd_dir}\":/cmd:ro -d ${img} \\\n      /cmd/hadoop_localfs.sh\nelse\n    docker run --rm --name pydoop -d ${img}\n    docker exec pydoop bash -c 'until datanode_cid; do sleep 0.1; done'\nfi\npopd\npopd\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: python\n\ncache: pip\n\nmatrix:\n  include:\n  - python: \"2.7\"\n    env: HADOOP_VERSION=3.2.0\n  - python: \"3.6\"\n    env: HADOOP_VERSION=2.9.2\n  - python: \"3.6\"\n    env: HADOOP_VERSION=3.2.0\n  - python: \"3.6\"\n    env: HADOOP_VERSION=3.2.0 LOCAL_FS=true\n  - python: \"3.7\"\n    env: HADOOP_VERSION=3.2.0\n    dist: xenial\n\nsudo: required\n\nservices: docker\n\nbefore_install: pip install flake8\n\n# skip installation, requirements are handled in the Docker image\ninstall: true\n\nbefore_script:\n  - flake8 -v .\n  - python .travis/check_script_template.py -v\n  - docker build -t crs4/pydoop-docs -f Dockerfile.docs .\n\nscript:\n - ./.travis/start_container\n - ./.travis/run_checks\n - docker stop pydoop\n\ndeploy:\n  provider: pypi\n  user: \"${CI_USER}\"\n  password: \"${CI_PASS}\"\n  on:\n    python: \"3.7\"\n    repo: crs4/pydoop\n    tags: true\n"
  },
  {
    "path": "AUTHORS",
    "content": "Pydoop is developed and maintained by:\n * Simone Leo <simone.leo@crs4.it>\n * Gianluigi Zanetti <gianluigi.zanetti@crs4.it>\n * Luca Pireddu <luca.pireddu@crs4.it>\n * Francesco Cabras <francesco.cabras@crs4.it>\n * Mauro Del Rio <mauro@crs4.it>\n * Marco Enrico Piras <kikkomep@crs4.it>\n\nOther contributors:\n * Cosmin Cătănoaie\n * Liam Slusser\n * Jeremy G. Kahn\n * Simon Li\n"
  },
  {
    "path": "Dockerfile",
    "content": "ARG hadoop_version=3.2.0\nARG python_version=3.6\n\nFROM crs4/pydoop-base:${hadoop_version}-${python_version}\n\nCOPY . /build/pydoop\nWORKDIR /build/pydoop\n\nRUN ${PYTHON} -m pip install --no-cache-dir --upgrade -r requirements.txt \\\n    && ${PYTHON} setup.py sdist \\\n    && ${PYTHON} -m pip install --pre dist/pydoop-$(cat VERSION).tar.gz\n"
  },
  {
    "path": "Dockerfile.client",
    "content": "ARG hadoop_version=3.2.0\nARG python_version=3.6\n\nFROM crs4/pydoop-client-base:${hadoop_version}-${python_version}\n\nCOPY . /build/pydoop\nWORKDIR /build/pydoop\n\nRUN ${PYTHON} -m pip install --no-cache-dir --upgrade -r requirements.txt \\\n    && ${PYTHON} setup.py build \\\n    && ${PYTHON} setup.py install --skip-build \\\n    && ${PYTHON} setup.py clean\n"
  },
  {
    "path": "Dockerfile.docs",
    "content": "FROM crs4/pydoop-docs-base\n\nCOPY . /build/pydoop\nWORKDIR /build/pydoop\n\nRUN ${PYTHON} -m pip install --no-cache-dir --upgrade -r requirements.txt \\\n    && ${PYTHON} setup.py build \\\n    && ${PYTHON} setup.py install --skip-build \\\n    && ${PYTHON} setup.py clean \\\n    && inkscape -z -D -f logo/logo.svg -e logo.png -w 800 2>/dev/null \\\n    && convert -resize 200x logo.png docs/_static/logo.png \\\n    && inkscape -z -D -f logo/favicon.svg -e 256.png -w 256 -h 256 2>/dev/null \\\n    && for i in 16 32 64 128; do \\\n        convert 256.png -resize ${i}x${i} ${i}.png; \\\n    done \\\n    && convert 16.png 32.png 64.png 128.png docs/_static/favicon.ico \\\n    && for a in script submit; do \\\n        ${PYTHON} dev_tools/dump_app_params --app ${a} -o docs/pydoop_${a}_options.rst; \\\n    done \\\n    && make SPHINXOPTS=\"-W\" -C docs html\n"
  },
  {
    "path": "LICENSE",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include AUTHORS LICENSE VERSION README.md pydoop.properties requirements.txt\n\nrecursive-include src *\nrecursive-include test *\nrecursive-include examples *\nrecursive-include docs *\nrecursive-include lib *\n"
  },
  {
    "path": "README.md",
    "content": "[![Build Status](https://travis-ci.org/crs4/pydoop.png)](https://travis-ci.org/crs4/pydoop)\n\nPydoop is a Python MapReduce and HDFS API for\n[Hadoop](http://hadoop.apache.org/).\n\nCopyright 2009-2026 [CRS4](http://www.crs4.it/).\n\nTo get started, take a look at [the docs](http://crs4.github.io/pydoop/).\n"
  },
  {
    "path": "VERSION",
    "content": "2.0.0\n"
  },
  {
    "path": "dev_tools/build_deprecation_tables",
    "content": "#!/usr/bin/env python\n\n\"\"\"\nAn utility to generate mrv1 to mrv2 conversion tables.\n\nUsage::\n\n  bash$ build_deprecation_tables /opt/hadoop-2.4.1-src ./pydoop/utils/conversion_tables.py\n\n\n\"\"\"\n\nimport os, sys, re\n\nDEFAULT_DEPRECATED_PROPERTIES_APT_VM_FNAME = \\\n\"hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm\"\n\n\n\nblock_separator = '||'\n\ndef extract_tables(apt_vm_fname):\n    \"\"\"Returns the deprecated-to-new-property table and its inverse as two dict(s).\"\"\"\n    with open(apt_vm_fname) as f:\n        lines = [x  for x in f.readlines() if re.match('^\\|[^\\|]', x)]\n    pairs = [p for p in [map(lambda x : x.strip(), l.split('|'))[1:] for l in lines]\n               if not p[1].startswith('NONE')]\n    return dict(pairs), dict(( (y, x) for (x, y) in pairs))\n    \n\ndef main(argv):\n    src_root = argv[0]\n    module_path = argv[1]\n    fname = os.path.join(src_root, DEFAULT_DEPRECATED_PROPERTIES_APT_VM_FNAME)\n    mrv1_to_mrv2, mrv2_to_mrv1 = extract_tables(fname)\n    with open(module_path, 'w') as f:\n        f.write('mrv1_to_mrv2=%r\\n' % mrv1_to_mrv2);\n        f.write('mrv2_to_mrv1=%r\\n' % mrv2_to_mrv1);\n\n\nif __name__ == \"__main__\":\n  main(sys.argv[1:])\n"
  },
  {
    "path": "dev_tools/bump_copyright_year",
    "content": "#!/usr/bin/env python\n\n\"\"\"\\\nSet copyright end year across the distribution.\n\"\"\"\n\nimport sys\nimport os\nimport re\nimport argparse\nimport datetime\n\n\nTHIS_YEAR = datetime.date.today().year\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nPATTERN = re.compile(r\"(?<=opyright 2009-)\\d+\")\n\n\ndef find_files(root_dir):\n    for d, subdirs, fnames in os.walk(root_dir, topdown=True):\n        for fn in fnames:\n            yield os.path.join(d, fn)\n        subdirs[:] = [_ for _ in subdirs if _ != \".git\"]\n\n\ndef bump_end_year(root_dir, year):\n    year = \"%d\" % year\n    for fn in find_files(root_dir):\n        if fn == os.path.abspath(__file__):\n            continue\n        print(\"processing %r\" % (fn,))\n        with open(fn, \"r\") as f:\n            try:\n                content = f.read()\n            except UnicodeDecodeError:\n                continue\n        with open(fn, \"w\") as f:\n            f.write(re.sub(PATTERN, year, content))\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(description=__doc__)\n    parser.add_argument(\"-y\", type=int, metavar=\"YYYY\", default=THIS_YEAR,\n                        help=\"copyright end year (default = current)\")\n    return parser\n\n\ndef main(argv):\n    parser = make_parser()\n    args = parser.parse_args(argv[1:])\n    repo_root = os.path.dirname(THIS_DIR)\n    bump_end_year(repo_root, args.y)\n\n\nif __name__ == \"__main__\":\n    main(sys.argv)\n"
  },
  {
    "path": "dev_tools/docker/client_side_tests/apache_2.6.0/initialize.sh",
    "content": "#!/bin/bash\n\nport=$1\nclient_id=$2\nrm_container_id=$3\nDOCKER_HOST_IP=${4:-localhost}\n#----------------------------------\nclient_name=`docker exec ${client_id} hostname`\n\n#----- Upload hadoop to the client container\nhdp_ver=hadoop-2.6.0\nhdp_tgz=${hdp_ver}.tar.gz\nif [[ ! -f ${hdp_tgz} ]]\nthen\n\thdp_url=http://mirror.nohup.it/apache/hadoop/common/${hdp_ver}/${hdp_tgz}\n\twget ${hdp_url} -O ${hdp_tgz}\nfi\n\n# copy the hadoop*.tar.gz\nscp -P${port} ${hdp_tgz} root@${DOCKER_HOST_IP}:/opt/\n\n# copy the installer script\nscp -P${port} local_client_setup.sh root@${DOCKER_HOST_IP}:.\n\n# exec and remove the installer script\nssh -p${port} root@${DOCKER_HOST_IP} './local_client_setup.sh && rm local_client_setup.sh'\n\n# copy the hadoop configuration from the resourcemanager container to the client container\necho \"Copying hadoop config from the resourcemanager container...\"\nfor c in core-site.xml mapred-site.xml yarn-site.xml\ndo\n    from=/opt/hadoop/etc/hadoop/${c}\n    to=/opt/hadoop/etc/hadoop/${c}\n    docker exec -it ${rm_container_id} scp ${from} ${client_name}:${to}\ndone\n\n"
  },
  {
    "path": "dev_tools/docker/client_side_tests/apache_2.6.0/local_client_setup.sh",
    "content": "#!/bin/bash\n\n#-----------\n# This script should be run in the client container.\n\n\npushd /opt\n\n#----- Hadoop setup\nhdp_ver=hadoop-2.6.0\nhdp_tgz=${hdp_ver}.tar.gz\ntar xzf ${hdp_tgz}\nln -s ./${hdp_ver} hadoop\ncat <<EOF  > /opt/hadoop/etc/hadoop/core-site.xml\n<configuration>\n<property>\n<name>fs.defaultFS</name>\n<value>hdfs://namenode:9000</value>\n</property>\n</configuration>\nEOF\n\ncat <<EOF  > /opt/hadoop/etc/hadoop/yarn-site.xml\n<configuration>\n\t<property>\n\t  <name>yarn.resourcemanager.hostname</name>\n\t  <value>resourcemanager</value>\n\t</property>\n</configuration>\nEOF\nexport HADOOP_HOME=/opt/hadoop\nexport PATH=${HADOOP_HOME}/bin:${PATH}\npopd\n\n#------------------\n# Pydoop setup\ngit_url=https://github.com/crs4/pydoop.git\n\ncat <<EOF > /home/aen/prepare_pydoop.sh\nexport HADOOP_HOME=/opt/hadoop\ngit clone ${git_url}\ncd pydoop\npython setup.py build\nEOF\n\ncat <<EOF > /home/aen/run_tests.sh\nexport HADOOP_HOME=/opt/hadoop\nexport PATH=\\${HADOOP_HOME}/bin:\\${PATH}\ncd pydoop/test\npython all_tests.py\nEOF\n\ncat <<EOF > /home/aen/run_examples.sh\nexport HADOOP_HOME=/opt/hadoop\nexport PATH=\\${HADOOP_HOME}/bin:\\${PATH}\ncd pydoop/examples\n./run_all\nEOF\n\ncat <<EOF > /home/aen/run_test_jar.sh\nexport HADOOP_HOME=/opt/hadoop\nexport PATH=\\${HADOOP_HOME}/bin:\\${PATH}\nhdfs dfs -put run_test_jar.sh\nyarn jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar wordcount run_test_jar.sh foobar\nEOF\n\n\n#----------------------------------------------------\n# Fix bad sw versions and missing things\napt-get install -y zip \npip install setuptools --upgrade\n\n#su - aen -c '/bin/bash ./prepare_pydoop.sh'\n#su - aen -c '/bin/bash ./run_test_jar.sh'\n#su - aen -c '/bin/bash ./run_tests.sh'\n#su - aen -c '/bin/bash ./run_examples.sh'\n"
  },
  {
    "path": "dev_tools/docker/client_side_tests/hdp_2.2.0.0/initialize.sh",
    "content": "#!/bin/bash\n\nport=$1\nclient_id=$2\nrm_container_id=$3\nDOCKER_HOST_IP=${4:-localhost}\n#----------------------------------\nclient_name=`docker exec ${client_id} hostname`\n\n#----------------------------------\nscp -P${port} local_client_setup.sh root@${DOCKER_HOST_IP}:.\n\n# exec and remove the installer script\nssh -p${port} root@${DOCKER_HOST_IP} './local_client_setup.sh && rm local_client_setup.sh'\n\n# copy the hadoop configuration from the resourcemanager container to the client container\necho \"Copying hadoop config from the resourcemanager container...\"\nfor c in core-site.xml mapred-site.xml yarn-site.xml\ndo\n    from=/opt/hadoop/etc/hadoop/${c}\n    to=/etc/hadoop/conf/${c}\n    docker exec -it ${rm_container_id} scp ${from} ${client_name}:${to}\ndone\n\n"
  },
  {
    "path": "dev_tools/docker/client_side_tests/hdp_2.2.0.0/local_client_setup.sh",
    "content": "#!/bin/bash\n\n# This script should be run in the client container, see initialize.sh\n\n#-----------\nfunction log() {\n    echo \"$1\"\n}\n\n\nfunction install_hdp2_ubuntu_packages() {\n    local VERSION=\"${1}\"\n    local HRTWRKS_REPO=http://public-repo-1.hortonworks.com/HDP/ubuntu12/2.x\n    local HDP_LIST=${HRTWRKS_REPO}/GA/${VERSION}/hdp.list\n\n    log \"Adding repository\"\n    wget -nv ${HDP_LIST} -O /etc/apt/sources.list.d/hdp.list\n    gpg --keyserver pgp.mit.edu --recv-keys B9733A7A07513CAD && gpg -a --export 07513CAD | apt-key add -\n    apt-get update\n    apt-get install -y hadoop hadoop-hdfs libhdfs0 \\\n                       hadoop-yarn hadoop-mapreduce hadoop-client \\\n                       openssl libsnappy1 libsnappy-dev\n}\n\n\n#----- Hadoop setup\nhdp_ver=2.2.0.0\ninstall_hdp2_ubuntu_packages ${hdp_ver}\n\nexport HADOOP_HOME=/usr/hdp/current/hadoop-client\nexport PATH=${HADOOP_HOME}/bin:${PATH}\n\n#------------------\n# Pydoop setup\ngit_url=https://github.com/crs4/pydoop.git\n\ncat <<EOF > /home/aen/prepare_pydoop.sh\ngit clone ${git_url}\ncd pydoop\npython setup.py build\nEOF\n\ncat <<EOF > /home/aen/run_tests.sh\ncd pydoop/test\npython all_tests.py\nEOF\n\ncat <<EOF > /home/aen/run_examples.sh\ncd pydoop/examples\n./run_all\nEOF\n\ncat <<EOF > /home/aen/run_test_jar.sh\nhdfs dfs -put run_test_jar.sh\nyarn jar /usr/hdp/2.2.0.0-2041/hadoop-mapreduce/hadoop-mapreduce-examples.jar wordcount run_test_jar.sh foobar\nEOF\n\n\n#----------------------------------------------------\n# Fix bad sw versions and missing things\napt-get install -y zip \npip install setuptools --upgrade\n\n#su - aen -c '/bin/bash ./prepare_pydoop.sh'\n#cd /home/aen/pydoop\n#python setup.py install\n#cd\n#su - aen -c '/bin/bash ./run_test_jar.sh'\n#su - aen -c '/bin/bash ./run_tests.sh'\n#su - aen -c '/bin/bash ./run_examples.sh'\n"
  },
  {
    "path": "dev_tools/docker/cluster.rst",
    "content": "Testing pydoop using a Docker Cluster\n=====================================\n\nThe purpose of the pydoop docker cluster is to provide a full, standard, hadoop\ncluster that can be used for testing purposes. This is a \"real\" cluster, not a\npseudo-cluster single node thing.\n\nThe supported testing strategy is to do the following:\n\n #. choose and start an appropriate docker cluster;\n #. log in the 'client' node provided by the cluster;\n #. install on the client node the targeted hadoop version -- it should be\n    compatible, at the protocol level should be enough, with the cluster;\n #. install on the client node the pydoop version under test;\n #. run pydoop tests and examples.\n\n\nDocker cluster\n--------------\n\nBuild a cluster\n;;;;;;;;;;;;;;;\n\nClusters configurations are defined in subdirectories of the directory\n``clusters``, e.g., ``clusters/apache_2.6.0``.\n\nDo the following to build all the cluster independent images::\n\n  $ cd clusters\n  $ ../scripts/build_base_images.sh\n  \nNext, build all the cluster dependent images::\n\n  $ ../scripts/build_cluster_images.sh apache_2.6.0\n\nwhere we have used ``apache_2.6.0`` as an example.\n\n\nRun a cluster\n;;;;;;;;;;;;;\n\nTo start a cluster, do the following::\n\n  $ ../scripts/start_cluster.sh apache_2.6.0\n  No stopped containers\n  Creating apache260_zookeeper_1...\n  Creating apache260_bootstrap_1...\n  Creating apache260_client_1...\n  Creating apache260_namenode_1...\n  Creating apache260_datanode_1...\n  Creating apache260_historyserver_1...\n  Creating apache260_resourcemanager_1...\n  Creating apache260_nodemanager_1...\n\nThe script attemps to clean up left-overs from previous runs. 
Thus if it is not\nthe first time you have run it, it will ask for your permission to rm old containers::\n\n  $ ../scripts/start_cluster.sh apache_2.6.0\n  Stopping apache260_nodemanager_1...\n  Stopping apache260_resourcemanager_1...\n  Stopping apache260_historyserver_1...\n  Stopping apache260_datanode_1...\n  Stopping apache260_namenode_1...\n  Stopping apache260_client_1...\n  Stopping apache260_zookeeper_1...\n  Going to remove apache260_nodemanager_1, apache260_resourcemanager_1, apache260_historyserver_1, apache260_client_1, apache260_datanode_1, apache260_namenode_1, apache260_bootstrap_1, apache260_zookeeper_1\n  Are you sure? [yN] y\n  Removing apache260_zookeeper_1...\n  Removing apache260_bootstrap_1...\n  Removing apache260_client_1...\n  Removing apache260_namenode_1...\n  Removing apache260_datanode_1...\n  Removing apache260_historyserver_1...\n  Removing apache260_resourcemanager_1...\n  Removing apache260_nodemanager_1...\n  Moved logs to logs.backup.12522\n  Moved local to local.backup.12522\n  Creating apache260_zookeeper_1...\n  Creating apache260_bootstrap_1...\n  Creating apache260_client_1...\n  Creating apache260_namenode_1...\n  Creating apache260_datanode_1...\n  Creating apache260_historyserver_1...\n  Creating apache260_resourcemanager_1...\n  Creating apache260_nodemanager_1...\n\n\nTo check how the cluster is doing, look at the logs of the bootstrap node::\n\n  $ cd apache_2.6.0\n  $ docker-compose logs bootstrap\n  Attaching to apache260_bootstrap_1\n  bootstrap_1 | INFO:root:Starting bootstrap.\n  bootstrap_1 | INFO:root:Waiting for /etc/hosts to update on bootstrap\n  bootstrap_1 | INFO:root:Waiting for /etc/hosts to update on bootstrap\n  bootstrap_1 | ....\n  bootstrap_1 | INFO:root:Waiting for /etc/hosts to update on bootstrap\n  bootstrap_1 | INFO:kazoo.client:Connecting to zookeeper:2181\n  bootstrap_1 | INFO:kazoo.client:Zookeeper connection established, state: CONNECTED\n  bootstrap_1 | INFO:root:Booting namenode\n  
bootstrap_1 | INFO:root:\tdone.\n  bootstrap_1 | INFO:root:Booting datanode\n  bootstrap_1 | INFO:root:\tdone.\n  bootstrap_1 | Creating /mr-history/tmp\n  bootstrap_1 | Creating /mr-history/done\n  bootstrap_1 | Setting ownership (mapred:hadoop) and permissions for /mr-history\n  bootstrap_1 | INFO:root:Booting resourcemanager\n  bootstrap_1 | INFO:root:\tdone.\n  bootstrap_1 | INFO:root:Booting nodemanager\n  bootstrap_1 | INFO:root:\tdone.\n  bootstrap_1 | INFO:root:Booting historyserver\n  bootstrap_1 | INFO:root:\tdone.\n  bootstrap_1 | INFO:root:Done with bootstrap.\n  apache260_bootstrap_1 exited with code 0\n\nThen check:\n\n  #. the namenode, ``http://localhost:50070``, it should be up and reporting a\n     datanode;\n  #. the resourcemanager, ``http://localhost:8088``, it should be up and reporting a\n     nodemanager;\n  #. the historyserver, ``http://localhost:19888``.\n\n\nHow to use a docker cluster\n---------------------------\n\nThese are the basic steps.\n\nChange directory to ``client_side_tests``, choose a specific distribution, say\n``apache_2.6.0`` and ``cd`` to that directory.\n\nRun the following command::\n\n  $ ../../scripts/start_client.sh [<PORT>]\n\nThe script will create a new docker container with a cluster client node that\nwill respond to ssh connections on port ``PORT``, with 3333 as its default\nvalue.  The ``start_client.sh`` script will execute the bash script\n``initialize.sh``, see the provided client side tests for examples, to install\non the client container the appropriate hadoop distribution, needed software,\nand a set of utility scripts.\n\n.. 
note::\n\n  You will probably have to answer twice 'yes' to ssh paranoia.\n\n\nLog in on the client, install pydoop and run the tests::\n\n  $ ssh -p 3333 root@localhost\n    Linux minas-morgul 3.18.7-gentoo #1 SMP Mon Feb 23 17:39:58 PST 2015 x86_64\n    \n    The programs included with the Debian GNU/Linux system are free software;\n    the exact distribution terms for each program are described in the\n    individual files in /usr/share/doc/*/copyright.\n\n    Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent\n    permitted by applicable law.\n    root@client:~# su - aen -c \"bash -x prepare_pydoop.sh\"\n    root@client:~# cd /home/aen/pydoop/\n    root@client:~# python setup.py install\n    root@client:~# cd\n    root@client:~# su - aen -c \"bash -x run_tests.sh\"\n    root@client:~# su - aen -c \"bash -x run_examples.sh\"    \n\nDetails\n-------\n\nBootstrap strategy\n;;;;;;;;;;;;;;;;;;\n\nThe main synchronization issues are:\n\n #. All hosts should be able to resolve logical names to IP, e.g., namenode\n   wants to resolve datanodes' IP to their logical names\n\n #. Part of inter-services communication is handled by using shared hdfs\n   directories that should be accessible with the appropriate permissions as a\n   pre-condition to service firing up.\n\n\nThe bootstrap strategy is as follows.\n\n #. There is an external mechanism -- here is the script\n    ``../scripts/share_etc_hosts.py``, but it should really be integrated in\n    docker-compose -- that guarantees that all nodes have in their ``/etc/hosts``\n    entries for all nodes in the group.  We need to have an external mechanism\n    that can talk to the docker server to be sure that we got all the nodes\n    involved.\n\n #. We have a zookeeper node that is guaranteed to be fired before any other\n    service by having all other nodes linked to it in the docker-compose.yml\n    file.\n\n #. 
We have an auxiliary service, bootstrap, that is in charge of orchestrating\n    the system bootstrap.\n\n #. The expected bootstrap workflow is as follows.\n\n   a. docker-compose starts\n   b. all services (except zookeeper and bootstrap) wait until\n      ``zookeeper:/<servicename>`` is set to ``boot``\n   c. bootstrap then does the following:\n      \n      1. waits until its /etc/hosts has been changed;\n      2. sets ``/{namenode,datanode}`` to ``boot``;\n      3. waits until namenode sets the ``/namenode`` to ``up``;\n      4. creates the needed hdfs dirs with appropriate permissions;\n      5. sets ``/{resourcemanager,nodemanager,historyserver}`` to ``boot``;\n      6. dies gracefully.\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/docker-compose.yml",
    "content": "zookeeper:\n  image: crs4_pydoop/apache_2.6.0_zookeeper:latest\n  name: zookeeper\n  hostname: zookeeper\n  ports:\n    - \"2181:2181\"\n\nbootstrap:\n  image: crs4_pydoop/apache_2.6.0_bootstrap:latest\n  name: bootstrap\n  hostname: bootstrap\n  links:\n    - zookeeper\n    \nnamenode:\n  image: crs4_pydoop/apache_2.6.0_namenode:latest\n  name: namenode\n  hostname: namenode\n  volumes:\n    - ./logs:/tmp/logs\n  links:\n    - zookeeper\n  ports:\n    - \"9000:9000\"\n    - \"50070:50070\"\n\ndatanode:\n  image: crs4_pydoop/apache_2.6.0_datanode:latest\n  name: datanode\n  hostname: datanode\n  volumes_from:\n    - namenode\n  links:\n    - zookeeper\n  ports:\n    - \"50020:50020\"        \n    \nresourcemanager:\n  image: crs4_pydoop/apache_2.6.0_resourcemanager:latest\n  name: resourcemanager\n  hostname: resourcemanager\n  volumes_from:\n    - namenode\n  links:\n    - zookeeper\n  ports:\n    - \"8088:8088\"\n    - \"8021:8021\"    \n    - \"8031:8031\"\n    - \"8033:8033\"    \n\nhistoryserver:\n  image: crs4_pydoop/apache_2.6.0_historyserver:latest\n  name: historyserver\n  hostname: historyserver\n  volumes_from:\n    - namenode\n  links:\n    - zookeeper\n  ports:\n    - \"10020:10020\"\n    - \"19888:19888\"\n\nnodemanager:\n  image: crs4_pydoop/apache_2.6.0_nodemanager:latest\n  name: nodemanager\n  hostname: nodemanager\n  links:\n    - zookeeper\n  ports:\n    - \"8042:8042\"\n  volumes_from:\n    - namenode\n    - client\n    \nclient:\n  image: crs4_pydoop/client:latest\n  name: client\n  hostname: client\n  ports:\n    - \"2222:22\"\n  volumes:\n    - ./local:/usr/local\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/base/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/base:latest\n\n# ------------------------------------------------------------------\n# Get zookeeper\n     \nENV zoo_ver zookeeper-3.4.6\nENV zoo_tgz ${zoo_ver}.tar.gz\nENV zoo_site http://mirror.nohup.it/apache/zookeeper\nENV zoo_tgz_site ${zoo_site}/${zoo_ver}\n\nRUN wget ${zoo_tgz_site}/${zoo_tgz} -O ${zoo_tgz} && \\\n    mkdir -p /opt && tar -C /opt -xzf ${zoo_tgz} && rm -f ${zoo_tgz} && \\\n    ln -s /opt/${zoo_ver} /opt/zookeeper\n\nENV ZOO_DATA_DIR      /data/zookeeper/data\nENV ZOO_CLIENT_PORT   2181\n\nEXPOSE ${ZOO_CLIENT_PORT}\n\nRUN mkdir -p ${ZOO_DATA_DIR}\nRUN echo \"tickTime=2000\"                  > /opt/zookeeper/conf/zoo.cfg && \\\n    echo \"dataDir ${ZOO_DATA_DIR}\"       >> /opt/zookeeper/conf/zoo.cfg && \\\n    echo \"clientPort ${ZOO_CLIENT_PORT}\" >> /opt/zookeeper/conf/zoo.cfg && \\\n    echo 1 > ${ZOO_DATA_DIR}/myid\n\n# Note that we are forcing the installation into dist-packages,\n# so that it will be possible to share kazoo and externally mount /usr/local later.\nRUN pip install kazoo -t /usr/lib/python2.7/dist-packages\nCOPY scripts/zk_wait.py /tmp/\nCOPY scripts/zk_set.py /tmp/\n# -----------------------------------------------------------------\n# Get hadoop\n\nENV hdp_ver hadoop-2.6.0\nENV hdp_tgz ${hdp_ver}.tar.gz\nENV hdp_site http://mirror.nohup.it/apache/hadoop/common\nENV hdp_tgz_site ${hdp_site}/hadoop-2.6.0\n\nRUN wget ${hdp_tgz_site}/${hdp_tgz} -O ${hdp_tgz} && \\\n    mkdir -p /opt && tar -C /opt -xzf ${hdp_tgz} && rm -f ${hdp_tgz} && \\\n    ln -s /opt/${hdp_ver} /opt/hadoop\n\n# ------------------------------------------------------------------\n# User:Group\t   Daemons\n# hdfs:hadoop\t   NameNode, Secondary NameNode, JournalNode, DataNode\n# yarn:hadoop\t   ResourceManager, NodeManager\n# mapred:hadoop\t MapReduce JobHistory Server\n\nENV HADOOP_GROUP hadoop\nENV HDFS_USER hdfs\nENV YARN_USER yarn\nENV MAPRED_USER mapred\n\nENV 
HDP_DATA_ROOT /data/hadoop\nENV LOG_DIR_ROOT /tmp/logs\nENV HADOOP_TMP_DIR /tmp\n\nENV HADOOP_CONF_DIR  /opt/hadoop/etc/hadoop\n\nENV DFS_NAME_DIR ${HDP_DATA_ROOT}/hdfs/nn\nENV DFS_DATA_DIR ${HDP_DATA_ROOT}/hdfs/dn\nENV DFS_CHECKPOINT_DIR   ${HDP_DATA_ROOT}/hdfs/snn\nENV HDFS_LOG_DIR ${LOG_DIR_ROOT}/hdfs\nENV HDFS_PID_DIR ${HDP_DATA_ROOT}/pid/hdfs\n\nENV YARN_LOCAL_DIR ${HDP_DATA_ROOT}/yarn\nENV YARN_LOG_DIR ${LOG_DIR_ROOT}/yarn\nENV YARN_LOCAL_LOG_DIR ${YARN_LOCAL_DIR}/userlogs\nENV YARN_PID_DIR ${HDP_DATA_ROOT}/pid/yarn\n\nENV YARN_REMOTE_APP_LOG_DIR   /app-logs\n\nENV MAPRED_LOG_DIR   ${LOG_DIR_ROOT}/mapred\nENV MAPRED_PID_DIR   ${HDP_DATA_ROOT}/pid/mapred\n\nENV MAPRED_JH_ROOT_DIR /mr-history\nENV MAPRED_JH_INTERMEDIATE_DONE_DIR ${MAPRED_JH_ROOT_DIR}/tmp\nENV MAPRED_JH_DONE_DIR ${MAPRED_JH_ROOT_DIR}/done\n\n#----------------------------------------------------------\n\n# Create groups and users\nRUN groupadd ${HADOOP_GROUP} && \\\n    useradd -g ${HADOOP_GROUP} ${HDFS_USER} && \\\n    useradd -g ${HADOOP_GROUP} ${YARN_USER} && \\\n    useradd -g ${HADOOP_GROUP} ${MAPRED_USER}\n\n# Create DATA_DIR_ROOT\nRUN mkdir -p ${HDP_DATA_ROOT} && \\\n    chmod -R 755 ${HDP_DATA_ROOT}\n\n# Create LOG_DIR_ROOT\nRUN mkdir -p ${LOG_DIR_ROOT} && \\\n    chmod -R 1777 ${LOG_DIR_ROOT}\n\t\nRUN mkdir -p ${HADOOP_CONF_DIR}\n\t\n### HDFS DIRs ###########################################################\n\n# DataNode\nRUN mkdir -p ${DFS_DATA_DIR} && \\\n    chown -R ${HDFS_USER}:${HADOOP_GROUP} ${DFS_DATA_DIR} && \\\n    chmod -R 750 ${DFS_DATA_DIR}\n\n# NameNode\nRUN mkdir -p ${DFS_NAME_DIR} && \\\n    chown -R ${HDFS_USER}:${HADOOP_GROUP} ${DFS_NAME_DIR} && \\\n    chmod -R 755 ${DFS_NAME_DIR}\n\t\n# HDFS log dir\nRUN\tmkdir -p ${HDFS_LOG_DIR} && \\\n    chown -R ${HDFS_USER}:${HADOOP_GROUP} ${HDFS_LOG_DIR} && \\\n    chmod -R 750 ${HDFS_LOG_DIR}\n\t\n# HDFS pid dir\t\nRUN mkdir -p ${HDFS_PID_DIR} && \\\n    chown -R ${HDFS_USER}:${HADOOP_GROUP} ${HDFS_PID_DIR} && \\\n    chmod -R 
750 ${HDFS_PID_DIR}\n\n#\nRUN mkdir -p ${DFS_CHECKPOINT_DIR} && \\\n    chown -R ${HDFS_USER}:${HADOOP_GROUP} ${DFS_CHECKPOINT_DIR} && \\\n    chmod -R 755 ${DFS_CHECKPOINT_DIR}\n \n \n### YARN DIRs ########################################################### \n\t\n# YARN_LOCAL_DIR\nRUN mkdir -p ${YARN_LOCAL_DIR} && \\\n    chown -R ${YARN_USER}:${HADOOP_GROUP} ${YARN_LOCAL_DIR} && \\\n    chmod -R 755 ${YARN_LOCAL_DIR}\n\n# YARN log dir\nRUN mkdir -p ${YARN_LOG_DIR} && \\\n    chown -R ${YARN_USER}:${HADOOP_GROUP} ${YARN_LOG_DIR} && \\\n    chmod -R 755 ${YARN_LOG_DIR}\n\n# YARN_LOCAL_LOG_DIR\nRUN mkdir -p ${YARN_LOCAL_LOG_DIR} && \\\n    chown -R ${YARN_USER}:${HADOOP_GROUP} ${YARN_LOCAL_LOG_DIR} && \\\n    chmod -R 755 ${YARN_LOCAL_LOG_DIR}\n\n# YARN pid dir\nRUN mkdir -p $YARN_PID_DIR && \\\n    chown -R $YARN_USER:$HADOOP_GROUP $YARN_PID_DIR && \\\n    chmod -R 755 $YARN_PID_DIR\n\t\n\t\n### MAPRED DIRs ##########################################################\t\n\n# MAPRED log dir\nRUN mkdir -p $MAPRED_LOG_DIR && \\\n    chown -R $MAPRED_USER:$HADOOP_GROUP $MAPRED_LOG_DIR && \\\n    chmod -R 755 $MAPRED_LOG_DIR\n\t\n# MAPRED pid dir\t\nRUN mkdir -p $MAPRED_PID_DIR && \\\n    chown -R $MAPRED_USER:$HADOOP_GROUP $MAPRED_PID_DIR && \\\n    chmod -R 755 $MAPRED_PID_DIR\n\nRUN mkdir -p ${YARN_REMOTE_APP_LOG_DIR} && \\\n    chown -R ${YARN_USER}:${HADOOP_GROUP} ${YARN_REMOTE_APP_LOG_DIR} && \\\n    chmod -R 777 ${YARN_REMOTE_APP_LOG_DIR}\n\n\nCOPY scripts/generate_conf_files.py /tmp/\nRUN python2.7 /tmp/generate_conf_files.py ${HADOOP_CONF_DIR}\n\nENV HADOOP_HOME /opt/hadoop\nENV PATH ${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/base/scripts/generate_conf_files.py",
    "content": "import sys\nimport os\nimport xml.etree.cElementTree as ET\n\n\ndef add_property(conf, name, value):\n    prop = ET.SubElement(conf, 'property')\n    ET.SubElement(prop, 'name').text = name\n    ET.SubElement(prop, 'value').text = value\n\n\ndef write_xml(root, fname):\n    tree = ET.ElementTree(root)\n    with open(fname, 'w') as f:\n        f.write('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n')\n        f.write('<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>')\n        tree.write(f)\n\n\ndef generate_xml_conf_file(fname, props):\n    root = ET.Element(\"configuration\")\n    for name, value in props:\n        add_property(root, name, value)\n    write_xml(root, fname)\n\n\ndef generate_core_site(fname):\n    hostname = 'namenode'\n    generate_xml_conf_file(fname, (\n        ('fs.defaultFS', 'hdfs://%s:8020' % hostname),\n        ('hadoop.tmp.dir', 'file://' + os.environ['HADOOP_TMP_DIR'])\n    ))\n\n\ndef generate_hdfs_site(fname):\n    generate_xml_conf_file(fname, (\n        ('dfs.replication', '1'),\n        ('dfs.namenode.name.dir', 'file://' + os.environ['DFS_NAME_DIR']),\n        ('dfs.datanode.data.dir', 'file://' + os.environ['DFS_DATA_DIR']),\n        ('dfs.namenode.checkpoint.dir', os.environ['DFS_CHECKPOINT_DIR']),\n        ('dfs.namenode.checkpoint.edits.dir',\n            os.environ['DFS_CHECKPOINT_DIR']),\n    ))\n\n\ndef generate_yarn_site(fname):\n    generate_xml_conf_file(fname, (\n        ('yarn.resourcemanager.hostname', 'resourcemanager'),\n        ('yarn.nodemanager.hostname', 'nodemanager'),\n        ('yarn.nodemanager.aux-services', 'mapreduce_shuffle'),\n        ('yarn.nodemanager.aux-services.mapreduce.shuffle.class',\n            'org.apache.hadoop.mapred.ShuffleHandler'),\n        # seconds to delay before deleting application\n        # localized logs and files. 
> 0 if debugging.\n        ('yarn.nodemanager.delete.debug-delay-sec', '600'),\n        ('yarn.nodemanager.log-dirs',\n            'file://' + os.environ['YARN_LOCAL_LOG_DIR']),\n        ('yarn.log.dir', os.environ['YARN_LOG_DIR']),\n        ('yarn.nodemanager.remote-app-log-dir',\n            os.environ['YARN_REMOTE_APP_LOG_DIR']),\n        ('yarn.log-aggregation-enable', 'true'),\n        # ('yarn.log-aggregation.retain-seconds', '360000'),\n        # ('yarn.log-aggregation.retain-check-interval-seconds', '360'),\n        # ('yarn.log.server.url', 'http://historyserver:19888'),\n    ))\n\n\ndef generate_mapred_site(fname):\n    generate_xml_conf_file(fname, (\n        ('mapreduce.framework.name', 'yarn'),\n\n        # MRv1\n        ('mapreduce.jobtracker.address', 'resourcemanager:8021'),\n        ('mapreduce.jobtracker.http.address', 'resourcemanager:50030'),\n        ('mapreduce.tasktracker.http.address', 'nodemanager:50060'),\n\n        # History Server\n        ('mapreduce.jobhistory.address', 'historyserver:10020'),\n        ('mapreduce.jobhistory.webapp.address', 'historyserver:19888'),\n        ('mapreduce.jobhistory.intermediate-done-dir',\n            os.environ['MAPRED_JH_INTERMEDIATE_DONE_DIR']),\n        ('mapreduce.jobhistory.done-dir', os.environ['MAPRED_JH_DONE_DIR']),\n    ))\n\n\ndef generate_capacity_scheduler(fname):\n    generate_xml_conf_file(fname, (\n        ('yarn.scheduler.capacity.resource-calculator',\n         'org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator'),\n        ('yarn.scheduler.capacity.root.queues', 'default'),\n        ('yarn.scheduler.capacity.root.default.capacity', '100'),\n        ('yarn.scheduler.capacity.root.default.user-limit-factor', '1'),\n        ('yarn.scheduler.capacity.root.default.maximum-capacity', '100'),\n        ('yarn.scheduler.capacity.root.default.state', 'RUNNING'),\n        ('yarn.scheduler.capacity.root.default.acl_submit_applications', '*'),\n        
('yarn.scheduler.capacity.root.default.acl_administer_queue', '*'),\n        ('yarn.scheduler.capacity.node-locality-delay', '40')))\n\n\ndef main(argv):\n    target_dir = argv[1]\n    generate_core_site(os.path.join(target_dir, 'core-site.xml'))\n    generate_hdfs_site(os.path.join(target_dir, 'hdfs-site.xml'))\n    generate_yarn_site(os.path.join(target_dir, 'yarn-site.xml'))\n    generate_mapred_site(os.path.join(target_dir, 'mapred-site.xml'))\n    generate_capacity_scheduler(os.path.join(target_dir,\n                                             'capacity-scheduler.xml'))\n\n\nmain(sys.argv)\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/base/scripts/zk_set.py",
    "content": "import sys\nimport os\nfrom kazoo.client import KazooClient\n\nimport logging\nlogging.basicConfig()\n\nlogger = logging.getLogger()\nlogger.setLevel(logging.INFO)\n\n\nkz = KazooClient('zookeeper', int(os.environ['ZOO_CLIENT_PORT']))\n\npath = '/' + sys.argv[1]\nvalue = sys.argv[2]\n\nkz.start()\nkz.set(path, value)\nkz.stop()\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/base/scripts/zk_wait.py",
    "content": "import sys\nimport os\nimport time\nfrom kazoo.client import KazooClient\n\nimport logging\nlogging.basicConfig()\n\nlogger = logging.getLogger()\nlogger.setLevel(logging.INFO)\n\nhost = 'zookeeper'\nport = int(os.environ['ZOO_CLIENT_PORT'])\nlogger.info('Starting on %s:%d', host, port)\n\nkz = KazooClient(host, port)\n\npath = '/' + sys.argv[1]\nlogger.info('Path is %s', path)\n\ndone = False\nwhile not done:\n    kz.start(timeout=15)\n    done = kz.exists(path)\n    kz.stop()\n    time.sleep(10)\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/bootstrap/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n\nCOPY scripts/bootstrap.py /tmp/\nCOPY scripts/create_hdfs_dirs.sh /tmp/\n\nCMD [\"/usr/bin/python\", \"/tmp/bootstrap.py\"]\n    \n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/bootstrap/scripts/bootstrap.py",
    "content": "from kazoo.client import KazooClient\nimport os\nimport time\nimport logging\nimport platform\n\nlogging.basicConfig()\n\nlogger = logging.getLogger()\nlogger.setLevel(logging.INFO)\n\n\n# FIXME this will break if our name is a substring of the hosts we are linked\n# to.\ndef etc_updated():\n    hostname = platform.node()\n    logger.info('Waiting for /etc/hosts to update on %s', hostname)\n    if not hostname:\n        raise RuntimeError('hostname is undefined')\n    with open('/etc/hosts') as f:\n        return sum(x.find(hostname) > -1 for x in f) > 1\n    logger.info('\\tdone')\n\n\ndef boot_node(kz, nodename):\n    logger.info('Booting %s', nodename)\n    path = '/' + nodename\n    kz.create(path, 'boot')\n    while kz.get(path)[0] != 'up':\n        time.sleep(2)\n    logger.info('\\tdone.')\n\n\ndef main():\n    logger.info('Starting bootstrap.')\n    zookeeper_host = 'zookeeper'\n    zookeeper_port = int(os.environ['ZOO_CLIENT_PORT'])\n    while not etc_updated():\n        time.sleep(1)\n    kz = KazooClient(hosts='%s:%d' % (zookeeper_host, zookeeper_port))\n    kz.start()\n    boot_node(kz, 'namenode')\n    boot_node(kz, 'datanode')\n    os.system('bash /tmp/create_hdfs_dirs.sh')\n    boot_node(kz, 'resourcemanager')\n    boot_node(kz, 'nodemanager')\n    boot_node(kz, 'historyserver')\n    logger.info('Done with bootstrap.')\n\n\nmain()\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/bootstrap/scripts/create_hdfs_dirs.sh",
    "content": "#!/bin/bash\n\nexport HADOOP_LOG_DIR=${HDFS_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\n\nHADOOP_BIN=${HADOOP_HOME}/bin\n\n\n# su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p ${YARN_REMOTE_APP_LOG_DIR}\"\n# su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chown -R ${YARN_USER}:${HADOOP_GROUP} ${YARN_REMOTE_APP_LOG_DIR}\"\n# su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chmod -R ${YARN_REMOTE_APP_LOG_DIR}\"\n\n#for d in ${MAPRED_JH_DONE_DIR} ${MAPRED_JH_INTERMEDIATE_DONE_DIR}\n# do\n#     su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p ${d}\"\n#     su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chown -R ${MAPRED_USER}:${HADOOP_GROUP} ${d}\"\n#     su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chmod -R 777 ${d}\"\n# done\n\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p /tmp\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chmod -R 1777 /tmp\"\n\necho \"Creating /tmp/hadoop-yarn (owner ${MAPRED_USER}:${HADOOP_GROUP})\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p /tmp/hadoop-yarn/staging\"\n#su ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p /tmp/hadoop-yarn/staging/history/tmp\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chown -R ${MAPRED_USER}:${HADOOP_GROUP} /tmp/hadoop-yarn\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chmod -R 1777 /tmp/hadoop-yarn\"\n\n\necho \"Creating ${MAPRED_JH_INTERMEDIATE_DONE_DIR}\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p ${MAPRED_JH_INTERMEDIATE_DONE_DIR}\"\necho \"Creating ${MAPRED_JH_DONE_DIR}\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p ${MAPRED_JH_DONE_DIR}\"\necho \"Setting ownership (${MAPRED_USER}:${HADOOP_GROUP}) and permissions for ${MAPRED_JH_ROOT_DIR}\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chown -R ${MAPRED_USER}:${HADOOP_GROUP} ${MAPRED_JH_ROOT_DIR}\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chmod -R 1777 ${MAPRED_JH_ROOT_DIR}\"\n\n\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p /user/${UNPRIV_USER}\"\nsu ${HDFS_USER} -c 
\"${HADOOP_BIN}/hdfs dfs -chown ${UNPRIV_USER} /user/${UNPRIV_USER}\"\n\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir -p /user/${MAPRED_USER}\"\nsu ${HDFS_USER} -c \"${HADOOP_BIN}/hdfs dfs -chown ${MAPRED_USER} /user/${MAPRED_USER}\"\nsu ${MAPRED_USER} -c \"${HADOOP_BIN}/hdfs dfs -mkdir /user/${MAPRED_USER}/logs\"\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/datanode/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n#\nEXPOSE  50020\n\nCOPY scripts/start_datanode.sh /tmp/\n\nCMD [\"/bin/bash\", \"/tmp/start_datanode.sh\"]\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/datanode/scripts/start_datanode.sh",
    "content": "#!/bin/bash\n\n#--- manage_deamon stardard\nexport HADOOP_LOG_DIR=${HDFS_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\n\npython /tmp/zk_wait.py datanode\n\nsu - ${HDFS_USER} -p -c \"${HADOOP_HOME}/sbin/hadoop-daemon.sh --config ${HADOOP_CONF_DIR} start datanode\"\n\n# FIXME\npython /tmp/zk_set.py datanode up\n\necho \"Log is  ${HDFS_LOG_DIR}/*datanode-${HOSTNAME}.out\"\n\ntail -f ${HDFS_LOG_DIR}/*datanode-${HOSTNAME}.out\n\n\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/historyserver/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n#\nEXPOSE 10020 19888\n\nCOPY scripts/start_historyserver.sh /tmp/\n\nCMD [\"/bin/bash\", \"/tmp/start_historyserver.sh\"]\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/historyserver/scripts/start_historyserver.sh",
    "content": "#!/bin/bash\n\npython /tmp/zk_wait.py historyserver\n\n# we should actually check that the nodemanager is up ...\npython /tmp/zk_set.py historyserver up\n\nexport HADOOP_JHS_LOGGER=DEBUG,JSA\n\nsu ${MAPRED_USER} -c \"${HADOOP_HOME}/bin/mapred --config ${HADOOP_CONF_DIR} historyserver 2>&1 >/tmp/logs/historyserver.out\"\n\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/namenode/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n# HDFS WebUI and HDFS default port\nEXPOSE  50070 9000\n\nCOPY scripts/start_namenode.sh /tmp/\n\nCMD [\"/bin/bash\", \"/tmp/start_namenode.sh\"]\n    \n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/namenode/scripts/start_namenode.sh",
    "content": "#!/bin/bash\n\n#--- manage_deamon stardard\nexport HADOOP_LOG_DIR=${HDFS_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\n\npython /tmp/zk_wait.py namenode\n\nsu -l ${HDFS_USER} -c \"${HADOOP_HOME}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format\"\n\nsu -l -p ${HDFS_USER} -c \"${HADOOP_HOME}/sbin/hadoop-daemon.sh --config ${HADOOP_CONF_DIR} start namenode\"\n\n# we should actually check that the namenode is up ...\npython /tmp/zk_set.py namenode up\n\necho \"log is ${HDFS_LOG_DIR}/*namenode-${HOSTNAME}.out\"\n\ntail -f ${HDFS_LOG_DIR}/*namenode-${HOSTNAME}.out\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/nodemanager/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n#\nEXPOSE 8042\n\nCOPY scripts/start_nodemanager.sh /tmp/\n\nCMD [\"/bin/bash\", \"/tmp/start_nodemanager.sh\"]\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/nodemanager/scripts/start_nodemanager.sh",
    "content": "#!/bin/bash\n\nexport YARN_LOG_DIR=${YARN_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\n\npython /tmp/zk_wait.py nodemanager\n\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR\"\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING\"\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}\"\n\n\nsu - ${YARN_USER} -p -c \"${HADOOP_HOME}/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start nodemanager\"\n\n# we should actually check that the nodemanager is up ...\npython /tmp/zk_set.py nodemanager up\n\necho log is ${YARN_LOG_DIR}/*nodemanager-${HOSTNAME}.out\n\ntail -f ${YARN_LOG_DIR}/*nodemanager-${HOSTNAME}.out\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/resourcemanager/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\n#\nEXPOSE 8088 8021 8031 8032 8033\n\nCOPY scripts/start_resourcemanager.sh /tmp/\n\nCMD [\"/bin/bash\", \"/tmp/start_resourcemanager.sh\"]\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/resourcemanager/scripts/start_resourcemanager.sh",
    "content": "#!/bin/bash\n\nexport YARN_LOG_DIR=${YARN_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\nexport YARN_OPTS=''\n\nexport HADOOP_MAPRED_LOG_DIR=${YARN_LOG_DIR}\n\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR\"\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING\"\n# YARN_OPTS=\"$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}\"\n# YARN_OPTS=\"$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}\"\n\n\npython /tmp/zk_wait.py resourcemanager\n\nsu - ${YARN_USER} -p -c \"${HADOOP_HOME}/sbin/yarn-daemon.sh --config $HADOOP_CONF_DIR start resourcemanager\"\n\n# su - ${MAPRED_USER} -p -c \"${HADOOP_HOME}/sbin/mr-jobhistory-daemon.sh --config ${HADOOP_CONF_DIR} start historyserver\"\n\n# we should actually check that the resourcemanager is up ...\npython /tmp/zk_set.py resourcemanager up\n\necho log is ${YARN_LOG_DIR}/*resourcemanager-${HOSTNAME}.out\n\ntail -f ${YARN_LOG_DIR}/*resourcemanager-${HOSTNAME}.out\n\n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/zookeeper/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/apache_2.6.0_base:latest\n\nEXPOSE 2181\n\nCMD [\"/opt/zookeeper/bin/zkServer.sh\", \"start-foreground\"]\n    \n"
  },
  {
    "path": "dev_tools/docker/clusters/apache_2.6.0/images/zookeeper/scripts/start_namenode.sh",
    "content": "#!/bin/bash\n\n#--- manage_deamon stardard\nexport HADOOP_LOG_DIR=${HDFS_LOG_DIR}\nexport HADOOP_PID_DIR=${HDFS_PID_DIR}\n\npython /tmp/zk_wait.py namenode\n\nsu ${HDFS_USER} -c \"${HADOOP_HOME}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format\"\n\n# we should actually check that the namenode is up ...\npython /tmp/zk_set.py namenode up\n\nsu ${HDFS_USER} -c \"${HADOOP_HOME}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode\"\n\n\n"
  },
  {
    "path": "dev_tools/docker/images/base/Dockerfile",
    "content": "#----------------------------------------------------\n#\n# A basic java machine with java, basic services and iv6 disabled\n#----------------------------------------------------\nFROM debian:latest\n\n#----------------------------------------------------\n# Install java and basic services\nRUN echo \"deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main\" | tee /etc/apt/sources.list.d/webupd8team-java.list && \\\n    echo \"deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main\" | tee -a /etc/apt/sources.list.d/webupd8team-java.list && \\\n    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 && \\\n    apt-get update && \\\n    echo yes | apt-get install -y --force-yes oracle-java8-installer && \\\n    apt-get install -y \\\n    apt-utils \\\n    openssh-server \\\n    python \\\n    python-pip \\\n    wget\n\nENV JAVA_HOME /usr/lib/jvm/java-8-oracle\nRUN echo \"export JAVA_HOME=${JAVA_HOME}\" >> /etc/profile.d/java.sh\n\n#----------------------------------------------------\n# disable ipv6\nRUN echo \"net.ipv6.conf.all.disable_ipv6=1\"     >> /etc/sysctl.conf && \\\n    echo \"net.ipv6.conf.default.disable_ipv6=1\" >> /etc/sysctl.conf && \\\n    echo \"net.ipv6.conf.lo.disable_ipv6=1\"      >>  /etc/sysctl.conf\n\n#----------------------------------------------------\n# add default unprivileged user (Alfred E. Neuman, \"What? Me worry?\")\nENV UNPRIV_USER aen\nRUN useradd -m ${UNPRIV_USER} -s /bin/bash && \\\n    echo \"${UNPRIV_USER}:hadoop\" | chpasswd\n    \nRUN mkdir -p /root/.ssh && \\\n    ssh-keygen -t dsa -P '' -f /root/.ssh/id_dsa && \\\n    cat /root/.ssh/id_dsa.pub >> /root/.ssh/authorized_keys"
  },
  {
    "path": "dev_tools/docker/images/client/Dockerfile",
    "content": "#----------------------------------------------------\nFROM crs4_pydoop/base:latest\n\n#----------------------------------\n# Install useful stuff\n# NO update. We should be in line with base\nRUN apt-get install -y git build-essential python-dev\n\n#----------------------------------\n# Enable sshd\nRUN mkdir /var/run/sshd\nRUN echo 'root:hadoop' | chpasswd\nRUN sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config\n\n# SSH login fix. Otherwise user is kicked off after login\nRUN sed 's@session\\s*required\\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd\n\nENV NOTVISIBLE \"in users profile\"\nRUN echo \"export VISIBLE=now\" >> /etc/profile\n\nEXPOSE 22\n\n#-----------------------------------\nCMD [\"/usr/sbin/sshd\", \"-D\"]"
  },
  {
    "path": "dev_tools/docker/scripts/build_base_images.sh",
    "content": "#!/bin/bash\n\ncurrent_path=$(cd $(dirname ${BASH_SOURCE}); pwd; cd - >/dev/null)\nimages_path=\"${current_path}/../images\"\n\necho \"Building crs4_pydoop/base image (path: ${images_path}/base)\"\ndocker build -t crs4_pydoop/base\t${images_path}/base\n\necho \"Building crs4_pydoop/client image (path: ${images_path}/client)\"\ndocker build -t crs4_pydoop/client ${images_path}/client\n"
  },
  {
    "path": "dev_tools/docker/scripts/build_cluster_images.sh",
    "content": "#!/bin/bash\n\nTAG=${1}\n\nCL_DIR=${TAG}/images\n\nfor d in ${CL_DIR}/*\ndo\n    if [ -d ${d} -a -e ${d}/Dockerfile ]; then\n        base=${d##${CL_DIR}/}\n        docker build -t crs4_pydoop/${TAG}_${base} ${d}\n    fi\ndone\n         \nexit\n\n# docker build -t crs4_pydoop/${TAG}_base     ${CL_DIR}/base\n# docker build -t crs4_pydoop/${TAG}_zookeeper ${CL_DIR}/zookeeper\n# docker build -t crs4_pydoop/${TAG}_namenode ${CL_DIR}/namenode\n# docker build -t crs4_pydoop/${TAG}_datanode ${CL_DIR}/datanode\n# docker build -t crs4_pydoop/${TAG}_resourcemanager ${CL_DIR}/resourcemanager\n# docker build -t crs4_pydoop/${TAG}_nodemanager ${CL_DIR}/nodemanager\n# docker build -t crs4_pydoop/${TAG}_historyserver ${CL_DIR}/historyserver\n# docker build -t crs4_pydoop/${TAG}_bootstrap     ${CL_DIR}/bootstrap\n\n\n"
  },
  {
    "path": "dev_tools/docker/scripts/share_etc_hosts.py",
    "content": "import os\nimport sys\nimport ssl\nimport logging\nfrom docker import tls\nfrom docker import Client\n\n\nlogging.basicConfig()\n\nlogger = logging.getLogger('share_etc_hosts')\nlogger.setLevel(logging.DEBUG)\n\n\nclass App(object):\n    def __init__(self, compose_group_name):\n        self.client = docker_client()\n        self.containers = self._get_containers(compose_group_name)\n\n    def _get_containers(self, compose_group_name):\n        head = '/%s_' % compose_group_name\n        cs = [c for c in self.client.containers()\n              if c['Names'][0].startswith(head)]\n        return cs\n\n    def _get_hosts(self):\n        hosts = {}\n        for c in self.containers:\n            d = self.client.inspect_container(c['Id'])\n            hosts[c['Id']] = (d['NetworkSettings']['IPAddress'],\n                              d['Config']['Hostname'])\n        return hosts\n\n    def share_etc_hosts(self):\n        hosts = self._get_hosts()\n        host_table = str('\\n'.join(['%s\\t%s' % h for h in hosts.itervalues()]))\n        logger.debug('Host table is:\\n%s', host_table)\n        cmd = '/bin/bash -c \"echo -e %r >> /etc/hosts\"' % host_table\n        for k in hosts:\n            logger.debug('Updating %s', k)\n            print(self.client.execute(k, cmd))\n\n\ndef docker_client():\n    \"\"\"\n    Returns a docker-py client configured using environment variables\n    according to the same logic as the official Docker client.\n    \"\"\"\n    cert_path = os.environ.get('DOCKER_CERT_PATH', '')\n    if cert_path == '':\n        cert_path = os.path.join(os.environ.get('HOME', ''), '.docker')\n\n    base_url = os.environ.get('DOCKER_HOST')\n    tls_config = None\n\n    if os.environ.get('DOCKER_TLS_VERIFY', '') != '':\n        parts = base_url.split('://', 1)\n        base_url = '%s://%s' % ('https', parts[1])\n\n        client_cert = (os.path.join(cert_path, 'cert.pem'),\n                       os.path.join(cert_path, 'key.pem'))\n        
ca_cert = os.path.join(cert_path, 'ca.pem')\n\n        tls_config = tls.TLSConfig(\n            ssl_version=ssl.PROTOCOL_TLSv1,\n            verify=True,\n            assert_hostname=False,\n            client_cert=client_cert,\n            ca_cert=ca_cert,\n        )\n\n    timeout = int(os.environ.get('DOCKER_CLIENT_TIMEOUT', 60))\n    return Client(\n        base_url=base_url, tls=tls_config, version='1.15', timeout=timeout\n    )\n\n\ndef main(argv):\n    tag = argv[1].replace('.', '').replace('_', '')\n    logger.info('Tag is:%s', tag)\n    app = App(tag)\n    app.share_etc_hosts()\n\n\nmain(sys.argv)\n"
  },
  {
    "path": "dev_tools/docker/scripts/start_client.sh",
    "content": "#!/bin/bash\n\n#-------------------------------------------\n#\n# Insert a new client in a running cluster\n#\n# Usage:\n#        $ cd client_side_tests/<client>\n#        $ ../../scripts/start_client.sh <PORT>\n#\nreal_path=`readlink -f ${BASH_SOURCE[0]}`\nscript_dir=`dirname ${real_path}`\nshare_hosts_bin=\"python ${script_dir}/share_etc_hosts.py\"\n\nclient_dir=`basename $PWD`\nport=${1:-3333}\n\nif [[ -z \"${DOCKER_HOST_IP}\" ]]\nthen \n\techo \"No explicit DOCKER_HOST_IP in your env: localhost is assumed\"\n\tDOCKER_HOST_IP=localhost\nfi\n\n# We assume that there is only one service with that name\ncluster_tag=$(docker ps | grep resourcemanager | \\\n                     awk '{print $NF}'| sed -e 's/_.*$//')\nclient_name=${cluster_tag}_client_${client_dir}\ndocker run -d --name ${client_name} -p ${port}:22 crs4_pydoop/client:latest\n${share_hosts_bin} ${cluster_tag}\n\nrm_id=$(docker ps | grep resourcemanager | awk '{print $1}')\nclient_id=$(docker ps | grep ${client_name} | awk '{print $1}')\n\n(cat ${HOME}/.ssh/id_dsa.pub | docker exec -i ${client_id} tee -a /root/.ssh/authorized_keys) > /dev/null\n\nif [ -x ./initialize.sh ]; then\n    ./initialize.sh ${port} ${client_id} ${rm_id}  ${DOCKER_HOST_IP}\nfi\n\n\n"
  },
  {
    "path": "dev_tools/docker/scripts/start_cluster.sh",
    "content": "#!/bin/bash\n\ncluster_name=$1\nscript_dir=$(cd $(dirname ${BASH_SOURCE}); pwd; cd - >/dev/null)\nshare_hosts_bin=\"python ${script_dir}/share_etc_hosts.py\"\ncluster_path=\"${script_dir}/../clusters/${cluster_name}\"\n\ntag=`echo ${cluster_name} | tr -d '._/'`\n\ncd ${cluster_path}\n\ndocker-compose stop\ndocker-compose rm\n\nfor x in logs local\ndo\n    if [ -d ${x} ]; then\n        backup=${x}.backup.$$\n        mv ${x} ${backup}\n        echo \"Moved ${x} to ${backup}\"\n    fi\n    mkdir ${x}\n    chmod 1777 ${x}\ndone\n\ndocker-compose up -d\n${share_hosts_bin} ${tag}\n"
  },
  {
    "path": "dev_tools/docker_build",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n\npushd \"${this_dir}/..\"\ndocker build --build-arg HADOOP_MAJOR_VERSION=2 -t crs4/pydoop-hadoop2 .\ndocker build -t crs4/pydoop .\ndocker build -t crs4/pydoop-docs -f Dockerfile.docs .\npopd\n"
  },
  {
    "path": "dev_tools/dump_app_params",
    "content": "#!/usr/bin/env python\n\n\"\"\"\nDump app options in rst table format.\n\"\"\"\n\nimport sys\nimport argparse\n\nimport pydoop.app.main\n\n\nAUTOGEN_NOTICE = \"\"\"\\\n..\n  Auto-generated by %(prog)s. DO NOT EDIT!\n  To update, run:\n    %(prog)s --app %(app)s -o %(out_fn)s\n\n\"\"\"\n\n\ndef set_option_attrs(actions):\n    for a in actions:\n        opts = a.option_strings\n        assert len(opts) > 0\n        try:\n            a.short_opt, a.long_opt = opts\n        except ValueError:\n            o = opts[0]\n            assert o.startswith('-')\n            if o.startswith('--'):\n                a.short_opt, a.long_opt = None, o\n            else:\n                a.short_opt, a.long_opt = o, None\n\n\ndef get_col_widths(actions):\n    lengths = {}\n    for a in actions:\n        for n in 'short_opt', 'long_opt', 'help':\n            attr = getattr(a, n)\n            lengths.setdefault(n, []).append(0 if attr is None else len(attr))\n    widths = dict((k, max(v)) for k, v in lengths.items())\n    # add 4 for ``backticks``\n    for n in 'short_opt', 'long_opt':\n        widths[n] += 4\n    return widths\n\n\nclass Formatter(object):\n\n    NAMES = 'short_opt', 'long_opt', 'help'\n\n    def __init__(self, actions):\n        self.col_widths = get_col_widths(actions)\n        self.actions = actions\n\n    def format_line(self, fields):\n        ln = [f.ljust(self.col_widths[n]) for f, n in zip(fields, self.NAMES)]\n        return '| %s |' % ' | '.join(ln)\n\n    def format_action(self, action):\n        ln = []\n        for n in 'short_opt', 'long_opt':\n            opt = getattr(action, n)\n            ln.append('``%s``' % opt if opt else '')\n        ln.append(getattr(action, 'help'))\n        return self.format_line(ln)\n\n    def hline(self, filler='-'):\n        ln = []\n        for n in self.NAMES:\n            ln.append(filler * self.col_widths[n])\n        return '+{0}{1}{0}+'.format(\n            filler, 
'{0}+{0}'.format(filler).join(ln)\n        )\n\n    def header_lines(self):\n        lines = [self.hline()]\n        lines.append(self.format_line(['Short', 'Long', 'Meaning']))\n        lines.append(self.hline(filler='='))\n        return lines\n\n    def dump_table(self, outf, exclude_h=True):\n        for ln in self.header_lines():\n            outf.write(ln + '\\n')\n        for a in self.actions:\n            if exclude_h and a.short_opt == '-h':\n                continue\n            outf.write(self.format_action(a) + '\\n')\n            outf.write(self.hline() + '\\n')\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(description='dump pydoop app help')\n    parser.add_argument('-o', '--out-fn', metavar='FILE', help='output file')\n    parser.add_argument('--app', metavar='PYDOOP_APP_NAME', default='script')\n    return parser\n\n\ndef main():\n    parser = make_parser()\n    args = parser.parse_args()\n    outf = None\n    pydoop_parser = pydoop.app.main.make_parser()\n    subp = pydoop_parser._pydoop_docs_helper[args.app]\n    act_map = dict((_.title, _._group_actions) for _ in subp._action_groups)\n    actions = act_map['optional arguments']\n    set_option_attrs(actions)\n    fmt = Formatter(actions)\n    try:\n        outf = open(args.out_fn, 'w') if args.out_fn else sys.stdout\n        outf.write(AUTOGEN_NOTICE % {\n            'prog': sys.argv[0],\n            'app': args.app,\n            'out_fn': args.out_fn\n        })\n        fmt.dump_table(outf)\n    finally:\n        if outf:\n            outf.close()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "dev_tools/edit_conf",
    "content": "#!/usr/bin/env python\n\n\"\"\"\\\nA utility to edit hadoop configuration files.\n\nUsage::\n\n  $ edit_conf conf/yarn-site.xml tmp.xml \\\n       yarn.nodemanager.resource.cpu-vcores 2 \\\n       yarn.nodemanager.resource.memory-mb 1024\n\"\"\"\n\nfrom lxml import etree as ET\nimport sys\n\n\ndef doc_to_dict(doc):\n    props = {}\n    root = doc.getroot()\n    for p in root.findall('property'):\n        props[p.find('name').text] = p.find('value').text\n    return props\n\n\ndef dict_to_doc(props):\n    doc = ET.ElementTree(ET.fromstring('<configuration/>'))\n    root = doc.getroot()\n    pi = ET.ProcessingInstruction(\n        'xml-stylesheet',\n        'type=\"text/xsl\" href=\"configuration.xsl\"')\n    root.addprevious(pi)\n    for k in props:\n        p = ET.SubElement(root, \"property\")\n        name = ET.SubElement(p, \"name\")\n        val = ET.SubElement(p, \"value\")\n        name.text, val.text = k, props[k]\n    return doc\n\n\ndef main(argv):\n    assert len(argv) >= 2 and not (len(argv) & 0x01)\n    conf_input = argv[0]\n    conf_output = argv[1]\n    doc = ET.parse(conf_input)\n    props = doc_to_dict(doc)\n    ai = iter(argv[2:])\n    for k, v in zip(ai, ai):\n        props[k] = v\n    ndoc = dict_to_doc(props)\n    with open(conf_output, 'wb') as f:\n        f.write(ET.tostring(\n            ndoc,\n            encoding=\"utf-8\",\n            xml_declaration=True,\n            pretty_print=True\n        ))\n\n\nif __name__ == \"__main__\":\n    main(sys.argv[1:])\n"
  },
  {
    "path": "dev_tools/git_export",
    "content": "#!/usr/bin/env python\n\n\"\"\"\nExport git working copy including uncommitted changes\n\"\"\"\n\nimport sys\nimport os\nimport argparse\nimport shutil\nimport subprocess as sp\n\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nPARENT_DIR = os.path.dirname(THIS_DIR)\nDEFAULT_EXPORT_DIR = os.path.join(PARENT_DIR, \"git_export\")\n\n\ndef get_sources():\n    cmd = \"git ls-files --full-name %s\" % PARENT_DIR\n    return sp.check_output(cmd, shell=True).splitlines()\n\n\ndef export(sources, export_root):\n    if os.path.isdir(export_root):\n        shutil.rmtree(export_root)\n    os.makedirs(export_root)\n    for fn in sources:\n        d, bn = os.path.split(fn)\n        if bn.startswith(\".git\"):\n            print \"skipping\", fn\n            continue\n        d = os.path.join(export_root, d)\n        if not os.path.isdir(d):\n            os.makedirs(d)\n        in_path = os.path.join(PARENT_DIR, fn)\n        if os.path.islink(in_path):\n            in_path = os.path.realpath(in_path)\n            out_path = os.path.join(d, bn)\n            if os.path.isdir(in_path):\n                shutil.copytree(in_path, out_path, symlinks=True)\n            else:\n                shutil.copy(in_path, out_path)\n        else:\n            shutil.copy(in_path, d)\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(description=__doc__)\n    parser.add_argument(\"-o\", \"--output-dir\", metavar=\"DIR\",\n                        help=\"output directory\", default=DEFAULT_EXPORT_DIR)\n    return parser\n\n\ndef main(argv):\n    parser = make_parser()\n    args = parser.parse_args(argv[1:])\n    sources = get_sources()\n    export(sources, args.output_dir)\n\n\nif __name__ == \"__main__\":\n    main(sys.argv)\n"
  },
  {
    "path": "dev_tools/import_src",
    "content": "#!/usr/bin/env python\n\n\"\"\"\nImport Hadoop pipes/utils source code.\n\nNOTE: starting from cdh4.3, there is a single Hadoop tarball with both\nmr2 and mr1 code. The latter is located in:\n${HADOOP_HOME}/src/hadoop-mapreduce1-project/. To fetch the code for\nmrv1, run import_src ${HADOOP_HOME}/src/hadoop-mapreduce1-project; to\nfetch the code for mrv2, run import_src ${HADOOP_HOME} --skip-dir\nhadoop-mapreduce1-project.\n\"\"\"\n\nimport sys, os, argparse, warnings, shutil\n\n\nWANTED = {  # basename: relative location\n  \"StringUtils.cc\": \"utils/impl\",\n  \"SerialUtils.cc\": \"utils/impl\",\n  \"StringUtils.hh\": \"utils/api/hadoop\",\n  \"SerialUtils.hh\": \"utils/api/hadoop\",\n  \"HadoopPipes.cc\": \"pipes/impl\",\n  \"Pipes.hh\": \"pipes/api/hadoop\",\n  \"TemplateFactory.hh\": \"pipes/api/hadoop\",\n  #--- libhdfs, all versions ---\n  \"hdfs.h\": \"libhdfs\",\n  \"hdfs.c\": \"libhdfs\",\n  # --- libhdfs, old versions ---\n  \"hdfsJniHelper.h\": \"libhdfs\",\n  \"hdfsJniHelper.c\": \"libhdfs\",\n  # --- libhdfs, recent versions ---\n  \"jni_helper.h\": \"libhdfs\",\n  \"jni_helper.c\": \"libhdfs\",\n  \"native_mini_dfs.h\": \"libhdfs\",\n  \"native_mini_dfs.c\": \"libhdfs\",\n  \"exception.h\": \"libhdfs\",\n  \"exception.c\": \"libhdfs\",\n  # --- java pipes ---\n  \"Application.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"BinaryProtocol.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"DownwardProtocol.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"OutputHandler.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"PipesMapRunner.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"PipesNonJavaInputFormat.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"PipesPartitioner.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"PipesReducer.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"Submitter.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"UpwardProtocol.java\": \"org/apache/hadoop/mapred/pipes\",\n  \"LocalJobRunner.java\": 
\"org/apache/hadoop/mapred\",\n  }\n\n\ndef get_sources(root_dir, skip=None):\n  sources = {}\n  for d, _, basenames in os.walk(root_dir):\n    if skip in d.split(os.sep):\n      continue\n    for bn in basenames:\n      if bn in WANTED:\n        if d.endswith(WANTED[bn]):\n          sources[bn] = os.path.join(d, bn)\n  missing = set(WANTED) - set(sources)\n  if missing:\n    warnings.warn(\"not found: %r\" % (sorted(missing),))\n  return sources\n\n\ndef make_parser():\n  parser = argparse.ArgumentParser(description=__doc__)\n  parser.add_argument('hadoop_home', metavar=\"HADOOP_HOME\")\n  parser.add_argument(\"-o\", \"--output-dir\", metavar=\"DIR\",\n                      help=\"output directory\")\n  parser.add_argument(\"-s\", \"--skip-dir\", metavar=\"DIR\",\n                      help=\"skip directories with this basename\")\n  return parser\n\n\ndef main(argv):\n  parser = make_parser()\n  args = parser.parse_args(argv[1:])\n  if not args.output_dir:\n    this_dir = os.path.dirname(os.path.abspath(__file__))\n    parent_dir = os.path.dirname(this_dir)\n    args.output_dir = os.path.join(\n      parent_dir, \"src\", os.path.basename(args.hadoop_home.rstrip(\"/\"))\n      )\n  if args.skip_dir:\n    args.skip_dir = os.path.basename(args.skip_dir)\n  sources = get_sources(args.hadoop_home, skip=args.skip_dir)\n  for bn, p in sources.iteritems():\n    out_dir = os.path.join(args.output_dir, WANTED[bn])\n    try:\n      os.makedirs(out_dir)\n    except OSError:\n      pass\n    shutil.copy(p, out_dir)\n    print \"%s -> %s\" % (p, out_dir) \n\n\nif __name__ == \"__main__\":\n  main(sys.argv)\n"
  },
  {
    "path": "dev_tools/mapred_pipes",
    "content": "#!/usr/bin/env bash\n\n# Set up the layout needed to build the \"mapred\" version of pipes\n\nset -euo pipefail\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n\nif [ $# -lt 1 ]; then\n    echo \"Usage: $0 HADOOP_SRC\"\n    exit 1\nfi\nif [ ! -d \"${1}\"/hadoop-mapreduce-project ]; then\n    echo \"ERROR: \\\"$1\\\" does not look like a Hadoop source dir\"\n    exit 1\nfi\nhadoop_src=${1}\n\npushd \"${this_dir}/..\"\nmapred_pipes_dir=src/it/crs4/pydoop/mapred/pipes\nrm -rf \"${mapred_pipes_dir}\"\nmkdir -p \"${mapred_pipes_dir}\"\ncp -rf \"${hadoop_src}\"/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/* \"${mapred_pipes_dir}\"/\nsed -i 's/package org\\.apache\\.hadoop/package it\\.crs4\\.pydoop/g' \"${mapred_pipes_dir}\"/*\n\n# not exactly future-proof\nsed_cmd=\"s|self\\.java_files = |self\\.java_files = glob.glob(\\\"${mapred_pipes_dir}/*.java\\\") + |\"\nsed -i \"${sed_cmd}\" setup.py\npopd\n"
  },
  {
    "path": "dev_tools/unpack_debian",
    "content": "#!/usr/bin/env python\n\n\"\"\"\nUnpack debian packages -- a quick shortcut for debug purposes.\n\"\"\"\n\nimport sys, os, argparse, shutil, subprocess as sp\n\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nPARENT_DIR = os.path.dirname(THIS_DIR)\nDEFAULT_FROM_DIR = os.path.join(PARENT_DIR, \"sandbox\")\nDEFAULT_TO_DIR = os.path.join(PARENT_DIR, \"temp\")\n\n\ndef get_pkg_map(from_dir):\n  pkg_map = {}\n  for fn in os.listdir(from_dir):\n    if fn.endswith(\".deb\"):\n      tag = fn.split(\"_\", 1)[0]\n      pkg_map[tag] = os.path.abspath(os.path.join(from_dir, fn))\n  return pkg_map\n\n\ndef unpack(pkg_map, to_dir):\n  if os.path.isdir(to_dir):\n    shutil.rmtree(to_dir)\n  os.makedirs(to_dir)\n  for tag, fn in pkg_map.iteritems():\n    d = os.path.join(to_dir, tag)\n    os.makedirs(d)\n    old_wd = os.getcwd()\n    os.chdir(d)\n    print \"unpacking %s to %s\" % (fn, d)\n    sp.check_call(\"ar x %s\" % fn, shell=True)\n    sp.check_call(\"tar xf data.tar.gz\", shell=True)\n    sp.check_call(\"tar xf control.tar.gz\", shell=True)\n    os.chdir(old_wd)\n\n\ndef make_parser():\n  parser = argparse.ArgumentParser(description=__doc__)\n  parser.add_argument(\"-i\", \"--input-dir\", metavar=\"DIR\",\n                      help=\"input directory\", default=DEFAULT_FROM_DIR)\n  parser.add_argument(\"-o\", \"--output-dir\", metavar=\"DIR\",\n                      help=\"output directory\", default=DEFAULT_TO_DIR)\n  return parser\n\n\ndef main(argv):\n  parser = make_parser()\n  args = parser.parse_args(argv[1:])\n  pkg_map = get_pkg_map(args.input_dir)\n  unpack(pkg_map, args.output_dir)\n\n\nif __name__ == \"__main__\":\n  main(sys.argv)\n"
  },
  {
    "path": "dev_tools/update_docs",
    "content": "#!/bin/bash\n\nset -eu\n\ndie() {\n    echo \"$1\" 1>&2\n    exit 1\n}\n\nDOCS_PREFIX=\"docs/_build/html\"\nREPO=\"https://github.com/crs4/pydoop.git\"\n\n[ -f \"setup.py\" ] || die \"ERROR: run from the main repo dir\"\n\ngit subtree pull --prefix=\"${DOCS_PREFIX}\" \"${REPO}\" gh-pages --squash\nmake docs\ngit add \"${DOCS_PREFIX}\"\ngit commit -a -m \"updated gh-pages\"\ngit subtree push --prefix=\"${DOCS_PREFIX}\" \"${REPO}\" gh-pages --squash\n"
  },
  {
    "path": "docs/Makefile",
    "content": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nPAPER         =\nBUILDDIR      = _build\n\n# Internal variables.\nPAPEROPT_a4     = -D latex_paper_size=a4\nPAPEROPT_letter = -D latex_paper_size=letter\nALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .\n\n.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest\n\nhelp:\n\t@echo \"Please use \\`make <target>' where <target> is one of\"\n\t@echo \"  html      to make standalone HTML files\"\n\t@echo \"  dirhtml   to make HTML files named index.html in directories\"\n\t@echo \"  pickle    to make pickle files\"\n\t@echo \"  json      to make JSON files\"\n\t@echo \"  htmlhelp  to make HTML files and a HTML help project\"\n\t@echo \"  qthelp    to make HTML files and a qthelp project\"\n\t@echo \"  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter\"\n\t@echo \"  changes   to make an overview of all changed/added/deprecated items\"\n\t@echo \"  linkcheck to check all external links for integrity\"\n\t@echo \"  doctest   to run all doctests embedded in the documentation (if enabled)\"\n\nclean:\n\t-rm -rf $(BUILDDIR)/*\n\nhtml:\n\t$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html\n\t@echo\n\t@echo \"Build finished. The HTML pages are in $(BUILDDIR)/html.\"\n\ndirhtml:\n\t$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml\n\t@echo\n\t@echo \"Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml.\"\n\npickle:\n\t$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle\n\t@echo\n\t@echo \"Build finished; now you can process the pickle files.\"\n\njson:\n\t$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json\n\t@echo\n\t@echo \"Build finished; now you can process the JSON files.\"\n\nhtmlhelp:\n\t$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp\n\t@echo\n\t@echo \"Build finished; now you can run HTML Help Workshop with the\" \\\n\t      \".hhp project file in $(BUILDDIR)/htmlhelp.\"\n\nqthelp:\n\t$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp\n\t@echo\n\t@echo \"Build finished; now you can run \"qcollectiongenerator\" with the\" \\\n\t      \".qhcp project file in $(BUILDDIR)/qthelp, like this:\"\n\t@echo \"# qcollectiongenerator $(BUILDDIR)/qthelp/Pydoop.qhcp\"\n\t@echo \"To view the help file:\"\n\t@echo \"# assistant -collectionFile $(BUILDDIR)/qthelp/Pydoop.qhc\"\n\nlatex:\n\t$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex\n\t@echo\n\t@echo \"Build finished; the LaTeX files are in $(BUILDDIR)/latex.\"\n\t@echo \"Run \\`make all-pdf' or \\`make all-ps' in that directory to\" \\\n\t      \"run these through (pdf)latex.\"\n\nchanges:\n\t$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes\n\t@echo\n\t@echo \"The overview file is in $(BUILDDIR)/changes.\"\n\nlinkcheck:\n\t$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck\n\t@echo\n\t@echo \"Link check complete; look for any errors in the above output \" \\\n\t      \"or in $(BUILDDIR)/linkcheck/output.txt.\"\n\ndoctest:\n\t$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest\n\t@echo \"Testing of doctests in the sources finished, look at the \" \\\n\t      \"results in $(BUILDDIR)/doctest/output.txt.\"\n"
  },
  {
    "path": "docs/_build/.gitignore",
    "content": "*\n!.gitignore\n!html\n"
  },
  {
    "path": "docs/_templates/layout.html",
    "content": "{% extends \"!layout.html\" %}\n\n\n{%- macro mysidebar() %}\n      {%- if not embedded %}{% if not theme_nosidebar|tobool %}\n      <div class=\"sphinxsidebar\">\n        <div class=\"sphinxsidebarwrapper\">\n          {%- block sidebarlogo %}\n          {%- if logo %}\n            <p class=\"logo\"><a href=\"{{ pathto(master_doc) }}\">\n              <img class=\"logo\" src=\"{{ pathto('_static/' + logo, 1) }}\" alt=\"Logo\"/>\n            </a></p>\n          {%- endif %}\n          {%- endblock %}\n          {%- block sidebartoc %}\n          {%- if display_toc %}\n            <h3><a href=\"{{ pathto(master_doc) }}\">{{ _('Table Of Contents') }}</a></h3>\n            {{ toc }}\n          {%- endif %}\n          {%- endblock %}\n          {%- block sidebarrel %}\n          {%- if prev %}\n            <h4>{{ _('Previous topic') }}</h4>\n            <p class=\"topless\"><a href=\"{{ prev.link|e }}\"\n                                  title=\"{{ _('previous chapter') }}\">{{ prev.title }}</a></p>\n          {%- endif %}\n          {%- if next %}\n            <h4>{{ _('Next topic') }}</h4>\n            <p class=\"topless\"><a href=\"{{ next.link|e }}\"\n                                  title=\"{{ _('next chapter') }}\">{{ next.title }}</a></p>\n          {%- endif %}\n          {%- endblock %}\n          {%- block sidebarsourcelink %}\n          {%- endblock %}\n\n\t\t\t\t\t<h4>Get Pydoop</h4>\n\t\t\t\t\t<ul>\n\t\t\t\t\t\t<li> <a href=\"https://pypi.python.org/pypi/pydoop\">Download page</a> </li>\n\t\t\t\t\t\t<li> <a href=\"{{ pathto('installation') }}\"> Installation Instructions </a> </li>\n\t\t\t\t\t</ul>\n\n\t\t\t\t\t<h4>Contributors</h4>\n\t\t\t\t\t<p class=\"topless\">\n\t\t\t\t\tPydoop is developed by:\n\t\t\t\t\t<a href=\"http://www.crs4.it\">\n\t\t\t\t\t\t<img src=\"{{ pathto(\"_static/crs4.png\", 1) }}\" alt=\"CRS4\" width=\"200\" height=\"60\" />\n\t\t\t\t\t</a>\n\t\t\t\t\t</p>\n          {%- if customsidebar %}\n          {% include 
customsidebar %}\n          {%- endif %}\n          {%- block sidebarsearch %}\n          {%- if pagename != \"search\" %}\n          <div id=\"searchbox\" style=\"display: none\">\n            <h3>{{ _('Quick search') }}</h3>\n              <form class=\"search\" action=\"{{ pathto('search') }}\" method=\"get\">\n                <input type=\"text\" name=\"q\" size=\"18\" />\n                <input type=\"submit\" value=\"{{ _('Go') }}\" />\n                <input type=\"hidden\" name=\"check_keywords\" value=\"yes\" />\n                <input type=\"hidden\" name=\"area\" value=\"default\" />\n              </form>\n              <p class=\"searchtip\" style=\"font-size: 90%\">\n              {{ _('Enter search terms or a module, class or function name.') }}\n              </p>\n          </div>\n          <script type=\"text/javascript\">$('#searchbox').show(0);</script>\n          {%- endif %}\n          {%- endblock %}\n        </div>\n      </div>\n      {%- endif %}{% endif %}\n{%- endmacro %}\n\n\n{% block rootrellink %}\n\t<li><a href=\"{{ pathto('index') }}\">Home</a>|&nbsp;</li>\n\t<li><a href=\"{{ pathto('installation') }}\">Installation</a>|&nbsp;</li>\n\t<li><a href=\"https://github.com/crs4/pydoop/issues\">Support</a>|&nbsp;</li>\n\t<li><a href=\"https://github.com/crs4/pydoop\">Git Repo</a>|&nbsp;</li>\n\t<li><a href=\"https://crs4.github.io/pydoop/_pydoop1\">Pydoop 1</a></li>\n{% endblock %}\n\n{# put the sidebar before the body #}\n{% block sidebar1 %}\n{{ mysidebar() }}\n{% endblock %}\n{% block sidebar2 %}{% endblock %}\n"
  },
  {
    "path": "docs/api_docs/hadut.rst",
    "content": ".. _hadut:\n\n:mod:`pydoop.hadut` --- Hadoop shell interaction\n================================================\n\n.. automodule:: pydoop.hadut\n   :members:\n"
  },
  {
    "path": "docs/api_docs/hdfs_api.rst",
    "content": ".. _hdfs-api:\n\n:mod:`pydoop.hdfs` --- HDFS API\n===============================\n\n.. automodule:: pydoop.hdfs\n   :members:\n\n.. automodule:: pydoop.hdfs.path\n   :members:\n\n.. automodule:: pydoop.hdfs.fs\n   :members:\n\n.. automodule:: pydoop.hdfs.file\n   :members: FileIO\n\n.. autoclass:: pydoop.hdfs.file.local_file\n"
  },
  {
    "path": "docs/api_docs/index.rst",
    "content": ".. _api-docs:\n\nAPI Docs\n========\n\n.. toctree::\n\n   mr_api\n   hdfs_api\n   hadut\n"
  },
  {
    "path": "docs/api_docs/mr_api.rst",
    "content": ".. _mr_api:\n\n:mod:`pydoop.mapreduce.api` --- MapReduce API\n=============================================\n\n.. automodule:: pydoop.mapreduce.api\n   :members:\n\n.. autofunction:: pydoop.mapreduce.pipes.run_task\n"
  },
  {
    "path": "docs/conf.py",
    "content": "# -*- coding: utf-8 -*-\n#\n# Pydoop documentation build configuration file, created by\n# sphinx-quickstart on Sun Jun 20 17:06:55 2010.\n#\n# This file is execfile()d with the current directory set to its\n# containing dir.\n#\n# Note that not all possible configuration values are present in this\n# autogenerated file.\n#\n# All configuration values have a default; values that are commented out\n# serve to show the default.\n\nimport datetime\n\nFIRST_RELEASE_YEAR = 2009\nCURRENT_YEAR = datetime.datetime.now().year\n\n# No need to hack the path, we install before building docs\n# sys.path[1:1] = [ os.path.abspath('../pydoop') ]\n\n# -- General configuration ----------------------------------------------------\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'sphinx.ext.autodoc',\n    'sphinx.ext.doctest',\n    'sphinx.ext.imgmath',\n    'sphinx.ext.ifconfig',\n    'sphinx.ext.intersphinx'\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['_templates']\n\n# The suffix of source filenames.\nsource_suffix = '.rst'\n\n# The encoding of source files.\n# source_encoding = 'utf-8'\n\n# The master toctree document.\nmaster_doc = 'index'\n\n# General information about the project.\nproject = u'Pydoop'\ncopyright = u'%d-%d, CRS4' % (FIRST_RELEASE_YEAR, CURRENT_YEAR)\n\n# The version info for the project you're documenting, acts as replacement for\n# |version| and |release|, also used in various other places throughout the\n# built documents.\n#\n\n# The short X.Y version.\nwith open(\"../VERSION\") as f:\n    version_string = f.read().strip()\nversion = \".\".join(version_string.split(\".\", 2)[:2])\n# The full version, including alpha/beta/rc tags.\nrelease = version_string\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n# language = None\n\n# There are two options for replacing |today|: either, you set today to some\n# non-false value, then it is used:\n# today = ''\n# Else, today_fmt is used as the format for a strftime call.\n# today_fmt = '%B %d, %Y'\n\n# Avoid doc-not-included-in-toctree warning\nexclude_patterns = [\n    'pydoop_script_options.rst',  # included with ..include::\n    'pydoop_submit_options.rst',  # included with ..include::\n]\n\n# List of directories, relative to source directory, that shouldn't be searched\n# for source files.\nexclude_trees = ['_build']\n\n# The reST default role (used for this markup: `text`) to use for all\n# documents.\n# default_role = None\n\n# If true, '()' will be appended to :func: etc. cross-reference text.\n# add_function_parentheses = True\n\n# If true, the current module name will be prepended to all description\n# unit titles (such as .. function::).\n# add_module_names = True\n\n# If true, sectionauthor and moduleauthor directives will be shown in the\n# output. They are ignored by default.\n# show_authors = False\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = 'sphinx'\n\n# A list of ignored prefixes for module index sorting.\n# modindex_common_prefix = []\n\n\n# -- Options for HTML output --------------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  Major themes that come with\n# Sphinx are currently 'default' and 'sphinxdoc'.\nhtml_theme = 'sphinxdoc'\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\n# html_theme_options = {}\n\n# Add any paths that contain custom themes here, relative to this directory.\n# html_theme_path = []\n\n# The name for this set of Sphinx documents.  
If None, it defaults to\n# \"<project> v<release> documentation\".\n# html_title = None\n\n# A shorter title for the navigation bar.  Default is the same as html_title.\n# html_short_title = None\n\n# The name of an image file (relative to this directory) to place at the top\n# of the sidebar.\nhtml_logo = \"_static/logo.png\"\n\n# The name of an image file (within the static path) to use as favicon of the\n# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32\n# pixels large.\nhtml_favicon = \"_static/favicon.ico\"\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['_static']\n\n# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,\n# using the given strftime format.\n# html_last_updated_fmt = '%b %d, %Y'\n\n# If true, SmartyPants will be used to convert quotes and dashes to\n# typographically correct entities.\n# html_use_smartypants = True\n\n# Custom sidebar templates, maps document names to template names.\n# html_sidebars = {}\n\n# Additional templates that should be rendered to pages, maps page names to\n# template names.\n# html_additional_pages = {}\n\n# If false, no module index is generated.\n# html_use_modindex = True\n\n# If false, no index is generated.\n# html_use_index = True\n\n# If true, the index is split into individual pages for each letter.\n# html_split_index = False\n\n# If true, links to the reST sources are added to the pages.\n# html_show_sourcelink = True\n\n# If true, an OpenSearch description file will be output, and all pages will\n# contain a <link> tag referring to it.  The value of this option must be the\n# base URL from which the finished HTML is served.\n# html_use_opensearch = ''\n\n# If nonempty, this is the file name suffix for HTML files (e.g. 
\".xhtml\").\n# html_file_suffix = ''\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = 'Pydoopdoc'\n\n\n# -- Options for LaTeX output -------------------------------------------------\n\n# The paper size ('letter' or 'a4').\n# latex_paper_size = 'letter'\n\n# The font size ('10pt', '11pt' or '12pt').\n# latex_font_size = '10pt'\n\n# Grouping the document tree into LaTeX files. List of tuples\n# (source start file, target name, title, author, documentclass\n# [howto/manual]).\nlatex_documents = [\n    ('index', 'Pydoop.tex', u'Pydoop Documentation',\n     u'Simone Leo, Gianluigi Zanetti', 'manual'),\n]\n\n# The name of an image file (relative to this directory) to place at the top of\n# the title page.\n# latex_logo = None\n\n# For \"manual\" documents, if this is true, then toplevel headings are parts,\n# not chapters.\n# latex_use_parts = False\n\n# Additional stuff for the LaTeX preamble.\n# latex_preamble = ''\n\n# Documents to append as an appendix to all manuals.\n# latex_appendices = []\n\n# If false, no module index is generated.\n# latex_use_modindex = True\n\n# Example configuration for intersphinx: refer to the Python standard library.\nintersphinx_mapping = {'python': ('http://docs.python.org/2.7', None)}\n"
  },
  {
    "path": "docs/examples/avro.rst",
    "content": ".. _avro_io:\n\nAvro I/O\n========\n\nPydoop transparently supports reading and writing `Avro\n<http://avro.apache.org>`_ records in MapReduce applications. This must be\nenabled by setting appropriate options in ``pydoop submit`` (see below).\n\nThe following program implements a (slightly\nmodified) version of the color count example from the Avro docs:\n\n.. literalinclude:: ../../examples/avro/py/color_count.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nThe application counts the per-office occurrence of favorite colors in\na dataset of user records with the following structure:\n\n.. literalinclude:: ../../examples/avro/schemas/user.avsc\n   :language: javascript\n\nUser records are read from an Avro container stored on HDFS, and\nresults are written to another Avro container with the following\nschema:\n\n.. literalinclude:: ../../examples/avro/schemas/stats.avsc\n   :language: javascript\n\nPydoop transparently serializes and/or deserializes Avro data as\nneeded, allowing you to work directly with Python dictionaries.  To\nget this behavior, enable Avro I/O and specify the output schema as follows:\n\n.. 
code-block:: bash\n\n  export STATS_SCHEMA=$(cat stats.avsc)\n  pydoop submit \\\n    -D pydoop.mapreduce.avro.value.output.schema=\"${STATS_SCHEMA}\" \\\n    --avro-input v --avro-output v \\\n    --upload-file-to-cache color_count.py \\\n    color_count input output\n\nThe ``--avro-input v`` and ``--avro-output v`` flags specify that we\nwant to work with Avro records on MapReduce values; the other possible\nchoices are ``\"k\"``, where records are exchanged over keys, and\n``\"kv\"``, which assumes that the top-level record structure has two\nfields named ``\"key\"`` and ``\"value\"`` and passes the former on keys\nand the latter on values.\n\nNote that we did not have to specify any input schema: in this case,\nAvro automatically falls back to the *writer schema*, i.e., the one\nthat's been used to write the container file.\n\nThe ``examples/avro`` directory contains examples for all I/O modes.\n\n\nAvro-Parquet I/O\n----------------\n\nThe above example focuses on `Avro containers\n<http://avro.apache.org/docs/1.7.6/spec.html#Object+Container+Files>`_.\nHowever, Pydoop supports any input/output format that exchanges Avro\nrecords.  In particular, it can be used to read from and write to\nAvro-Parquet files, i.e., `Parquet\n<http://parquet.incubator.apache.org>`_ files that use the Avro object\nmodel.\n\n.. note::\n\n  Make sure you have Parquet version 1.6 or later to avoid running\n  into `object reuse problems\n  <https://issues.apache.org/jira/browse/PARQUET-62>`_.  More\n  generally, the record writer must be aware of the fact that records\n  passed to its ``write`` method are mutable and can be reused by the\n  caller.\n\nThe following application reproduces the k-mer count example from the\n`ADAM <https://github.com/bigdatagenomics/adam>`_ docs:\n\n.. literalinclude:: ../../examples/avro/py/kmer_count.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nTo run the above program, execute pydoop submit as follows:\n\n.. 
code-block:: bash\n\n  export PROJECTION=$(cat projection.avsc)\n  pydoop submit \\\n     -D parquet.avro.projection=\"${PROJECTION}\" \\\n    --upload-file-to-cache kmer_count.py \\\n    --input-format parquet.avro.AvroParquetInputFormat \\\n    --avro-input v --libjars \"path/to/the/parquet/jar\" \\\n    kmer_count input output\n\nSince we are using an external input format (Avro container input and\noutput formats are integrated into the Java Pydoop code), we have to\nspecify the corresponding class via ``--input-format`` and its jar\nwith ``--libjars``.  The optional parquet projection allows to extract\nonly selected fields from the input data.  Note that, in this case,\nreading input records from values is not an option: that's how\n``AvroParquetInputFormat`` works.\n\nMore Avro-Parquet examples are available under ``examples/avro``.\n\n\nRunning the examples\n--------------------\n\nTo run the Avro examples you have to install the Python Avro package\n(you can get it from the Avro web site), while the ``avro`` jar is\nincluded in Hadoop and the ``avro-mapred`` one is included in Pydoop.\nPart of the examples code (e.g., input generation) is written in Java.\nCompilation and packaging into a jar is handled by the bash runners,\nbut `Maven <https://maven.apache.org/>`_ needs to be installed on the\nclient machine.\n"
  },
  {
    "path": "docs/examples/index.rst",
    "content": ".. _examples:\n\nExamples\n========\n\n.. toctree::\n   :maxdepth: 2\n\n   intro\n   sequence_file\n   input_format\n   avro\n"
  },
  {
    "path": "docs/examples/input_format.rst",
    "content": ".. _input_format_example:\n\nWriting a Custom InputFormat\n============================\n\nYou can use a custom Java ``InputFormat`` together with a Python\n:class:`~pydoop.mapreduce.api.RecordReader`: the java RecordReader\nsupplied by the ``InputFormat`` will be overridden by the Python one.\n\nConsider the following simple modification of Hadoop's built-in\n``TextInputFormat``:\n\n.. literalinclude:: ../../examples/input_format/it/crs4/pydoop/mapreduce/TextInputFormat.java\n   :language: java\n   :start-after: DOCS_INCLUDE_START\n\nWith respect to the default one, this InputFormat adds a configurable\nboolean parameter (``pydoop.input.issplitable``) that, if set to\n``false``, makes input files non-splitable (i.e., you can't get more\ninput splits than the number of input files).\n\nFor details on how to compile the above code into a jar and use it\nwith Pydoop, see ``examples/input_format``\\ .\n"
  },
  {
    "path": "docs/examples/intro.rst",
    "content": "Introduction\n============\n\nPydoop includes several usage examples: you can find them in the\n\"examples\" subdirectory of the distribution root. \n\n\nPython Dependencies\n-------------------\n\nIf you've installed Pydoop or other Python packages needed by your\napplication in a non-standard location (e.g.,\n``/opt/lib/python3.6/site-packages``), the Python code that runs within\nHadoop tasks might not be able to find them. Note that, according to your\nHadoop version or configuration, map and reduce tasks might run as a\ndifferent user than the one who launched the job. If you can't install\nglobally, Pydoop offers the option of shipping packages automatically\nupon job submission, see the section on :ref:`installation-free\nusage<self_contained>`.\n\n\nInput Data\n----------\n\nMost examples, by default, take their input from a free version of\nLewis Carroll's \"Alice's Adventures in Wonderland\" available at\n`Project Gutenberg <http://www.gutenberg.org>`_ (see the\n``examples/input`` sub-directory).\n"
  },
  {
    "path": "docs/examples/sequence_file.rst",
    "content": "Using the Hadoop SequenceFile Format\n====================================\n\nAlthough many MapReduce applications deal with text files, there are\nmany cases where processing binary data is required. In this case, you\nbasically have two options:\n\n#. write appropriate :class:`~pydoop.mapreduce.api.RecordReader` /\n   :class:`~pydoop.mapreduce.api.RecordWriter` classes for the binary format\n   you need to process\n#. convert your data to Hadoop's standard ``SequenceFile`` format.\n\nTo write sequence files with Pydoop, set the output format and the\ncompression type as follows::\n\n  pydoop submit \\\n  --output-format=org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat \\\n  -D mapreduce.output.fileoutputformat.compress.type=NONE|RECORD|BLOCK [...]\n\nTo read sequence files, set the input format as follows::\n\n  pydoop submit \\\n  --input-format=org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat\n\n\nExample Application: Filter Wordcount Results\n---------------------------------------------\n\n``SequenceFile`` is mostly useful to handle complex objects like\nC-style structs or images. To keep our example as simple as possible,\nwe considered a situation where a MapReduce task needs to emit the raw\nbytes of an integer value.\n\nWe wrote a trivial application that reads input from a previous\n:ref:`word count <word_count>` run and filters out\nwords whose count falls below a\nconfigurable threshold. Of course, the filter could have been directly\napplied to the wordcount reducer: the job has been artificially split\ninto two runs to give a ``SequenceFile`` read / write example.\n\nSuppose you know in advance that most counts will be large, but not so\nlarge that they cannot fit in a 32-bit integer: since the decimal\nrepresentation could require as much as 10 bytes, you decide to save\nspace by having the wordcount reducer emit the raw four bytes of the\ninteger instead:\n\n.. 
literalinclude:: ../../examples/sequence_file/bin/wordcount.py\n   :language: python\n   :pyobject: WordCountReducer\n\nSince newline characters can appear in the serialized values, you\ncannot use the standard text format where each line contains a\ntab-separated key-value pair. The problem can be solved by using\n``SequenceFileOutputFormat`` for wordcount and\n``SequenceFileInputFormat`` for the filtering application.\n\nThe full source code for the example is available under\n``examples/sequence_file``\\ .\n"
  },
  {
    "path": "docs/how_to_cite.rst",
    "content": "How to Cite\n===========\n\nPydoop is developed and maintained by researchers at `CRS4\n<http://www.crs4.it>`_ -- Distributed Computing group.  If you use\nPydoop as part of your research work, please cite `the HPDC 2010 paper\n<https://doi.org/10.1145/1851476.1851594>`_.\n\n**Plain text**::\n\n  S. Leo and G. Zanetti.  Pydoop: a Python MapReduce and HDFS API for\n  Hadoop.  In Proceedings of the 19th ACM International Symposium on\n  High Performance Distributed Computing, 819-825, 2010.\n\n**BibTeX**::\n\n  @inproceedings{Leo:2010:PPM:1851476.1851594,\n   author = {Leo, Simone and Zanetti, Gianluigi},\n   title = {{Pydoop: a Python MapReduce and HDFS API for Hadoop}},\n   booktitle = {{Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing}},\n   series = {HPDC '10},\n   year = {2010},\n   isbn = {978-1-60558-942-8},\n   location = {Chicago, Illinois},\n   pages = {819--825},\n   numpages = {7},\n   url = {http://doi.acm.org/10.1145/1851476.1851594},\n   doi = {10.1145/1851476.1851594},\n   acmid = {1851594},\n   publisher = {ACM},\n   address = {New York, NY, USA},\n  }\n"
  },
  {
    "path": "docs/index.rst",
    "content": ".. Pydoop documentation master file, created by\n   sphinx-quickstart on Sun Jun 20 17:06:55 2010.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n**Pydoop** is a Python interface to `Hadoop\n<http://hadoop.apache.org>`_ that allows you to write MapReduce\napplications in pure Python:\n\n.. literalinclude:: ../examples/pydoop_submit/mr/wordcount_minimal.py\n   :language: python\n   :pyobject: Mapper\n\n.. literalinclude:: ../examples/pydoop_submit/mr/wordcount_minimal.py\n   :language: python\n   :pyobject: Reducer\n\nFeature highlights:\n\n* a rich :ref:`HDFS API <hdfs_api_tutorial>`;\n\n* a :ref:`MapReduce API <api_tutorial>` that allows to write pure\n  Python record readers / writers, partitioners and combiners;\n\n* transparent :ref:`Avro (de)serialization <avro_io>`.\n\nPydoop enables MapReduce programming via a pure (except for a\nperformance-critical serialization section) Python client for Hadoop\nPipes, and HDFS access through an extension module based on `libhdfs\n<https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/LibHdfs.html>`_.\n\nTo get started, read the :ref:`tutorial <tutorial>`.  Full docs,\nincluding :ref:`installation instructions <installation>`, are listed\nbelow.\n\n\nContents\n========\n\n.. toctree::\n   :maxdepth: 2\n\n   news/index\n   tutorial/index\n   installation\n   pydoop_script\n   running_pydoop_applications\n   api_docs/index\n   examples/index\n   self_contained\n   how_to_cite\n\n\nIndices and Tables\n==================\n\n* :ref:`genindex`\n* :ref:`modindex`\n* :ref:`search`\n"
  },
  {
    "path": "docs/installation.rst",
    "content": ".. _installation:\n\nInstallation\n============\n\nPrerequisites\n-------------\n\nWe regularly test Pydoop on Ubuntu only, but it should also work on other\nLinux distros and (possibly with some tweaking) on macOS. Other platforms are\n**not** supported. Additional requirements:\n\n* `Python <http://www.python.org>`_ 2 or 3, including header files (e.g.,\n  ``apt-get install python-dev``, ``yum install python-devel``);\n\n* `setuptools <https://pypi.python.org/pypi/setuptools>`_ >= 3.3;\n\n* Hadoop >=2. We run regular CI tests with recent versions of\n  `Apache Hadoop <http://hadoop.apache.org/releases.html>`_ 2.x and 3.x,\n  but we expect Pydoop to also work with other Hadoop distributions. In\n  particular, we have tested it on `Amazon EMR <https://aws.amazon.com/emr>`_\n  (see :ref:`emr`).\n\nThese are both build time and run time requirements. At build time you will\nalso need a C++ compiler (e.g., ``apt-get install build-essential``, ``yum\ninstall gcc gcc-c++``) and a JDK (a JRE is not sufficient).\n\n**Optional:**\n\n* `Avro <https://avro.apache.org/>`_ Python implementation to enable\n  :ref:`avro_io` (run time only). Note that the pip packages for Python 2 and 3\n  are named differently (respectively ``avro`` and ``avro-python3``).\n\n\nEnvironment Setup\n-----------------\n\nTo compile the HDFS extension module, Pydoop needs the path to the JDK\ninstallation. You can specify this via ``JAVA_HOME``. For instance::\n\n  export JAVA_HOME=\"/usr/lib/jvm/java-8-openjdk-amd64\"\n\nNote that Pydoop is interested in the **JDK** home (where ``include/jni.h``\ncan be found), not the JRE home. Depending on your Java distribution and\nversion, these can be different directories (usually the former being the\nlatter's parent). 
If ``JAVA_HOME`` is not found in the environment, Pydoop\nwill try to locate the JDK via Java system properties.\n\nPydoop also includes some Java components, and it needs Hadoop libraries to be\nin the ``CLASSPATH`` in order to build them. This is done by calling ``hadoop\nclasspath``, so make sure that the ``hadoop`` executable is in the\n``PATH``. For instance, if Hadoop was installed by unpacking the tarball into\n``/opt/hadoop``::\n\n  export PATH=\"/opt/hadoop/bin:/opt/hadoop/sbin:${PATH}\"\n\nThe Hadoop class path is also needed at run time by the HDFS extension. Again,\nsince Pydoop picks it up from ``hadoop classpath``, ensure that ``hadoop`` is\nin the ``PATH``, as shown above. ``pydoop submit`` must also be able to call\nthe ``hadoop`` executable.\n\nAdditionally, Pydoop needs to read part of the Hadoop configuration to adapt\nto specific scenarios. If ``HADOOP_CONF_DIR`` is in the environment, Pydoop\nwill try to read the configuration from the corresponding location. As a\nfallback, Pydoop will also try ``${HADOOP_HOME}/etc/hadoop`` (in the above\nexample, ``HADOOP_HOME`` would be ``/opt/hadoop``). If ``HADOOP_HOME`` is not\ndefined, Pydoop will try to guess it from the ``hadoop`` executable (again,\nthis will have to be in the ``PATH``).\n\n\nBuilding and Installing\n-----------------------\n\nInstall prerequisites::\n\n  pip install --upgrade pip\n  pip install --upgrade -r requirements.txt\n\nInstall Pydoop via pip::\n\n  pip install pydoop\n\nTo install a pre-release (e.g., alpha, beta) add ``--pre``::\n\n  pip install --pre pydoop\n\nYou can also install the latest development version from GitHub::\n\n  git clone https://github.com/crs4/pydoop.git\n  cd pydoop\n  python setup.py build\n  python setup.py install --skip-build\n\nIf possible, you should install Pydoop on all cluster nodes. 
Alternatively, it\ncan be distributed, together with your MapReduce applications, via the Hadoop\ndistributed cache (see :doc:`self_contained`).\n\n\nTroubleshooting\n---------------\n\n#. ``libjvm.so`` not found: try the following::\n\n    export LD_LIBRARY_PATH=\"${JAVA_HOME}/jre/lib/amd64/server:${LD_LIBRARY_PATH}\"\n\n#. non-standard include/lib directories: the setup script looks for\n   includes and libraries in standard places -- read ``setup.py`` for\n   details. If some of the requirements are stored in different\n   locations, you need to add them to the search path. Example::\n\n    python setup.py build_ext -L/my/lib/path -I/my/include/path -R/my/lib/path\n    python setup.py build\n    python setup.py install --skip-build\n\n   Alternatively, you can write a small ``setup.cfg`` file for distutils:\n\n   .. code-block:: cfg\n\n    [build_ext]\n    include_dirs=/my/include/path\n    library_dirs=/my/lib/path\n    rpath=%(library_dirs)s\n\n   and then run ``python setup.py install``.\n\n   Finally, you can achieve the same result by manipulating the\n   environment.  This is particularly useful in the case of automatic\n   download and install with pip::\n\n    export CPATH=\"/my/include/path:${CPATH}\"\n    export LD_LIBRARY_PATH=\"/my/lib/path:${LD_LIBRARY_PATH}\"\n    pip install pydoop\n\n\nTesting your Installation\n-------------------------\n\nAfter Pydoop has been successfully installed, you might want to run unit\ntests and/or examples to verify that everything works fine. Here is a short\nlist of things that can go wrong and how to fix them. For full details on\nrunning tests and examples, see ``.travis.yml``.\n\n#. Incomplete configuration: make sure that Pydoop is able to find the\n   ``hadoop`` executable and configuration directory (check the above section\n   on environment setup).\n\n#. Cluster not ready: wait until all Hadoop daemons are up and HDFS exits from\n   safe mode (``hadoop dfsadmin -safemode wait``).\n\n#. 
HDFS tests may fail if your NameNode's hostname and port are\n   non-standard. In this case, set the ``HDFS_HOST`` and ``HDFS_PORT``\n   environment variables accordingly.\n\n#. Some HDFS tests may fail if not run by the cluster superuser, in\n   particular ``capacity``, ``chown`` and ``used``.  To get superuser\n   privileges, you can either start the cluster with your own user account or\n   set the ``dfs.permissions.superusergroup`` Hadoop property to one of your\n   unix groups (type ``groups`` at the command prompt to get the list of\n   groups for your current user), then restart the HDFS daemons.\n\n\n.. _emr:\n\nUsing Pydoop on Amazon EMR\n--------------------------\n\nYou can configure your EMR cluster to automatically install Pydoop on\nall nodes via `Bootstrap Actions\n<https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-bootstrap.html>`_. The\nmain difficulty is that Pydoop relies on Hadoop being installed and\nconfigured, even at compile time, so the bootstrap script needs to\nwait until EMR has finished setting it up:\n\n.. code-block:: bash\n\n  #!/bin/bash\n  PYDOOP_INSTALL_SCRIPT=$(cat <<EOF\n  #!/bin/bash\n  NM_PID=/var/run/hadoop-yarn/yarn-yarn-nodemanager.pid\n  RM_PID=/var/run/hadoop-yarn/yarn-yarn-resourcemanager.pid\n  while [ ! -f \\${RM_PID} ] && [ ! -f \\${NM_PID} ]; do\n    sleep 2\n  done\n  export JAVA_HOME=/etc/alternatives/java_sdk\n  sudo -E pip install pydoop\n  EOF\n  )\n  echo \"${PYDOOP_INSTALL_SCRIPT}\" | tee -a /tmp/pydoop_install.sh\n  chmod u+x /tmp/pydoop_install.sh\n  /tmp/pydoop_install.sh >/tmp/pydoop_install.out 2>/tmp/pydoop_install.err &\n\nThe bootstrap script creates the actual installation script and calls\nit; the latter, in turn, waits for either the resource manager or the\nnode manager to be up (i.e., for YARN to be up whether we are on\nthe master or on a slave) before installing Pydoop. If you want to use\nPython 3, install version 3.6 with yum:\n\n.. 
code-block:: bash\n\n  #!/bin/bash\n  sudo yum -y install python36-devel python36-pip\n  sudo alternatives --set python /usr/bin/python3.6\n  PYDOOP_INSTALL_SCRIPT=$(cat <<EOF\n  ...\n\nThe above instructions have been tested on ``emr-5.12.0``.\n\n\nTrying Pydoop without installing it\n-----------------------------------\n\nYou can try Pydoop on a `Docker <https://www.docker.com/>`_ container. The\nDockerfile is in the distribution root directory::\n\n  docker build -t pydoop .\n  docker run --name pydoop -d pydoop\n\nThis spins up a single-node, `pseudo-distributed\n<https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation>`_\nHadoop cluster with `HDFS\n<https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html#Introduction>`_,\n`YARN\n<https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/YARN.html>`_\nand a Job History server. Before attempting to use the container, wait a few\nseconds until all daemons are up and running.\n\nYou may want to expose some ports to the host, such as the ones used by the\nweb interfaces. For instance::\n\n  docker run --name pydoop -p 8088:8088 -p 9870:9870 -p 19888:19888 -d pydoop\n\nRefer to the Hadoop docs for a complete list of ports used by the various\nservices.\n"
  },
  {
    "path": "docs/news/archive.rst",
    "content": "News Archive\n------------\n\n\nNew in 1.2.0\n^^^^^^^^^^^^\n\n * Added support for Hadoop 2.7.2.\n * Dropped support for Python 2.6. Maintaining 2.6 compatibility would\n   require adding another dimension to the Travis matrix, vastly\n   increasing the build time and ultimately slowing down the\n   development. Since the default Python version in all major\n   distributions is 2.7, the added effort would gain us little.\n * Bug fixes.\n\n\nNew in 1.1.0\n^^^^^^^^^^^^\n\n * Added support for `HDP <http://hortonworks.com/hdp/>`_ 2.2.\n * `Pyavroc <https://github.com/Byhiras/pyavroc>`_ is now\n   automatically loaded if installed, enabling much faster (30-40x)\n   Avro (de)serialization.\n * Added Timer objects to help debug performance issues.\n * ``NoSeparatorTextOutputFormat`` is now available for all MR\n   versions.\n * Added Avro support to the Hadoop Simulator.\n * Bug fixes and performance improvements.\n\n\nNew in 1.0.0\n^^^^^^^^^^^^\n\n * Pydoop now features a brand new, more pythonic :ref:`MapReduce API <mr_api>`\n * Added built-in `Avro <http://avro.apache.org>`_ support (for now,\n   only with Hadoop 2).  By setting a few flags in the submitter and\n   selecting ``AvroContext`` as your application's context class, you\n   can read and write Avro data, transparently manipulating records as\n   Python dictionaries.  
See the :ref:`avro_io` docs for further details.\n * The new :ref:`pydoop submit <running_apps>` tool drastically\n   simplifies job submission, in particular when running applications\n   without installing Pydoop and other dependencies on the cluster\n   nodes (see :ref:`self_contained`).\n * Added support for testing Pydoop programs in a simulated Hadoop framework\n * Added support (experimental) for MapReduce V2 input/output formats (see\n   :ref:`input_format_example`)\n * The :mod:`~pydoop.hdfs.path` module offers many new functions that\n   serve as the HDFS-aware counterparts of those in :mod:`os.path`\n * The pipes backend (except for the performance-critical\n   serialization section) has been reimplemented in pure Python\n * An alternative (optional) JPype HDFS backend is available\n   (currently slower than the one based on libhdfs)\n * Added support for CDH5 and Apache Hadoop 2.4.1, 2.5.2 and 2.6.0\n * Removed support for CDH3 and Apache Hadoop 0.20.2\n * Installation has been greatly simplified: now Pydoop does not\n   require any external library to build its native extensions\n\n\nNew in 0.12.0\n^^^^^^^^^^^^^\n\n * YARN is now fully supported\n * Added support for CDH 4.4.0 and CDH 4.5.0\n\n\nNew in 0.11.1\n^^^^^^^^^^^^^\n\n * Added support for hadoop 2.2.0\n * Added support for hadoop 1.2.1\n\n   \nNew in 0.10.0\n^^^^^^^^^^^^^\n\n * Added support for CDH 4.3.0\n\n * Added a :meth:`~pydoop.hdfs.fs.hdfs.walk` method to hdfs instances\n   (works similarly to :func:`os.walk` from Python's standard library)\n\n * The Hadoop version parser is now more flexible.  
It should be able\n   to parse version strings for all CDH releases, including older ones\n   (note that most of them are **not** supported)\n\n * Pydoop script can now handle modules whose file name has no extension\n\n * Fixed \"unable to load native-hadoop library\" problem (thanks to\n   Liam Slusser)\n\n\nNew in 0.9.0\n^^^^^^^^^^^^\n\n* Added explicit support for:\n\n  * Apache Hadoop 1.1.2\n  * CDH 4.2.0\n\n* Added support for Cloudera from-parcels layout (as installed by\n  Cloudera Manager)\n\n* Added :func:`pydoop.hdfs.move`\n\n* Record writers can now be used in map-only jobs\n\n\nNew in 0.8.1\n^^^^^^^^^^^^\n\n* Fixed a problem that was breaking installation from PyPI via pip install\n\n\nNew in 0.8.0\n^^^^^^^^^^^^\n\n* Added support for Apple OS X Mountain Lion\n* Added support for Hadoop 1.1.1\n* Patches now include a fix for `HDFS-829\n  <https://issues.apache.org/jira/browse/HDFS-829>`_\n* Restructured docs\n\n  * A separate tutorial section collects and expands introductory material\n\n\nNew in 0.7.0\n^^^^^^^^^^^^\n\n* Added Debian package\n\n\nNew in 0.7.0-rc3\n^^^^^^^^^^^^^^^^\n\n* Fixed a bug in the hdfs instance caching method\n\n\nNew in 0.7.0-rc2\n^^^^^^^^^^^^^^^^\n\n* Support for HDFS append open mode\n\n  * fails if your Hadoop version and/or configuration does not support\n    HDFS append\n\n\nNew in 0.7.0-rc1\n^^^^^^^^^^^^^^^^\n\n* Works with CDH4, with the following limitations:\n\n  * support for MapReduce v1 only\n  * CDH4 must be installed from dist-specific packages (no tarball)\n\n* Tested with the latest releases of other Hadoop versions\n\n  * Apache Hadoop 0.20.2, 1.0.4\n  * CDH 3u5, 4.1.2\n\n* Simpler build process\n\n  * the source code we need is now included, rather than searched for\n    at compile time\n\n* Pydoop scripts can now accept user-defined configuration parameters\n\n  * New examples show how to use the new feature\n\n* New wrapper object makes it easier to interact with the JobConf\n* New hdfs.path functions: 
isdir, isfile, kind\n* HDFS: support for string description of permission modes in chmod\n* Several bug fixes\n\n\nNew in 0.6.6\n^^^^^^^^^^^^\n\nFixed a bug that was causing the pipes runner to incorrectly preprocess\ncommand line options.\n\n\nNew in 0.6.4\n^^^^^^^^^^^^\n\nFixed several bugs triggered by using a local fs as the default fs for\nHadoop.  This happens when you set a ``file:`` path as the value of\n``fs.defaultFS`` in core-site.xml.  For instance:\n\n.. code-block:: xml\n\n  <property>\n    <name>fs.defaultFS</name>\n    <value>file:///var/hadoop/data</value>\n  </property>\n\n\nNew in 0.6.0\n^^^^^^^^^^^^\n\n* The HDFS API features new high-level tools for easier manipulation\n  of files and directories. See the :ref:`API docs <hdfs-api>` for\n  more info\n* Examples have been thoroughly revised in order to make them easier\n  to understand and run\n* Several bugs were fixed; we also introduced a few optimizations,\n  most notably the automatic caching of HDFS instances\n\n\nNew in 0.5.0\n^^^^^^^^^^^^\n\n* Pydoop now works with Hadoop 1.0\n* Multiple versions of Hadoop can now be supported by the same\n  installation of Pydoop.\n* We have added a :ref:`command line tool <pydoop_script_tutorial>` to\n  make it trivially simple to write short scripts for simple\n  problems.\n* In order to work out-of-the-box, Pydoop now requires Python 2.7.\n  Python 2.6 can be used provided that you install a few additional\n  modules (see the :ref:`installation <installation>` page for\n  details).\n* We have dropped support for the 0.21 branch of Hadoop, which has\n  been marked as unstable and unsupported by Hadoop developers.\n"
  },
  {
    "path": "docs/news/index.rst",
    "content": ".. _news:\n\nNews\n====\n\n.. toctree::\n   :maxdepth: 1\n\n   latest\n   archive\n"
  },
  {
    "path": "docs/news/latest.rst",
    "content": "New in 2.0.0\n------------\n\nPydoop 2.0.0 adds Python 3 and Hadoop 3 support, and features a complete\noverhaul of the ``mapreduce`` subpackage, which is now easier to use and more\nefficient. As any major software release, Pydoop 2 also makes some\nbackwards-incompatible changes, mainly by dropping old, seldom-used\nfeatures. Finally, it includes several bug fixes and performance\nimprovements. Here is a more detailed list of changes:\n\n * Python 3 support.\n * Hadoop 3 support.\n * The ``sercore`` extension, together with most of the ``pydoop.mapreduce``\n   subpackage, has been rewritten from scratch. Now it's simpler and slightly\n   faster (much faster when using a combiner).\n * ``JobConf`` is now fully compatible with ``dict``.\n * ``pydoop submit`` now works when the default file system is local.\n * Compilation of avro-parquet-based examples is now much faster.\n * Many utilities for guessing Hadoop environment details have been either\n   removed or drastically simplified (affects ``hadoop_utils`` and related\n   package-level functions). Pydoop now assumes that the ``hadoop`` command is\n   in the ``PATH``, and uses only that information to try fallback values when\n   ``HADOOP_HOME`` and/or ``HADOOP_CONF_DIR`` are not defined.\n * The ``hadut`` module has been stripped down to contain little more than\n   what's required by ``pydoop submit``. In particular, ``PipesRunner`` is\n   gone. 
Running applications with ``mapred pipes`` still works, but with\n   caveats (e.g., `it does not work on the local fs\n   <https://issues.apache.org/jira/browse/MAPREDUCE-4000>`_, and controlling\n   remote task environment is not trivial).\n * The ``hdfs`` module no longer provides a default value for ``LIBHDFS_OPTS``.\n * The Hadoop simulator has been dropped.\n * `Support for opaque binary input splits <https://github.com/crs4/pydoop/pull/302>`_.\n * `Dropped support for Hadoop 1 <https://github.com/crs4/pydoop/pull/237>`_.\n * `Dropped old MapReduce API <https://github.com/crs4/pydoop/pull/255>`_.\n * `Dropped JPype HDFS backend <https://github.com/crs4/pydoop/pull/238>`_.\n * Bug fixes and performance improvements.\n"
  },
  {
    "path": "docs/pydoop_script.rst",
    "content": ".. _pydoop_script_guide:\n\nPydoop Script User Guide\n========================\n\nPydoop Script is the easiest way to write simple MapReduce programs\nfor Hadoop.  With Pydoop Script, you only need to write a map and/or a reduce\nfunction and the system will take care of the rest.\n\nFor a full explanation please see the :ref:`tutorial <pydoop_script_tutorial>`.\n\n\nCommand Line Tool\n-----------------\n\nIn the simplest case, Pydoop Script is invoked as::\n\n  pydoop script MODULE INPUT OUTPUT\n\nwhere ``MODULE`` is the file (on your local file system) containing\nyour map and reduce functions, in Python, while ``INPUT`` and\n``OUTPUT`` are, respectively, the HDFS paths of your input data and\nyour job's output directory.\n\nOptions are shown in the following table.\n\n.. include:: pydoop_script_options.rst\n\n\nExample: Word Count with Stop Words\n+++++++++++++++++++++++++++++++++++\n\nHere is the word count example modified to ignore stop words from a\nfile that is distributed to all the nodes via the Hadoop distributed\ncache:\n\n.. literalinclude:: ../examples/pydoop_script/scripts/wordcount_sw.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nTo execute the above script, save it to a ``wc.py`` file and run::\n\n  pydoop script wc.py hdfs_input hdfs_output --upload-file-to-cache stop_words.txt\n\nwhere ``stop_words.txt`` is a text file that contains the stop words,\none per line.\n\nWhile this script works, it has the obvious weakness of loading the\nstop words list even when executing the reducer (since it's loaded as\nsoon as we import the module).  
If this inconvenience is a concern, we\ncould solve the issue by triggering the loading from the ``mapper``\nfunction, or by writing a :ref:`full Pydoop application <api_tutorial>`\nwhich would give us all the control we need to only load the list when\nrequired.\n\nWriting your Map and Reduce Functions\n-------------------------------------\n\nIn this section we assume you'll be using the default ``TextInputFormat``\nand ``TextOutputFormat``.\n\nMapper\n++++++\n\nThe ``mapper`` function in your module will be called for each record\nin your input data.  It receives 3 (optionally 4) parameters:\n\n#. key: the byte offset with respect to the current input file. In most cases,\n   you can ignore it;\n#. value: the line of text to be processed;\n#. writer object: a Python object to write output and count values (see below);\n#. optionally, a job conf object from which to fetch configuration\n   property values (see `Accessing Parameters`_ below).\n\nCombiner\n++++++++\n\nThe ``combiner`` function will be called for each unique key\nproduced by your map function.  It also receives 3 (optionally 4) parameters:\n\n#. key: the key produced by your map function\n#. values iterable: iterate over this parameter to see all the values emitted\n   for the current key\n#. writer object: a writer object identical to the one given to the\n   map function\n#. optionally, a job conf object, identical to the one given to the\n   map function.\n\nThe key-value pair emitted by your combiner will be piped to the reducer.\n\nReducer\n+++++++\n\nThe ``reducer`` function will be called for each unique key\nproduced by your map function.  It also receives 3 (optionally 4) parameters:\n\n#. key: the key produced by your map function;\n#. values iterable: iterate over this parameter to traverse all the\n   values emitted for the current key;\n#. writer object: this is identical to the one given to the map function;\n#. 
optionally, a job conf object, identical to the one given to the\n   map function.\n\nThe key-value pair emitted by your reducer will be joined by the\nkey-value separator specified with the ``--kv-separator`` option\n(a tab character by default).\n\n\nWriter Object\n+++++++++++++\n\nThe writer object given as the third parameter to both the ``mapper``\nand ``reducer`` functions has the following methods:\n\n* ``emit(k, v)``: pass a ``(k, v)`` key-value pair to the framework;\n* ``count(what, how_many)``: add ``how_many`` to the counter named\n  ``what``.  If the counter doesn't already exist, it will be created\n  dynamically;\n* ``status(msg)``: update the task status to ``msg``;\n* ``progress()``: mark your task as having made progress without changing\n  the status message.\n\nThe latter two methods are useful for keeping your task alive in cases\nwhere the amount of computation to be done for a single record might\nexceed Hadoop's timeout interval (Hadoop kills a task if it neither reads an\ninput, writes an output, nor updates its status for a configurable amount\nof time, set to 10 minutes by default).\n\n\nAccessing Parameters\n++++++++++++++++++++\n\nPydoop Script lets you access the values of your job configuration\nproperties through a dict-like :class:`~pydoop.mapreduce.api.JobConf`\nobject, which gets passed as the fourth (optional) parameter to your\nfunctions.\n\n\nNaming your Functions\n+++++++++++++++++++++\n\nIf you'd like to give your map and reduce functions names different\nfrom ``mapper`` and ``reducer``, you may do so, but you must tell the\nscript tool.  Use the ``--map-fn`` and ``--reduce-fn`` command line\narguments to select your customized names.  Combiner functions can only\nbe assigned by explicitly setting the ``--combine-fn`` flag.\n\n\nMap-only Jobs\n+++++++++++++\n\nYou may have a program that doesn't use a reduce function.  Specify\n``--num-reducers 0`` on the command line and your map output will be\nwritten directly to file.  
In this case, your map output will go\ndirectly to the output formatter and be written to your final output,\nseparated by the key-value separator.\n"
  },
  {
    "path": "docs/pydoop_script_options.rst",
    "content": "..\n  Auto-generated by dev_tools/dump_app_params. DO NOT EDIT!\n  To update, run:\n    dev_tools/dump_app_params --app script -o docs/pydoop_script_options.rst\n\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| Short  | Long                          | Meaning                                                                                                                                                  |\n+========+===============================+==========================================================================================================================================================+\n|        | ``--num-reducers``            | Number of reduce tasks. Specify 0 to only perform map phase                                                                                              |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-home``        | Don't set the script's HOME directory to the $HOME in your environment.  
Hadoop will set it to the value of the 'mapreduce.admin.user.home.dir' property |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-env``         | Use the default PATH, LD_LIBRARY_PATH and PYTHONPATH, instead of copying them from the submitting client node                                            |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-ld-path``     | Use the default LD_LIBRARY_PATH instead of copying it from the submitting client node                                                                    |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-pypath``      | Use the default PYTHONPATH instead of copying it from the submitting client node                                                                         |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-path``        | Use the default PATH instead of copying it from the submitting client node                                                                               |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--set-env``                 | Set environment variables for the tasks. 
If a variable is set to '', it will not be overridden by Pydoop.                                                |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-D`` | ``--job-conf``                | Set a Hadoop property, e.g., -D mapreduce.job.priority=high                                                                                              |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--python-zip``              | Additional python zip file                                                                                                                               |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--upload-file-to-cache``    | Upload and add this file to the distributed cache.                                                                                                       |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--upload-archive-to-cache`` | Upload and add this archive file to the distributed cache.                                                                                               
|\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--log-level``               | Logging level                                                                                                                                            |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--job-name``                | name of the job                                                                                                                                          |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--python-program``          | python executable that should be used by the wrapper                                                                                                     |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--pretend``                 | Do not actually submit a job, print the generated config settings and the command line that would be invoked                                             |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--hadoop-conf``             | Hadoop configuration file                                                                                                                                
|\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--input-format``            | java classname of InputFormat                                                                                                                            |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-m`` | ``--map-fn``                  | name of map function within module                                                                                                                       |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-r`` | ``--reduce-fn``               | name of reduce function within module                                                                                                                    |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-c`` | ``--combine-fn``              | name of combine function within module                                                                                                                   |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--combiner-fn``             | --combine-fn alias for backwards compatibility                                                                                                           
|\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-t`` | ``--kv-separator``            | output key-value separator                                                                                                                               |\n+--------+-------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n"
  },
  {
    "path": "docs/pydoop_submit_options.rst",
    "content": "..\n  Auto-generated by dev_tools/dump_app_params. DO NOT EDIT!\n  To update, run:\n    dev_tools/dump_app_params --app submit -o docs/pydoop_submit_options.rst\n\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| Short  | Long                                   | Meaning                                                                                                                                                  |\n+========+========================================+==========================================================================================================================================================+\n|        | ``--num-reducers``                     | Number of reduce tasks. Specify 0 to only perform map phase                                                                                              |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-home``                 | Don't set the script's HOME directory to the $HOME in your environment.  
Hadoop will set it to the value of the 'mapreduce.admin.user.home.dir' property |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-env``                  | Use the default PATH, LD_LIBRARY_PATH and PYTHONPATH, instead of copying them from the submitting client node                                            |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-ld-path``              | Use the default LD_LIBRARY_PATH instead of copying it from the submitting client node                                                                    |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-pypath``               | Use the default PYTHONPATH instead of copying it from the submitting client node                                                                         |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--no-override-path``                 | Use the default PATH instead of copying it from the submitting client node                                                                               |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--set-env``                     
     | Set environment variables for the tasks. If a variable is set to '', it will not be overridden by Pydoop.                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n| ``-D`` | ``--job-conf``                         | Set a Hadoop property, e.g., -D mapreduce.job.priority=high                                                                                              |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--python-zip``                       | Additional python zip file                                                                                                                               |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--upload-file-to-cache``             | Upload and add this file to the distributed cache.                                                                                                       |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--upload-archive-to-cache``          | Upload and add this archive file to the distributed cache.                                                                                               
|\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--log-level``                        | Logging level                                                                                                                                            |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--job-name``                         | name of the job                                                                                                                                          |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--python-program``                   | python executable that should be used by the wrapper                                                                                                     |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--pretend``                          | Do not actually submit a job, print the generated config settings and the command line that would be invoked                                             |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--hadoop-conf``                      | Hadoop configuration file                                                
                                                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--input-format``                     | java classname of InputFormat                                                                                                                            |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--disable-property-name-conversion`` | Do not adapt property names to the hadoop version used.                                                                                                  |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--do-not-use-java-record-reader``    | Disable java RecordReader                                                                                                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--do-not-use-java-record-writer``    | Disable java RecordWriter                                                                                                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--output-format``               
     | java classname of OutputFormat                                                                                                                           |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--libjars``                          | Additional comma-separated list of jar files                                                                                                             |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--cache-file``                       | Add this HDFS file to the distributed cache as a file.                                                                                                   |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--cache-archive``                    | Add this HDFS archive file to the distributed cacheas an archive.                                                                                        |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--entry-point``                      | Explicitly execute MODULE.ENTRY_POINT() in the launcher script.                                                                                          
|\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--avro-input``                       | Avro input mode (key, value or both)                                                                                                                     |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--avro-output``                      | Avro output mode (key, value or both)                                                                                                                    |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--pstats-dir``                       | Profile each task and store stats in this dir                                                                                                            |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--pstats-fmt``                       | pstats filename pattern (expert use only)                                                                                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n|        | ``--keep-wd``                          | Don't remove the work dir                                                
                                                                                |\n+--------+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+\n"
  },
  {
    "path": "docs/running_pydoop_applications.rst",
    "content": ".. _running_apps:\n\nPydoop Submit User Guide\n========================\n\nPydoop applications are run via the ``pydoop submit`` command.  To\nstart, you will need a working Hadoop cluster.  If you don't have one\navailable, you can bring up a single-node Hadoop cluster on your\nmachine -- see `the Hadoop web site <http://hadoop.apache.org>`_ for\ninstructions. Alternatively, the source directory contains a\nDockerfile that can be used to build an image with Hadoop and Pydoop\ninstalled and (minimally) configured. Check out ``.travis.yml`` for\nusage hints.\n\nIf your application is contained in a single (local) file named\n``wc.py``, with an entry point called ``__main__`` (see\n:ref:`api_tutorial`) you can run it as follows::\n\n  pydoop submit --upload-file-to-cache wc.py wc input output\n\nwhere ``input`` (file or directory) and ``output`` (directory) are\nHDFS paths.  Note that the ``output`` directory will not be\noverwritten: instead, an error will be generated if it already exists\nwhen you launch the program.\n\nIf your entry point has a different name, specify it via ``--entry-point``.\n\nThe following table shows command line options for ``pydoop submit``:\n\n.. include:: pydoop_submit_options.rst\n\n\nSetting the Environment for your Program\n----------------------------------------\n\nWhen working on a shared cluster where you don't have root access, you\nmight have a lot of software installed in non-standard locations, such\nas your home directory. Since non-interactive ssh connections do not\nusually preserve your environment, you might lose some essential\nsetting like ``LD_LIBRARY_PATH``\\ .\n\nFor this reason, by default ``pydoop submit`` copies some environment\nvariables from the submitting node to the driver script that runs each task\non Hadoop.  If this behavior is not desired, you can disable it via the\n``--no-override-env`` command line option.\n"
  },
  {
    "path": "docs/self_contained.rst",
    "content": ".. _self_contained:\n\nInstallation-free Usage\n=======================\n\nThis example shows how to use the Hadoop Distributed Cache (DC) to\ndistribute Python packages, possibly including Pydoop itself, to all\ncluster nodes at job launch time. This is useful in all cases where\ninstalling to each node is not feasible (e.g., lack of a shared mount\npoint). Of course, Hadoop itself must be already installed and\nproperly configured in all cluster nodes before you can run this.\n\nSource code for this example is available under ``examples/self_contained``\\ .\n\n\nExample Application: Count Vowels\n---------------------------------\n\nThe example MapReduce application, ``vowelcount``, is rather trivial: it counts\nthe occurrence of each vowel in the input text. Since the point here\nis to show how a structured package can be distributed and imported,\nthe implementation is exceedingly verbose.\n\n.. literalinclude:: ../examples/self_contained/vowelcount/lib/__init__.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\n.. literalinclude:: ../examples/self_contained/vowelcount/mr/mapper.py\n   :language: python\n   :pyobject: Mapper\n\n.. literalinclude:: ../examples/self_contained/vowelcount/mr/reducer.py\n   :language: python\n   :pyobject: Reducer\n\n\nHow it Works\n------------\n\nThe DC supports automatic distribution of files and archives across\nthe cluster at job launch time.  This feature can be used to dispatch\nPython packages to all nodes, eliminating the need to install\ndependencies for your application, including Pydoop itself::\n\n  pydoop submit --upload-archive-to-cache vowelcount.tgz \\\n                --upload-archive-to-cache pydoop.tgz [...]\n\nThe ``pydoop.tgz`` and ``vowelcount.tgz`` archives will be copied to\nall slave nodes and unpacked; in addition, ``pydoop`` and\n``vowelcount`` symlinks will be created in the current working\ndirectory of each task before it is executed.  
If you include in each\narchive the *contents* of the corresponding package, they will be\navailable for import::\n\n  cd examples/self_contained/vowelcount\n  tar cfz ../vowelcount.tgz .\n\nThe archive must be in one of the formats supported by Hadoop: zip, tar or tgz.\n\n.. note::\n\n  Pydoop submit automatically builds the name of the symlink that\n  points to the unpacked archive by stripping the last extension.\n  Thus, ``foo.tar.gz`` will not work as expected, since the link will\n  be called ``foo.tar``. Always use the ``.tgz`` extension in this\n  case.\n\nThe example is supposed to work with Pydoop and vowelcount *not*\ninstalled on the slave nodes (you do need Pydoop on the client machine\nused to run the example, however).\n"
  },
  {
    "path": "docs/tutorial/hdfs_api.rst",
    "content": ".. _hdfs_api_tutorial:\n\nThe HDFS API\n============\n\nThe :ref:`HDFS API <hdfs-api>` allows you to connect to an HDFS\ninstallation, read and write files and get information on files,\ndirectories and global file system properties:\n\n.. literalinclude:: ../../examples/hdfs/repl_session.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n   :end-before: DOCS_INCLUDE_END\n\n\nLow-level API\n-------------\n\nThe high level API showcased above can be inefficient\nwhen performing multiple operations on the same HDFS instance. This is\ndue to the fact that, under the hood, each function opens a separate\nconnection to the HDFS server and closes it before returning. The\nfollowing example shows how to build statistics of HDFS usage by block\nsize by directly instantiating an ``hdfs`` object, which represents an\nopen connection to an HDFS instance. Full source code for the example,\nincluding a script that can be used to generate an HDFS directory tree\nis located under ``examples/hdfs`` in the Pydoop distribution.\n\n.. literalinclude:: ../../examples/hdfs/treewalk.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nFor more information, see the :ref:`HDFS API reference <hdfs-api>`.\n"
  },
  {
    "path": "docs/tutorial/index.rst",
    "content": ".. _tutorial:\n\nTutorial\n========\n\n.. toctree::\n   :maxdepth: 2\n\n   pydoop_script\n   hdfs_api\n   mapred_api\n"
  },
  {
    "path": "docs/tutorial/mapred_api.rst",
    "content": ".. _api_tutorial:\n\nWriting Full-Featured Applications\n==================================\n\nWhile :ref:`Pydoop Script <pydoop_script_tutorial>` allows to solve\nmany problems with minimal programming effort, some tasks require a\nbroader set of features. If your data is not simple text with one record\nper line, for instance, you may need to write a record reader; if\nyou need to change the way intermediate keys are assigned to reducers,\nyou have to write your own partitioner.  These components are\naccessible via the Pydoop MapReduce API.\n\nThe rest of this section serves as an introduction to MapReduce\nprogramming with Pydoop; the :ref:`API reference <mr_api>` has\nall the details.\n\n\nMappers and Reducers\n--------------------\n\nThe Pydoop API is object-oriented: the application developer writes a\n:class:`~pydoop.mapreduce.api.Mapper` class, whose core job is\nperformed by the :meth:`~pydoop.mapreduce.api.Mapper.map` method, and\na :class:`~pydoop.mapreduce.api.Reducer` class that processes data via\nthe :meth:`~pydoop.mapreduce.api.Reducer.reduce` method.  The\nfollowing snippet shows how to write the mapper and reducer for\n*wordcount*, an application that counts the occurrence of each word in a\ntext data set:\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_minimal.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nThe mapper is instantiated by the MapReduce framework that, for each\ninput record, calls the ``map`` method passing a ``context`` object to it.\nThe context serves as a communication interface between the framework\nand the application: in the ``map`` method, it is used to get the current\nkey (not used in the above example) and value, and to emit (send back\nto the framework) intermediate key-value pairs.  The reducer works in\na similar way, the main difference being the fact that the ``reduce``\nmethod gets a set of values for each key.  
The context has several\nother functions that we will explore later.\n\nTo run the above program, save it to a ``wc.py`` file and execute::\n\n  pydoop submit --upload-file-to-cache wc.py wc input output\n\nWhere ``input`` is the HDFS input directory.\n\nSee the section on :ref:`running Pydoop programs<running_apps>` for\nmore details.  Source code for the word count example is located under\n``examples/pydoop_submit/mr`` in the Pydoop distribution.\n\n\nCounters and Status Updates\n---------------------------\n\nHadoop features application-wide counters that can be set and\nincremented by developers.  Status updates are arbitrary text messages\nsent to the framework: these are especially useful in cases where the\ncomputation associated with a single input record can take a\nconsiderable amount of time, since Hadoop kills tasks that read no\ninput, write no output and do not update the status within a\nconfigurable amount of time (ten minutes by default).\n\nThe following snippet shows how to modify the above example to use\ncounters and status updates:\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :pyobject: Mapper\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :pyobject: Reducer\n\nCounter values and status updates show up in Hadoop's web interface.\nIn addition, the final values of all counters are listed in the\ncommand line output of the job (note that the list also includes Hadoop's\ndefault counters).\n\n\nRecord Readers and Writers\n--------------------------\n\nBy default, Hadoop assumes you want to process plain text and splits\ninput data into text lines.  
If you need to process binary data, or\nyour text data is structured into records that span multiple lines,\nyou need to write your own :class:`~pydoop.mapreduce.api.RecordReader`.\nThe **record reader** operates at the HDFS file level: its job is to read\ndata from the file and feed it as a stream of key-value pairs\n(records) to the mapper. To interact with HDFS files, we need to import the\n``hdfs`` submodule:\n\n.. code-block:: python\n\n  import pydoop.hdfs as hdfs\n\nThe following example shows how to write a record reader that mimics\nHadoop's default ``LineRecordReader``, where keys are byte offsets\nwith respect to the whole file and values are text lines:\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :pyobject: Reader\n\nFrom the context, the record reader gets the following information on\nthe byte chunk assigned to the current task, or **input split**:\n\n* the name of the file it belongs to;\n* its offset with respect to the beginning of the file;\n* its length.\n\nThis allows to open the file, seek to the correct offset and read\nuntil the end of the split is reached.  The framework gets the record\nstream by means of repeated calls to the\n:meth:`~pydoop.mapreduce.api.RecordReader.next` method.  The\n:meth:`~pydoop.mapreduce.api.RecordReader.get_progress` method is\ncalled by the framework to get the fraction of the input split that's\nalready been processed.  The ``close`` method (present in all\ncomponents except for the partitioner) is called by the framework once\nit has finished retrieving the records: this is the right place to\nperform cleanup tasks such as closing open handles.\n\nTo use the reader, pass the class object to the factory with\n``record_reader_class=Reader`` and, when running the program with\n``pydoop submit``, set the ``--do-not-use-java-record-reader`` flag.\n\nThe **record writer** writes key/value pairs to output files. 
The default\nbehavior is to write one tab-separated key/value pair per line; if you\nwant to do something different, you have to write a custom\n:class:`~pydoop.mapreduce.api.RecordWriter`:\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :pyobject: Writer\n\nThe above example, which simply reproduces the default behavior, also\nshows how to get job configuration parameters: the one starting with\n``mapreduce`` is a standard Hadoop parameter, while ``pydoop.hdfs.user``\nis a custom parameter defined by the application developer.\nConfiguration properties are passed as ``-D <key>=<value>`` (e.g.,\n``-D mapreduce.output.textoutputformat.separator='|'``) to the submitter.\n\nTo use the writer, pass the class object to the factory with\n``record_writer_class=Writer`` and, when running the program with\n``pydoop submit``, set the ``--do-not-use-java-record-writer`` flag.\n\n\nPartitioners and Combiners\n--------------------------\n\nThe :class:`~pydoop.mapreduce.api.Partitioner` assigns intermediate keys to\nreducers. If you do *not* explicitly set a partitioner via the factory,\npartitioning will be done on the Java side. By default, Hadoop uses\n`HashPartitioner\n<https://hadoop.apache.org/docs/r3.0.0/api/org/apache/hadoop/mapreduce/lib/partition/HashPartitioner.html>`_,\nwhich selects the reducer on the basis of a hash function of the key.\n\nTo write a custom partitioner in Python, subclass\n:class:`~pydoop.mapreduce.api.Partitioner`, overriding the\n:meth:`~pydoop.mapreduce.api.Partitioner.partition` method. The framework will\ncall this method with the current key and the total number of reducers ``N``\nas the arguments, and expect the chosen reducer ID --- in the ``[0, ...,\nN-1]`` range --- as the return value.\n\nThe following example shows how to write a partitioner that simply mimics the\ndefault ``HashPartitioner`` behavior:\n\n.. 
literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :pyobject: Partitioner\n   :prepend: from hashlib import md5\n\nThe combiner is functionally identical to a reducer, but it is run\nlocally, on the key-value stream output by a single mapper.  Although\nnothing prevents the combiner from processing values differently from\nthe reducer, the former, provided that the reduce function is\nassociative and idempotent, is typically configured to be the same as\nthe latter, in order to perform local aggregation and thus help cut\ndown network traffic.\n\nLocal aggregation is implemented by caching intermediate key/value pairs in a\ndictionary. Like in standard Java Hadoop, cache size is controlled by\n``mapreduce.task.io.sort.mb`` and defaults to 100 MB. Pydoop uses\n:func:`sys.getsizeof` to determine key/value size, which takes into account\nPython object overhead. This can be quite substantial (e.g.,\n``sys.getsizeof(b\"foo\") == 36``) and must be taken into account if fine tuning\nis desired.\n\n.. important:: Due to the caching, when using a combiner there are\n  limitations on the types that can be used for intermediate keys and\n  values. First of all, keys must be `hashable\n  <https://docs.python.org/3/glossary.html>`_. In addition, values\n  belonging to a mutable type should not change after having been\n  emitted by the mapper. For instance, the following (however contrived)\n  example would not work as expected:\n\n  .. code-block:: python\n\n    intermediate_value = {}\n\n    class Mapper(api.Mapper):\n      def map(self, ctx):\n         intermediate_value.clear()\n         intermediate_value[ctx.key] = ctx.value\n         ctx.emit(\"foo\", intermediate_value)\n\n  For these reasons, it is recommended to use immutable types for both keys\n  and values when the job includes a combiner.\n\nCustom partitioner and combiner classes must be declared to the factory as\ndone above for record readers and writers. 
To recap, if we need to use all of\nthe above components, we need to instantiate the factory as:\n\n.. literalinclude:: ../../examples/pydoop_submit/mr/wordcount_full.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n   :end-before: DOCS_INCLUDE_END\n\n\nProfiling Your Application\n--------------------------\n\nPython has built-in support for application `profiling\n<https://docs.python.org/3/library/profile.html>`_. Profiling a standalone\nprogram is relatively straightforward: run it through ``cProfile``, store\nstats in a file and use ``pstats`` to read and interpret them. A MapReduce\njob, however, spawns multiple map and reduce tasks, so we need a way to\ncollect all stats. Pydoop supports this via a ``pstats_dir`` argument to\n``run_task``:\n\n.. code-block:: python\n\n  pipes.run_task(factory, pstats_dir=\"pstats\")\n\nWith the above call, Pydoop will run each MapReduce task with ``cProfile``,\nand store resulting pstats files in the ``\"pstats\"`` directory on HDFS.\nYou can also enable profiling in the ``pydoop submit`` command line:\n\n.. code-block:: bash\n\n  pydoop submit --pstats-dir HDFS_DIR [...]\n\nIf the pstats directory is specified both ways, the one from ``run_task``\ntakes precedence.\n\nAnother way to do time measurements is via counters. The ``utils.misc`` module\nprovides a ``Timer`` object for this purpose:\n\n.. 
code-block:: python\n\n  from pydoop.utils.misc import Timer\n\n  class Mapper(api.Mapper):\n\n      def __init__(self, context):\n          super(Mapper, self).__init__(context)\n          self.timer = Timer(context)\n\n      def map(self, context):\n          with self.timer.time_block(\"tokenize\"):\n              words = context.value.split()\n          for w in words:\n              context.emit(w, 1)\n\nWith the above coding, the total time spent to execute\n``context.value.split()`` (in ms) will be automatically accumulated in\na ``TIME_TOKENIZE`` counter under the ``Timer`` counter group.\n\nSince profiling and timers can substantially slow down the Hadoop job, they\nshould only be used for performance debugging.\n"
  },
  {
    "path": "docs/tutorial/pydoop_script.rst",
    "content": ".. _pydoop_script_tutorial:\n\nEasy Hadoop Scripting with Pydoop Script\n========================================\n\nPydoop Script is the easiest way to write simple MapReduce programs\nfor Hadoop.  With Pydoop Script, your code focuses on the core of the\nMapReduce model: the mapper and reducer functions.\n\n\nWriting and Running Scripts\n---------------------------\n\nWrite a ``script.py`` Python module that contains the mapper and\nreducer functions:\n\n.. code-block:: python\n\n  def mapper(input_key, input_value, writer):\n      # your computation here\n      writer.emit(intermediate_key, intermediate_value)\n\n  def reducer(intermediate_key, value_iterator, writer):\n      # your computation here\n      writer.emit(output_key, output_value)\n\nThe program can be run as follows::\n\n  pydoop script script.py hdfs_input hdfs_output\n\n\nExamples\n--------\n\nThe following examples show how to use Pydoop Script for common\nproblems.  More examples can be found in the\n``examples/pydoop_script`` subdirectory of Pydoop's source\ndistribution root.  The :ref:`Pydoop Script Guide\n<pydoop_script_guide>` contains more detailed information on writing\nand running programs.\n\n\n.. _word_count:\n\nWord Count\n++++++++++\n\nCount the occurrence of each word in a set of text files.\n\n.. literalinclude:: ../../examples/pydoop_script/scripts/wordcount.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nA few more lines allow to set a combiner for local aggregation:\n\n.. literalinclude:: ../../examples/pydoop_script/scripts/wc_combiner.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nRun the example with::\n\n  pydoop script -c combiner wordcount.py hdfs_input hdfs_output\n\nNote that we need to explicitly set the ``-c`` flag to activate the\ncombiner.  
By default, no combiner is called.\n\nOne thing to remember is that the current Hadoop Pipes architecture\nruns the combiner under the hood of the executable run by ``pipes``,\nso it does not update the combiner counters of the general Hadoop\nframework.  Thus, if you run the above script, you'll get a value of 0\nfor \"Combine input/output records\" in the \"Map-Reduce Framework\"\ngroup, but the \"combiner calls\" counter should be updated correctly.\n\n\nMap-only Jobs and Output Separators\n+++++++++++++++++++++++++++++++++++\n\nSuppose we want to convert all input text to lower case. All we need to do is read each input line, convert it to lower case and emit it (for instance, as the output value). Since there is no aggregation involved, we don't need a reducer:\n\n.. literalinclude:: ../../examples/pydoop_script/scripts/lowercase.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nThe only problem with the above code is that, by default, each output key-value pair is written as tab-separated, which would lead to each output line having a leading tab character that's not found in the original input (note that we'd get a *trailing* tab if we emitted each record as the output key instead). We can turn off the reduce phase and get an empty separator for output key-value pairs by submitting the job with the following options::\n\n  pydoop script --num-reducers 0 -t '' lowercase.py hdfs_input hdfs_output\n\n\nCustom Parameters\n+++++++++++++++++\n\nSuppose we want to select all lines containing a substring to be given at run time (distributed grep). As in the previous example, we can do this with a map-only job (read each input line and emit it if it contains the substring), but we need a way for the user of our application to specify the substring to be matched. This can be done by adding a fourth argument to the mapper function:\n\n.. 
literalinclude:: ../../examples/pydoop_script/scripts/grep.py\n   :language: python\n   :start-after: DOCS_INCLUDE_START\n\nIn this case, Pydoop Script passes the Hadoop job configuration to the ``mapper`` function as a dictionary via the fourth argument. Moreover, just like Hadoop tools (e.g., ``hadoop pipes``), Pydoop Script allows to set additional configuration parameters via ``-D key=value``. To search for \"hello\", for instance, we can run the application as::\n\n  pydoop script --num-reducers 0 -t '' -D grep-expression=hello \\\n    grep.py hdfs_input hdfs_output\n\n\nApplicability\n-------------\n\nPydoop Script makes it easy to solve simple problems.  It makes it\nfeasible to write simple (even throw-away) scripts to perform simple\nmanipulations or analyses on your data, especially if it's text-based.\nIf you can specify your algorithm in two simple functions that have no\nstate or have a simple state that can be stored in module variables,\nthen you can consider using Pydoop Script.\nIf, on the other hand, you need more sophisticated processing, consider\nusing the :ref:`full Pydoop API <api_tutorial>`.\n"
  },
  {
    "path": "examples/README",
    "content": "This directory contains several Pydoop usage examples. Documentation\nis in the \"examples\" subsection of the Pydoop html docs (look for the\n\"docs\" subdirectory in the distribution root).\n"
  },
  {
    "path": "examples/avro/build.sh",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\npushd \"${this_dir}\"\ngen_classpath\ncp=\"$(<\"${CP_PATH}\"):$(${HADOOP} classpath)\"\nmkdir -p \"${CLASS_DIR}\"\njavac -cp \"${cp}\" -d \"${CLASS_DIR}\" src/main/java/it/crs4/pydoop/*\njar -cf \"${JAR_PATH}\" -C \"${CLASS_DIR}\" ./it\npopd\n"
  },
  {
    "path": "examples/avro/config.sh",
    "content": "[ -n \"${PYDOOP_AVRO_EXAMPLES:-}\" ] && return || readonly PYDOOP_AVRO_EXAMPLES=1\n\nTARGET=\"target\"\nexport CLASS_DIR=\"${TARGET}/classes\"\nexport CP_PATH=\"${TARGET}/cp.txt\"\nexport JAR_PATH=\"${TARGET}/pydoop-avro-examples.jar\"\n\ngen_classpath() {\n    [ -f \"${CP_PATH}\" ] && return 0\n    mkdir -p \"${TARGET}\"\n    mvn dependency:resolve\n    mvn dependency:build-classpath -D mdep.outputFile=\"${CP_PATH}\"\n    echo -n ':'$(readlink -e ../../lib)/'*' >> \"${CP_PATH}\"\n}\n\nexport -f gen_classpath\n"
  },
  {
    "path": "examples/avro/pom.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<!--\n  BEGIN_COPYRIGHT\n\n  Copyright 2009-2026 CRS4.\n\n  Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n  use this file except in compliance with the License. You may obtain a copy\n  of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n  License for the specific language governing permissions and limitations\n  under the License.\n\n  END_COPYRIGHT\n-->\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n\t xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\t xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd\">\n\n  <modelVersion>4.0.0</modelVersion>\n  <groupId>it.crs4.pydoop</groupId>\n  <artifactId>pydoop-avro-examples</artifactId>\n  <packaging>jar</packaging>\n  <version>2.0a2</version>\n  <name>Pydoop Avro Examples</name>\n  <url>https://crs4.github.io/pydoop/</url>\n\n  <properties>\n    <parquet.version>1.7.0</parquet.version>\n  </properties>\n\n  <dependencies>\n    <dependency>\n      <groupId>org.apache.parquet</groupId>\n      <artifactId>parquet-common</artifactId>\n      <version>${parquet.version}</version>\n    </dependency>\n    <dependency>\n      <groupId>org.apache.parquet</groupId>\n      <artifactId>parquet-column</artifactId>\n      <version>${parquet.version}</version>\n    </dependency>\n    <dependency>\n      <groupId>org.apache.parquet</groupId>\n      <artifactId>parquet-hadoop</artifactId>\n      <version>${parquet.version}</version>\n    </dependency>\n    <dependency>\n      <groupId>org.apache.parquet</groupId>\n      <artifactId>parquet-avro</artifactId>\n      <version>${parquet.version}</version>\n    </dependency>\n  
</dependencies>\n\n</project>\n"
  },
  {
    "path": "examples/avro/py/avro_base.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport abc\nfrom collections import Counter\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\n\nclass ColorPickBase(api.Mapper):\n\n    @abc.abstractmethod\n    def get_user(self, ctx):\n        \"\"\"\n        Get the user record.  This is just to avoid writing near identical\n        examples for the various key/value cases.  
In a real application,\n        carrying records over keys or values would be a design decision,\n        so you would simply do, e.g., ``user = self.value``.\n        \"\"\"\n\n    def map(self, ctx):\n        user = self.get_user(ctx)\n        color = user['favorite_color']\n        if color is not None:\n            ctx.emit(user['office'], Counter({color: 1}))\n\n\nclass AvroKeyColorPick(ColorPickBase):\n\n    def get_user(self, ctx):\n        return ctx.key\n\n\nclass AvroValueColorPick(ColorPickBase):\n\n    def get_user(self, ctx):\n        return ctx.value\n\n\nclass AvroKeyValueColorPick(ColorPickBase):\n\n    def get_user(self, ctx):\n        return ctx.key\n\n    def map(self, ctx):\n        sys.stdout.write(\"value (unused): %r\\n\" % (ctx.value,))\n        super(AvroKeyValueColorPick, self).map(ctx)\n\n\nclass ColorCountBase(api.Reducer):\n\n    def reduce(self, ctx):\n        s = sum(ctx.values, Counter())\n        self.emit(s, ctx)\n\n    @abc.abstractmethod\n    def emit(self, s, ctx):\n        \"\"\"\n        Emit the sum to the ctx.  As in the base mapper, this is just to\n        avoid writing near identical examples.\n        \"\"\"\n\n\nclass NoAvroColorCount(ColorCountBase):\n\n    def emit(self, s, ctx):\n        ctx.emit(ctx.key, \"%r\" % s)\n\n\nclass AvroKeyColorCount(ColorCountBase):\n\n    def emit(self, s, ctx):\n        ctx.emit({'office': ctx.key, 'counts': s}, ctx.key)\n\n\nclass AvroValueColorCount(ColorCountBase):\n\n    def emit(self, s, ctx):\n        ctx.emit(ctx.key, {'office': ctx.key, 'counts': s})\n\n\nclass AvroKeyValueColorCount(ColorCountBase):\n\n    def emit(self, s, ctx):\n        record = {'office': ctx.key, 'counts': s}\n        ctx.emit(record, record)  # FIXME: do something fancier\n\n\ndef run_task(mapper_class, reducer_class=NoAvroColorCount):\n    pp.run_task(pp.Factory(mapper_class, reducer_class=reducer_class))\n"
  },
  {
    "path": "examples/avro/py/avro_container_dump_results.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\n\nfrom avro.io import DatumReader\nfrom avro.datafile import DataFileReader\n\n\ndef main(fn, out_fn, avro_mode=''):\n    with open(out_fn, 'w') as fo:\n        with open(fn, 'rb') as f:\n            reader = DataFileReader(f, DatumReader())\n            for r in reader:\n                if avro_mode.upper() == 'KV':\n                    r = r['key']\n\n                fo.write('%s\\t%r\\n' % (r['office'], r['counts']))\n    print('wrote', out_fn)\n\n\nif __name__ == '__main__':\n    main(*sys.argv[1:])\n"
  },
  {
    "path": "examples/avro/py/avro_key_in.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroKeyColorPick, run_task\n\n\ndef __main__():\n    run_task(AvroKeyColorPick)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/avro_key_in_out.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroKeyColorPick, AvroKeyColorCount, run_task\n\n\ndef __main__():\n    run_task(AvroKeyColorPick, AvroKeyColorCount)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/avro_key_value_in.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroKeyValueColorPick, run_task\n\n\ndef __main__():\n    run_task(AvroKeyValueColorPick)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/avro_key_value_in_out.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroKeyValueColorPick, AvroKeyValueColorCount, run_task\n\n\ndef __main__():\n    run_task(AvroKeyValueColorPick, AvroKeyValueColorCount)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/avro_parquet_dump_results.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, ctx):\n        cc_stat = ctx.value\n        ctx.emit(cc_stat['office'], repr(cc_stat['counts']))\n\n\ndef __main__():\n    pp.run_task(pp.Factory(Mapper))\n"
  },
  {
    "path": "examples/avro/py/avro_pyrw.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nAvro color count with Python record reader/writer.\n\"\"\"\n\nfrom collections import Counter\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\nfrom pydoop.avrolib import AvroReader, AvroWriter, parse\n\n\nclass UserReader(AvroReader):\n    pass\n\n\nclass ColorWriter(AvroWriter):\n\n    schema = parse(open(\"stats.avsc\").read())\n\n    def emit(self, key, value):\n        self.writer.append({'office': key, 'counts': value})\n\n\nclass ColorPick(api.Mapper):\n\n    def map(self, ctx):\n        user = ctx.value\n        color = user['favorite_color']\n        if color is not None:\n            ctx.emit(user['office'], Counter({color: 1}))\n\n\nclass ColorCount(api.Reducer):\n\n    def reduce(self, ctx):\n        s = sum(ctx.values, Counter())\n        ctx.emit(ctx.key, s)\n\n\npp.run_task(pp.Factory(\n    mapper_class=ColorPick,\n    reducer_class=ColorCount,\n    record_reader_class=UserReader,\n    record_writer_class=ColorWriter\n), private_encoding=True)\n"
  },
  {
    "path": "examples/avro/py/avro_value_in.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroValueColorPick, run_task\n\n\ndef __main__():\n    run_task(AvroValueColorPick)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/avro_value_in_out.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom avro_base import AvroValueColorPick, AvroValueColorCount, run_task\n\n\ndef __main__():\n    run_task(AvroValueColorPick, AvroValueColorCount)\n\n\nif __name__ == '__main__':\n    __main__()\n"
  },
  {
    "path": "examples/avro/py/check_cc.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport os\nimport errno\nfrom collections import Counter\n\nfrom avro.io import DatumReader\nfrom avro.datafile import DataFileReader\nfrom pydoop.utils.py3compat import iteritems\n\n\ndef iter_fnames(path):\n    try:\n        contents = os.listdir(path)\n    except OSError as e:\n        if e.errno == errno.ENOTDIR:\n            yield path\n    else:\n        for name in contents:\n            yield os.path.join(path, name)\n\n\ndef main(in_, out_):\n\n    expected = {}\n    for in_fn in iter_fnames(in_):\n        with open(in_fn, 'rb') as f:\n            reader = DataFileReader(f, DatumReader())\n            for r in reader:\n                expected.setdefault(\n                    r[\"office\"], Counter()\n                )[r[\"favorite_color\"]] += 1\n\n    computed = {}\n    for out_fn in iter_fnames(out_):\n        with open(out_fn) as f:\n            for l in f:\n                p = l.strip().split('\\t')\n                computed[p[0]] = eval(p[1])\n\n    if set(computed) != set(expected):\n        sys.exit(\"ERROR: computed keys != expected keys: %r != %r\" % (\n            sorted(computed), sorted(expected)))\n    for k, v in iteritems(expected):\n        if computed[k] != v:\n            sys.exit(\"ERROR: %r: %r != %r\" % (k, computed[k], dict(v)))\n    print('All is ok!')\n\n\nif __name__ == '__main__':\n  
  main(sys.argv[1], sys.argv[2])\n"
  },
  {
    "path": "examples/avro/py/check_results.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport os\nimport errno\nfrom collections import Counter\n\nfrom pydoop.utils.py3compat import iteritems\n\n\ndef iter_lines(path):\n    try:\n        contents = os.listdir(path)\n    except OSError as e:\n        if e.errno == errno.ENOTDIR:\n            contents = [path]\n    for name in contents:\n        with open(os.path.join(path, name)) as f:\n            for line in f:\n                yield line\n\n\ndef main(exp, res):\n\n    expected = {}\n    for l in iter_lines(exp):\n        p = l.strip().split(';')\n        expected.setdefault(p[1], Counter())[p[2]] += 1\n\n    computed = {}\n    for l in iter_lines(res):\n        p = l.strip().split('\\t')\n        computed[p[0]] = eval(p[1])\n\n    if set(computed) != set(expected):\n        sys.exit(\"ERROR: computed keys != expected keys: %r != %r\" % (\n            sorted(computed), sorted(expected)))\n    for k, v in iteritems(expected):\n        if computed[k] != v:\n            sys.exit(\"ERROR: %r: %r != %r\" % (k, computed[k], dict(v)))\n    print('All is ok!')\n\n\nif __name__ == '__main__':\n    main(sys.argv[1], sys.argv[2])\n"
  },
  {
    "path": "examples/avro/py/color_count.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# DOCS_INCLUDE_START\nfrom collections import Counter\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, ctx):\n        user = ctx.value\n        color = user['favorite_color']\n        if color is not None:\n            ctx.emit(user['office'], Counter({color: 1}))\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, ctx):\n        s = sum(ctx.values, Counter())\n        ctx.emit('', {'office': ctx.key, 'counts': s})\n\n\ndef __main__():\n    pp.run_task(pp.Factory(Mapper, reducer_class=Reducer))\n"
  },
  {
    "path": "examples/avro/py/create_input.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport random\nimport sys\n\noffices = ['office-%s' % i for i in range(3)]\ncolors = ['red', 'blue', 'yellow', 'orange', 'maroon', 'green']\nnames = ['Alyssa', 'John', 'Kathy', 'Ben', 'Karla', 'Ross', 'Violetta']\n\n\ndef create_input(n, stream):\n    for i in range(n):\n        stream.write(';'.join([\n            random.choice(names),\n            random.choice(offices),\n            random.choice(colors),\n        ]) + '\\n')\n\n\ndef main(n, filename):\n    with open(filename, 'w') as f:\n        create_input(n, f)\n\n\nif __name__ == '__main__':\n    main(int(sys.argv[1]), sys.argv[2])\n"
  },
  {
    "path": "examples/avro/py/gen_data.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, ctx):\n        name, length = ctx.value.split(None, 1)\n        length = int(length)\n        ctx.emit('', {'name': name, 'data': os.urandom(length)})\n\n\ndef __main__():\n    pp.run_task(pp.Factory(Mapper))\n"
  },
  {
    "path": "examples/avro/py/generate_avro_users.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport random\n\nimport avro.schema\nfrom avro.datafile import DataFileWriter\nfrom avro.io import DatumWriter\n\nif sys.version_info[0] == 3:\n    xrange = range\n    parse = avro.schema.Parse\nelse:\n    parse = avro.schema.parse\n\nNAME_POOL = ['george', 'john', 'paul', 'ringo']\nOFFICE_POOL = ['office-%d' % _ for _ in xrange(4)]\nCOLOR_POOL = ['black', 'cyan', 'magenta', 'yellow']\n\n\ndef main(argv):\n    try:\n        schema_fn = argv[1]\n        n_users = int(argv[2])\n        avro_fn = argv[3]\n    except IndexError:\n        sys.exit('Usage: %s SCHEMA_FILE N_USERS AVRO_FILE' % argv[0])\n    with open(schema_fn) as f_in:\n        schema = parse(f_in.read())\n    with open(avro_fn, 'wb') as f_out:\n        writer = DataFileWriter(f_out, DatumWriter(), schema)\n        for i in xrange(n_users):\n            writer.append({\n                'name': random.choice(NAME_POOL),\n                'office': random.choice(OFFICE_POOL),\n                'favorite_color': random.choice(COLOR_POOL),\n                'favorite_number': i,\n            })\n        writer.close()\n\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "examples/avro/py/kmer_count.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# DOCS_INCLUDE_START\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\nWIDTH = 5\n\n\ndef window(s, width):\n    for i in range(len(s) - width + 1):\n        yield s[i: i + width]\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, ctx):\n        seq = ctx.value['sequence']\n        for kmer in window(seq, WIDTH):\n            ctx.emit(kmer, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, ctx):\n        ctx.emit(ctx.key, sum(ctx.values))\n\n\ndef __main__():\n    pp.run_task(pp.Factory(Mapper, reducer_class=Reducer))\n"
  },
  {
    "path": "examples/avro/py/show_kmer_count.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport csv\nfrom operator import itemgetter\n\nLIMIT = 10\n\n\ndef main(argv):\n    with open(argv[1]) as f:\n        reader = csv.reader(f, delimiter='\\t')\n        data = [(k, int(v)) for (k, v) in reader]\n        data.sort(key=itemgetter(1), reverse=True)\n        for i, t in enumerate(data):\n            sys.stdout.write('%s\\t%d\\n' % t)\n            if i + 1 >= LIMIT:\n                break\n\n\nif __name__ == '__main__':\n    main(sys.argv)\n"
  },
  {
    "path": "examples/avro/py/write_avro.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport csv\n\nimport avro.schema\nfrom avro.datafile import DataFileWriter\nfrom avro.io import DatumWriter\n\nparse = avro.schema.Parse if sys.version_info[0] == 3 else avro.schema.parse\nFIELDS = ['name', 'office', 'favorite_color']\n\n\ndef main(schema_fn, csv_fn, avro_fn):\n\n    with open(schema_fn) as f_in:\n        schema = parse(f_in.read())\n\n    with open(csv_fn) as f_in:\n        reader = csv.reader(f_in, delimiter=';')\n        with open(avro_fn, 'wb') as f_out:\n            writer = DataFileWriter(f_out, DatumWriter(), schema)\n            for row in reader:\n                writer.append(dict(zip(FIELDS, row)))\n            writer.close()\n\n\nif __name__ == '__main__':\n    try:\n        schema_fn = sys.argv[1]\n        csv_fn = sys.argv[2]\n        avro_fn = sys.argv[3]\n    except IndexError:\n        sys.exit('Usage: %s SCHEMA_FILE CSV_FILE AVRO_FILE' % sys.argv[0])\n    main(schema_fn, csv_fn, avro_fn)\n"
  },
  {
    "path": "examples/avro/run",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\n# These examples could be adapted to also run on the local fs, but we have\n# enough coverage from the other ones.\nif [ \"$(hadoop_fs)\" = \"file\" ]; then\n    echo \"default file system is local, skipping all examples\"\n    exit 0\nfi\n\nfor io in \"in\" \"in_out\"; do\n    for mode in \"k\" \"v\" \"kv\"; do\n\t\"${this_dir}\"/run_avro_container_${io} ${mode}\n    done\n    \"${this_dir}\"/run_avro_parquet_${io}\ndone\n\"${this_dir}\"/run_avro_pyrw\n\"${this_dir}\"/run_color_count\n\"${this_dir}\"/run_kmer_count\n"
  },
  {
    "path": "examples/avro/run_avro_container_in",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\nnargs=1\nif [ $# -ne ${nargs} ]; then\n    die \"Usage: $0 k|v|kv\"\nfi\nmode=$1\nif [ \"${mode}\" == \"k\" ]; then\n    MODULE=avro_key_in\nelif [ \"${mode}\" == \"v\" ]; then\n    MODULE=avro_value_in\nelif [ \"${mode}\" == \"kv\" ]; then\n    MODULE=avro_key_value_in\nelse\n    die \"invalid mode: ${mode}\"\nfi\n\npushd \"${this_dir}\"\nUSER_SCHEMA_FILE=schemas/user.avsc\nPET_SCHEMA_FILE=schemas/pet.avsc\nCSV_INPUT=$(mktemp -d)\nLOCAL_INPUT=$(mktemp -d)\nINPUT=$(basename ${LOCAL_INPUT})\nOUTPUT=results\n\n# --- generate avro input ---\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/create_input.py ${N} \"${CSV_INPUT}/users_${i}.csv\"\ndone\nif [ \"${mode}\" == \"kv\" ]; then\n    for i in 1 2; do\n\t./write_avro_kv \"${USER_SCHEMA_FILE}\" \"${PET_SCHEMA_FILE}\" \\\n          \"${CSV_INPUT}/users_${i}.csv\" \"${LOCAL_INPUT}/users_${i}.avro\"\n    done\nelse\n    for i in 1 2; do\n\t${PYTHON} py/write_avro.py \"${USER_SCHEMA_FILE}\" \\\n          \"${CSV_INPUT}/users_${i}.csv\" \"${LOCAL_INPUT}/users_${i}.avro\"\n    done\nfi\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm \"${INPUT}\" || 
:\n${HADOOP} fs -put \"${LOCAL_INPUT}\" \"${INPUT}\"\n\n# --- run cc ---\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\n\n${HADOOP} fs -rm -r \"/user/${USER}/${OUTPUT}\" || :\n${PYDOOP} submit \\\n    --upload-file-to-cache py/avro_base.py \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 1 \\\n    --avro-input \"${mode}\" \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${INPUT}\" \"${OUTPUT}\"\n\n# --- check results ---\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get \"${OUTPUT}\"\n${PYTHON} py/check_results.py \"${CSV_INPUT}\" \"${OUTPUT}\"\n\nrm -rf \"${CSV_INPUT}\" \"${LOCAL_INPUT}\" \"${OUTPUT}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_avro_container_in_out",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\nnargs=1\nif [ $# -ne ${nargs} ]; then\n    die \"Usage: $0 k|v|kv\"\nfi\nmode=$1\nif [ \"${mode}\" == \"k\" ]; then\n    MODULE=avro_key_in_out\nelif [ \"${mode}\" == \"v\" ]; then\n    MODULE=avro_value_in_out\nelif [ \"${mode}\" == \"kv\" ]; then\n    MODULE=avro_key_value_in_out\nelse\n    die \"invalid mode: ${mode}\"\nfi\n\npushd \"${this_dir}\"\nUSER_SCHEMA_FILE=schemas/user.avsc\nPET_SCHEMA_FILE=schemas/pet.avsc\nSTATS_SCHEMA_FILE=schemas/stats.avsc\nSTATS_SCHEMA=$(cat \"${STATS_SCHEMA_FILE}\")\nCSV_INPUT=$(mktemp -d)\nLOCAL_INPUT=$(mktemp -d)\nINPUT=$(basename ${LOCAL_INPUT})\nOUTPUT=results\n\n# --- generate avro input ---\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/create_input.py ${N} \"${CSV_INPUT}/users_${i}.csv\"\ndone\nif [ \"${mode}\" == \"kv\" ]; then\n    for i in 1 2; do\n\t./write_avro_kv \"${USER_SCHEMA_FILE}\" \"${PET_SCHEMA_FILE}\" \\\n          \"${CSV_INPUT}/users_${i}.csv\" \"${LOCAL_INPUT}/users_${i}.avro\"\n    done\nelse\n    for i in 1 2; do\n\t${PYTHON} py/write_avro.py \"${USER_SCHEMA_FILE}\" \\\n          \"${CSV_INPUT}/users_${i}.csv\" \"${LOCAL_INPUT}/users_${i}.avro\"\n    
done\nfi\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm -r \"${INPUT}\" || :\n${HADOOP} fs -put \"${LOCAL_INPUT}\" \"${INPUT}\"\n\n# --- run cc ---\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\n\n# put the following opts at the end of the command line\n# or the empty string will be parsed as the module arg\nif [ \"${mode}\" == \"k\" ]; then\n    K_SCHEMA_OPT=\"-D pydoop.mapreduce.avro.key.output.schema=${STATS_SCHEMA}\"\n    V_SCHEMA_OPT=\"\"\nelif [ \"${mode}\" == \"v\" ]; then\n    K_SCHEMA_OPT=\"\"\n    V_SCHEMA_OPT=\"-D pydoop.mapreduce.avro.value.output.schema=${STATS_SCHEMA}\"\nelse\n    K_SCHEMA_OPT=\"-D pydoop.mapreduce.avro.key.output.schema=${STATS_SCHEMA}\"\n    V_SCHEMA_OPT=\"-D pydoop.mapreduce.avro.value.output.schema=${STATS_SCHEMA}\"\nfi\n\n${HADOOP} fs -rm -r \"/user/${USER}/${OUTPUT}\" || :\n\n${PYDOOP} submit \\\n    --upload-file-to-cache py/avro_base.py \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 1 \\\n    --avro-input \"${mode}\" \\\n    --avro-output \"${mode}\" \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${INPUT}\" \"${OUTPUT}\" \\\n    \"${K_SCHEMA_OPT}\" \"${V_SCHEMA_OPT}\"\n\n# --- dump & check results ---\nDUMP_DIR=$(mktemp -d)\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get \"${OUTPUT}\"\nfor f in \"${OUTPUT}\"/part*; do\n    ${PYTHON} py/avro_container_dump_results.py \\\n      \"${f}\" \"${DUMP_DIR}\"/$(basename ${f}).tsv \"${mode}\"\ndone\n${PYTHON} py/check_results.py \"${CSV_INPUT}\" \"${DUMP_DIR}\"\n\nrm -rf \"${CSV_INPUT}\" \"${LOCAL_INPUT}\" \"${OUTPUT}\" \"${DUMP_DIR}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_avro_parquet_in",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\npushd \"${this_dir}\"\n[ -f \"${JAR_PATH}\" ] || ./build.sh\nSCHEMA_FILE_LOCAL=schemas/user.avsc\nSCHEMA_FILE_HDFS=user.avsc\n\n# --- create input ---\nCSV_INPUT=$(mktemp -d)\nINPUT=$(basename ${CSV_INPUT})\nPARQUETS_DIR=parquets\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/create_input.py ${N} \"${CSV_INPUT}/users_${i}.csv\"\ndone\n\n# --- convert to avro-parquet ---\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${PARQUETS_DIR}\" || :\n${HADOOP} fs -rm -r \"${INPUT}\" || :\n${HADOOP} fs -put \"${CSV_INPUT}\" \"${INPUT}\"\n\n${HADOOP} fs -put -f \"${SCHEMA_FILE_LOCAL}\" \"${SCHEMA_FILE_HDFS}\"\nexport HADOOP_CLASSPATH=$(<\"${CP_PATH}\")\n${HADOOP} jar \"${JAR_PATH}\" it.crs4.pydoop.WriteParquet \\\n    -libjars=\"${HADOOP_CLASSPATH//:/,}\" \\\n    \"${INPUT}\" \"${PARQUETS_DIR}\" \"${SCHEMA_FILE_HDFS}\"\n\n# --- run color count ---\nMODULE=avro_value_in\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\nUSER_SCHEMA=$(cat 
\"${SCHEMA_FILE_LOCAL}\")\nINPUT_FORMAT=org.apache.parquet.avro.AvroParquetInputFormat\n\nINPUT=\"${PARQUETS_DIR}\"\nOUTPUT=results\n\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${OUTPUT}\" || :\n\n${PYDOOP} submit --upload-file-to-cache \"${MPY}\" \\\n  --upload-file-to-cache py/avro_base.py \\\n  --num-reducers 1 \\\n  --input-format \"${INPUT_FORMAT}\" \\\n  --avro-input v \\\n  --libjars \"${JAR_PATH},${HADOOP_CLASSPATH//:/,}\" \\\n  --log-level \"${LOGLEVEL}\" \\\n  --job-name \"${JOBNAME}\" \\\n  \"${MODULE}\" \"${PARQUETS_DIR}\" \"${OUTPUT}\"\n\n# --- check results ---\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get /user/\"${USER}\"/\"${OUTPUT}\"\n${PYTHON} py/check_results.py \"${CSV_INPUT}\" \"${OUTPUT}\"\n\nrm -rf \"${CSV_INPUT}\" \"${OUTPUT}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_avro_parquet_in_out",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\npushd \"${this_dir}\"\n[ -f \"${JAR_PATH}\" ] || ./build.sh\nIN_SCHEMA_FILE_LOCAL=schemas/user.avsc\nIN_SCHEMA_FILE_HDFS=user.avsc\nOUT_SCHEMA_FILE_LOCAL=schemas/stats.avsc\nOUT_SCHEMA_FILE_HDFS=stats.avsc\n\n# --- create input ---\nCSV_INPUT=$(mktemp -d)\nINPUT=$(basename ${CSV_INPUT})\nPARQUETS_DIR=parquets\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/create_input.py ${N} \"${CSV_INPUT}/users_${i}.csv\"\ndone\n\n# --- convert to avro-parquet ---\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${PARQUETS_DIR}\" || :\n${HADOOP} fs -rm -r \"${INPUT}\" || :\n${HADOOP} fs -put \"${CSV_INPUT}\" \"${INPUT}\"\n\n${HADOOP} fs -put -f \"${IN_SCHEMA_FILE_LOCAL}\" \"${IN_SCHEMA_FILE_HDFS}\"\nexport HADOOP_CLASSPATH=$(<\"${CP_PATH}\")\nhadoop jar \"${JAR_PATH}\" it.crs4.pydoop.WriteParquet \\\n    -libjars=\"${HADOOP_CLASSPATH//:/,}\" \\\n    \"${INPUT}\" \"${PARQUETS_DIR}\" \"${IN_SCHEMA_FILE_HDFS}\"\n\n# --- run color count ---\nMODULE=avro_value_in_out\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\nSTATS_SCHEMA=$(cat 
\"${OUT_SCHEMA_FILE_LOCAL}\")\nINPUT_FORMAT=org.apache.parquet.avro.AvroParquetInputFormat\nOUTPUT_FORMAT=org.apache.parquet.avro.AvroParquetOutputFormat\nCC_OUTPUT=cc_output\n\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${CC_OUTPUT}\" || :\n\n${PYDOOP} submit \\\n    -D pydoop.mapreduce.avro.value.output.schema=\"${STATS_SCHEMA}\" \\\n    -D parquet.avro.schema=\"${STATS_SCHEMA}\" \\\n    --upload-file-to-cache py/avro_base.py \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 1 \\\n    --input-format \"${INPUT_FORMAT}\" \\\n    --output-format \"${OUTPUT_FORMAT}\" \\\n    --avro-input v \\\n    --avro-output v \\\n    --libjars \"${JAR_PATH},${HADOOP_CLASSPATH//:/,}\" \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${PARQUETS_DIR}\" \"${CC_OUTPUT}\"\n\n# --- dump results ---\nMODULE=avro_parquet_dump_results\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\nSTATS_SCHEMA=$(cat \"${OUT_SCHEMA_FILE_LOCAL}\")\nINPUT_FORMAT=org.apache.parquet.avro.AvroParquetInputFormat\nOUTPUT=results\n\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${OUTPUT}\" || :\n${PYDOOP} submit \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 0 \\\n    --input-format \"${INPUT_FORMAT}\" \\\n    --avro-input v \\\n    --libjars \"${JAR_PATH},${HADOOP_CLASSPATH//:/,}\" \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${CC_OUTPUT}\" \"${OUTPUT}\"\n\n# --- check results ---\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get /user/\"${USER}\"/\"${OUTPUT}\"\n${PYTHON} py/check_results.py \"${CSV_INPUT}\" \"${OUTPUT}\"\n\nrm -rf \"${CSV_INPUT}\" \"${OUTPUT}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_avro_pyrw",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\npushd \"${this_dir}\"\nUSER_SCHEMA_FILE=schemas/user.avsc\nSTATS_SCHEMA_FILE=schemas/stats.avsc\nCSV_INPUT=$(mktemp -d)\nLOCAL_INPUT=$(mktemp -d)\nINPUT=$(basename ${LOCAL_INPUT})\nOUTPUT=results\n\n# --- generate avro input ---\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/create_input.py ${N} \"${CSV_INPUT}/users_${i}.csv\"\n    ${PYTHON} py/write_avro.py \"${USER_SCHEMA_FILE}\" \\\n      \"${CSV_INPUT}/users_${i}.csv\" \"${LOCAL_INPUT}/users_${i}.avro\"\ndone\n\n# --- run cc ---\nMODULE=avro_pyrw\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\n\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm \"${INPUT}\" || :\n${HADOOP} fs -put \"${LOCAL_INPUT}\" \"${INPUT}\"\n${HADOOP} fs -rm -r \"/user/${USER}/${OUTPUT}\" || :\n${PYDOOP} submit \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --upload-file-to-cache \"${USER_SCHEMA_FILE}\" \\\n    --upload-file-to-cache \"${STATS_SCHEMA_FILE}\" \\\n    --num-reducers 1 \\\n    --do-not-use-java-record-reader \\\n    --do-not-use-java-record-writer \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${INPUT}\" 
\"${OUTPUT}\"\n\n# --- dump & check results ---\nDUMP_DIR=$(mktemp -d)\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get \"${OUTPUT}\"\nfor f in \"${OUTPUT}\"/part*; do\n    ${PYTHON} py/avro_container_dump_results.py \\\n      \"${f}\" \"${DUMP_DIR}\"/$(basename ${f}).tsv\ndone\n${PYTHON} py/check_results.py \"${CSV_INPUT}\" \"${DUMP_DIR}\"\n\nrm -rf \"${CSV_INPUT}\" \"${LOCAL_INPUT}\" \"${OUTPUT}\" \"${DUMP_DIR}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_color_count",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\npushd \"${this_dir}\"\nMODULE=\"color_count\"\nUSER_SCHEMA_FILE=schemas/user.avsc\nSTATS_SCHEMA_FILE=schemas/stats.avsc\nSTATS_SCHEMA=$(cat \"${STATS_SCHEMA_FILE}\")\nLOCAL_INPUT=$(mktemp -d)\nINPUT=$(basename ${LOCAL_INPUT})\nOUTPUT=results\n\n# --- generate avro input ---\nN=20\nfor i in 1 2; do\n    ${PYTHON} py/generate_avro_users.py \"${USER_SCHEMA_FILE}\" ${N} \\\n      \"${LOCAL_INPUT}/users_${i}.avro\"\ndone\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm -r \"${INPUT}\" || :\n${HADOOP} fs -put \"${LOCAL_INPUT}\" \"${INPUT}\"\n\n# --- run cc ---\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\n\n${HADOOP} fs -rm -r \"/user/${USER}/${OUTPUT}\" || :\n${PYDOOP} submit \\\n    -D pydoop.mapreduce.avro.value.output.schema=\"${STATS_SCHEMA}\" \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 1 \\\n    --avro-input v \\\n    --avro-output v \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${INPUT}\" \"${OUTPUT}\"\n\n# --- dump & check results ---\nDUMP_DIR=$(mktemp -d)\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get \"${OUTPUT}\"\nfor f in 
\"${OUTPUT}\"/part*; do\n    ${PYTHON} py/avro_container_dump_results.py \\\n      \"${f}\" \"${DUMP_DIR}\"/$(basename ${f}).tsv\ndone\n${PYTHON} py/check_cc.py \"${LOCAL_INPUT}\" \"${DUMP_DIR}\"\n\nrm -rf \"${LOCAL_INPUT}\" \"${OUTPUT}\" \"${DUMP_DIR}\"\npopd\n"
  },
  {
    "path": "examples/avro/run_kmer_count",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n. \"${this_dir}/config.sh\"\n\npushd \"${this_dir}\"\n[ -f \"${JAR_PATH}\" ] || ./build.sh\nMODULE=kmer_count\nMPY=py/\"${MODULE}\".py\nJOBNAME=\"${MODULE}\"-job\nLOGLEVEL=\"DEBUG\"\nINPUT_FORMAT=org.apache.parquet.avro.AvroParquetInputFormat\nPROJECTION=$(cat schemas/alignment_record_proj.avsc)\nLOCAL_INPUT=$(mktemp -d)\nINPUT=$(basename ${LOCAL_INPUT})\nOUTPUT=kmer_count\n\nfor i in 1 2; do\n    cp data/mini_aligned_seqs.gz.parquet ${LOCAL_INPUT}/seqs_${i}.gz.parquet\ndone\n\n${HADOOP} fs -mkdir -p /user/\"${USER}\"\n${HADOOP} fs -rm -r \"${INPUT}\" || :\n${HADOOP} fs -put \"${LOCAL_INPUT}\" \"${INPUT}\"\n${HADOOP} fs -rm -r /user/\"${USER}\"/\"${OUTPUT}\" || :\n\nHADOOP_CLASSPATH=$(<\"${CP_PATH}\")\n${PYDOOP} submit \\\n     -D parquet.avro.projection=\"${PROJECTION}\" \\\n    --upload-file-to-cache \"${MPY}\" \\\n    --num-reducers 1 \\\n    --input-format \"${INPUT_FORMAT}\" \\\n    --avro-input v \\\n    --libjars \"${JAR_PATH},${HADOOP_CLASSPATH//:/,}\" \\\n    --log-level \"${LOGLEVEL}\" \\\n    --job-name \"${JOBNAME}\" \\\n    \"${MODULE}\" \"${INPUT}\" \"${OUTPUT}\"\n\nrm -rf \"${OUTPUT}\"\n${HADOOP} fs -get 
/user/\"${USER}\"/\"${OUTPUT}\"\n${PYTHON} py/show_kmer_count.py \"${OUTPUT}\"/part-r-00000\n\nrm -rf \"${OUTPUT}\" \"${LOCAL_INPUT}\"\npopd\n"
  },
  {
    "path": "examples/avro/schemas/alignment_record.avsc",
    "content": "{\n    \"type\": \"record\",\n    \"name\": \"AlignmentRecord\",\n    \"fields\": [\n        {\n            \"default\": null,\n            \"doc\": \"The reference sequence details for the reference chromosome that\\n   this read is aligned to. If the read is unaligned, this field should\\n   be null.\",\n            \"name\": \"contig\",\n            \"type\": [\n                \"null\",\n                {\n                    \"type\": \"record\",\n                    \"name\": \"Contig\",\n                    \"doc\": \"Record for describing a reference assembly. Not used for storing the contents\\n of said assembly.\\n\\n @see NucleotideContigFragment\",\n                    \"fields\": [\n                        {\n                            \"default\": null,\n                            \"doc\": \"The name of this contig in the assembly (e.g., \\\"chr1\\\").\",\n                            \"name\": \"contigName\",\n                            \"type\": [\"null\", \"string\"]\n                        },\n                        {\n                            \"default\": null,\n                            \"doc\": \"The length of this contig.\",\n                            \"name\": \"contigLength\",\n                            \"type\": [\"null\", \"long\"]\n                        },\n                        {\n                            \"default\": null,\n                            \"doc\": \"The MD5 checksum of the assembly for this contig.\",\n                            \"name\": \"contigMD5\",\n                            \"type\": [\"null\", \"string\"]\n                        },\n                        {\n                            \"default\": null,\n                            \"doc\": \"The URL at which this reference assembly can be found.\",\n                            \"name\": \"referenceURL\",\n                            \"type\": [\"null\", \"string\"]\n                        },\n                        {\n     
                       \"default\": null,\n                            \"doc\": \"The name of this assembly (e.g., \\\"hg19\\\").\",\n                            \"name\": \"assembly\",\n                            \"type\": [\"null\", \"string\"]\n                        },\n                        {\n                            \"default\": null,\n                            \"doc\": \"The species that this assembly is for.\",\n                            \"name\": \"species\",\n                            \"type\": [\"null\", \"string\"]\n                        }\n                    ]\n                }\n            ]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"0 based reference position for the start of this read's alignment.\\n   Should be null if the read is unaligned.\",\n            \"name\": \"start\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"0 based reference position where this read used to start before\\n   local realignment.\\n   Stores the same data as the OP field in the SAM format.\",\n            \"name\": \"oldPosition\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"0 based reference position for the end of this read's alignment.\\n   Should be null if the read is unaligned.\",\n            \"name\": \"end\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The global mapping quality of this read.\",\n            \"name\": \"mapq\",\n            \"type\": [\"null\", \"int\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The name of this read. 
This should be unique within the read group\\n   that this read is from, and can be used to identify other reads that\\n   are derived from a single fragment.\",\n            \"name\": \"readName\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The bases in this alignment. If the read has been hard clipped, this may\\n   not represent all the bases in the original read.\",\n            \"name\": \"sequence\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The per-base quality scores in this alignment. If the read has been hard\\n   clipped, this may not represent all the bases in the original read.\\n   Additionally, if the error scores have been recalibrated, this field\\n   will not contain the original base quality scores.\\n\\n   @see origQual\",\n            \"name\": \"qual\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The Compact Ideosyncratic Gapped Alignment Report (CIGAR) string that\\n   describes the local alignment of this read. Contains {length, operator}\\n   pairs for all contiguous alignment operations. The operators include:\\n   \\n   * M, ALIGNMENT_MATCH: An alignment match indicates that a sequence can be\\n     aligned to the reference without evidence of an INDEL. 
Unlike the\\n     SEQUENCE_MATCH and SEQUENCE_MISMATCH operators, the ALIGNMENT_MATCH\\n     operator does not indicate whether the reference and read sequences are an\\n     exact match.\\n   * I, INSERT: The insert operator indicates that the read contains evidence of\\n     bases being inserted into the reference.\\n   * D, DELETE: The delete operator indicates that the read contains evidence of\\n     bases being deleted from the reference.\\n   * N, SKIP: The skip operator indicates that this read skips a long segment of\\n     the reference, but the bases have not been deleted. This operator is\\n     commonly used when working with RNA-seq data, where reads may skip long\\n     segments of the reference between exons.\\n   * S, CLIP_SOFT: The soft clip operator indicates that bases at the start/end\\n     of a read have not been considered during alignment. This may occur if the\\n     majority of a read maps, except for low quality bases at the start/end of\\n     a read. Bases that are soft clipped will still be stored in the read.\\n   * H, CLIP_HARD: The hard clip operator indicates that bases at the start/end of\\n     a read have been omitted from this alignment. This may occur if this linear\\n     alignment is part of a chimeric alignment, or if the read has been trimmed\\n     (e.g., during error correction, or to trim poly-A tails for RNA-seq).\\n   * P, PAD: The pad operator indicates that there is padding in an alignment.\\n   * =, SEQUENCE_MATCH: This operator indicates that this portion of the aligned\\n     sequence exactly matches the reference (e.g., all bases are equal to the\\n     reference bases).\\n   * X, SEQUENCE_MISMATCH: This operator indicates that this portion of the \\n     aligned sequence is an alignment match to the reference, but a sequence\\n     mismatch (e.g., the bases are not equal to the reference). 
This can\\n     indicate a SNP or a read error.\",\n            \"name\": \"cigar\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"Stores the CIGAR string present before local indel realignment.\\n   Stores the same data as the OC field in the SAM format.\\n\\n   @see cigar\",\n            \"name\": \"oldCigar\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": 0,\n            \"doc\": \"The number of bases in this read/alignment that have been trimmed from the\\n   start of the read. By default, this is equal to 0. If the value is non-zero,\\n   that means that the start of the read has been hard-clipped.\\n\\n   @see cigar\",\n            \"name\": \"basesTrimmedFromStart\",\n            \"type\": [\"int\", \"null\"]\n        },\n        {\n            \"default\": 0,\n            \"doc\": \"The number of bases in this read/alignment that have been trimmed from the\\n   end of the read. By default, this is equal to 0. 
If the value is non-zero,\\n   that means that the end of the read has been hard-clipped.\\n\\n   @see cigar\",\n            \"name\": \"basesTrimmedFromEnd\",\n            \"type\": [\"int\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"readPaired\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"properPair\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"readMapped\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"mateMapped\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"firstOfPair\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"secondOfPair\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"failedVendorQualityChecks\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"duplicateRead\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"doc\": \"True if this alignment is mapped as a reverse compliment. 
This field\\n   defaults to false.\",\n            \"name\": \"readNegativeStrand\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"doc\": \"True if the mate pair of this alignment is mapped as a reverse compliment.\\n   This field defaults to false.\",\n            \"name\": \"mateNegativeStrand\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"doc\": \"This field is true if this alignment is either the best linear alignment,\\n   or the first linear alignment in a chimeric alignment. Defaults to false.\\n\\n   @see secondaryAlignment\\n   @see supplementaryAlignment\",\n            \"name\": \"primaryAlignment\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"doc\": \"This field is true if this alignment is a lower quality linear alignment\\n   for a multiply-mapped read. Defaults to false.\\n\\n   @see primaryAlignment\\n   @see supplementaryAlignment\",\n            \"name\": \"secondaryAlignment\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": false,\n            \"doc\": \"This field is true if this alignment is a non-primary linear alignment in\\n   a chimeric alignment. 
Defaults to false.\\n\\n   @see primaryAlignment\\n   @see secondaryAlignment\",\n            \"name\": \"supplementaryAlignment\",\n            \"type\": [\"boolean\", \"null\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"mismatchingPositions\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"origQual\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"attributes\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupName\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupSequencingCenter\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupDescription\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupRunDateEpoch\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupFlowOrder\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupKeySequence\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupLibrary\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupPredictedMedianInsertSize\",\n            \"type\": [\"null\", \"int\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupPlatform\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n  
          \"default\": null,\n            \"name\": \"recordGroupPlatformUnit\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"name\": \"recordGroupSample\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The start position of the mate of this read. Should be set to null if the\\n   mate is unaligned, or if the mate does not exist.\",\n            \"name\": \"mateAlignmentStart\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The end position of the mate of this read. Should be set to null if the\\n   mate is unaligned, or if the mate does not exist.\",\n            \"name\": \"mateAlignmentEnd\",\n            \"type\": [\"null\", \"long\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The reference contig of the mate of this read. Should be set to null if the\\n   mate is unaligned, or if the mate does not exist.\",\n            \"name\": \"mateContig\",\n            \"type\": [\"null\", \"Contig\"]\n        }\n    ]\n}\n"
  },
  {
    "path": "examples/avro/schemas/alignment_record_proj.avsc",
    "content": "{\n    \"type\": \"record\",\n    \"name\": \"AlignmentRecord\",\n    \"fields\": [\n        {\n            \"default\": null,\n            \"doc\": \"The global mapping quality of this read.\",\n            \"name\": \"mapq\",\n            \"type\": [\"null\", \"int\"]\n        },\n        {\n            \"default\": null,\n            \"doc\": \"The bases in this alignment. If the read has been hard clipped, this may\\n   not represent all the bases in the original read.\",\n            \"name\": \"sequence\",\n            \"type\": [\"null\", \"string\"]\n        },\n        {\n            \"default\": false,\n            \"name\": \"readMapped\",\n            \"type\": [\"boolean\", \"null\"]\n        }\n    ]\n}\n"
  },
  {
    "path": "examples/avro/schemas/pet.avsc",
    "content": "{\n    \"namespace\": \"example.avro\",\n    \"type\": \"record\",\n    \"name\": \"Pet\",\n    \"fields\": [\n        {\"name\": \"name\", \"type\": \"string\"},\n        {\"name\": \"legs\", \"type\": \"int\"}\n    ]\n}\n"
  },
  {
    "path": "examples/avro/schemas/stats.avsc",
    "content": "{\n \"namespace\": \"example.avro\",\n \"type\": \"record\",\n \"name\": \"Stats\",\n \"fields\": [\n     {\"name\": \"office\", \"type\": \"string\"},\n     {\"name\": \"counts\", \"type\": {\"type\": \"map\", \"values\": \"long\"}}\n ]\n}"
  },
  {
    "path": "examples/avro/schemas/user.avsc",
    "content": "{\n \"namespace\": \"example.avro\",\n \"type\": \"record\",\n \"name\": \"User\",\n \"fields\": [\n     {\"name\": \"office\", \"type\": \"string\"},\n     {\"name\": \"name\", \"type\": \"string\"},\n     {\"name\": \"favorite_number\",  \"type\": [\"int\", \"null\"]},\n     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n ]\n}"
  },
  {
    "path": "examples/avro/src/main/java/it/crs4/pydoop/WriteKV.java",
    "content": "/** BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n *\n * Read user data generated by create_input.py and create a key/value\n * avro file with those users as keys.\n */\n\npackage it.crs4.pydoop;\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.io.BufferedReader;\nimport java.io.FileReader;\nimport java.util.List;\nimport java.util.ArrayList;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericData;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.generic.GenericDatumWriter;\nimport org.apache.avro.io.DatumWriter;\nimport org.apache.avro.file.DataFileWriter;\nimport org.apache.avro.hadoop.io.AvroKeyValue;\n\n\nclass WriteKV {\n\n  private static final String DELIMITER = \";\";\n\n  private static GenericRecord buildUser(\n      Schema schema, String name, String office, String color) {\n    GenericRecord user = new GenericData.Record(schema);\n    user.put(\"name\", name);\n    user.put(\"office\", office);\n    if (color != null) user.put(\"favorite_color\", color);\n    return user;\n  }\n\n  private static GenericRecord buildPet(\n      Schema schema, String name, Integer legs) {\n    GenericRecord pet = new GenericData.Record(schema);\n    pet.put(\"name\", name);\n    pet.put(\"legs\", legs);\n    return pet;\n  }\n\n  private static <T> File createFile(File file, Schema schema, T... 
records)\n      throws IOException {\n    DatumWriter<T> datumWriter = new GenericDatumWriter<T>(schema);\n    DataFileWriter<T> fileWriter = new DataFileWriter<T>(datumWriter);\n    fileWriter.create(schema, file);\n    for (T record: records) {\n      fileWriter.append(record);\n    }\n    fileWriter.close();\n    return file;\n  }\n\n  private static File createInputFile(\n      Schema keySchema, Schema valueSchema, String inFN, String outFN\n  ) throws IOException {\n    Schema keyValueSchema = AvroKeyValue.getSchema(keySchema, valueSchema);\n    List<GenericRecord> records = new ArrayList<GenericRecord>();\n    BufferedReader reader = new BufferedReader(new FileReader(inFN));\n    String line;\n    int i = 0;\n    while ((line = reader.readLine()) != null) {\n      String[] tokens = line.split(DELIMITER);\n      if (tokens.length != 3) {  // name, office, color\n        throw new RuntimeException(\"Bad input format\");\n      }\n      GenericRecord user = buildUser(\n          keySchema, tokens[0], tokens[1], tokens[2]\n      );\n      GenericRecord pet = buildPet(valueSchema, String.format(\"pet-%d\", i), i);\n      AvroKeyValue<GenericRecord, GenericRecord> kv\n          = new AvroKeyValue<GenericRecord, GenericRecord>(\n              new GenericData.Record(keyValueSchema));\n      kv.setKey(user);\n      kv.setValue(pet);\n      records.add(kv.get());\n      i++;\n    }\n    reader.close();\n    return createFile(\n        new File(outFN), keyValueSchema,\n        records.toArray(new GenericRecord[records.size()])\n    );\n  }\n\n  public static void main(String[] args) throws Exception {\n\n    if (args.length < 4) {\n      System.err.println(\n          \"Usage: WriteKV USER_SCHEMA PET_SCHEMA IN_FILE OUT_FILE\"\n      );\n      System.exit(1);\n    }\n    Schema.Parser parser = new Schema.Parser();\n    Schema userSchema = parser.parse(new File(args[0]));\n    Schema petSchema = parser.parse(new File(args[1]));\n\n    File file = 
createInputFile(userSchema, petSchema, args[2], args[3]);\n    System.out.println(\"wrote \" + file.getName());\n\n  }\n}\n"
  },
  {
    "path": "examples/avro/src/main/java/it/crs4/pydoop/WriteParquet.java",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n *\n * A MapReduce application that reads ';'-separated text and writes\n * parquet-avro data (i.e., Parquet files that use the Avro object model).\n *\n * Based on Cloudera Parquet examples.\n */\n\npackage it.crs4.pydoop;\n\nimport java.io.IOException;\nimport java.io.InputStream;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.conf.Configured;\nimport org.apache.hadoop.util.Tool;\nimport org.apache.hadoop.util.ToolRunner;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.LongWritable;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\nimport org.apache.hadoop.mapreduce.lib.input.TextInputFormat;\nimport org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;\nimport org.apache.hadoop.mapreduce.Mapper.Context;\nimport org.apache.hadoop.mapreduce.Job;\nimport org.apache.hadoop.mapreduce.Mapper;\n\nimport org.apache.parquet.Log;\nimport org.apache.parquet.avro.AvroParquetOutputFormat;\n\nimport org.apache.avro.generic.GenericData.Record;\nimport org.apache.avro.Schema;\n\n\npublic class WriteParquet extends Configured implements Tool {\n\n  private static final Log LOG = Log.getLog(WriteParquet.class);\n\n  // FIXME: not needed, we're calling 
setSchema below\n  private static final String SCHEMA_PATH_KEY = \"paexample.schema.path\";\n\n  private static Schema getSchema(Configuration conf)\n      throws IOException {\n    Path schemaPath = new Path(conf.get(SCHEMA_PATH_KEY));\n    FileSystem fs = FileSystem.get(conf);\n    InputStream in = fs.open(schemaPath);\n    Schema schema = new Schema.Parser().parse(in);\n    in.close();\n    return schema;\n  }\n\n  public static class WriteUserMap\n      extends Mapper<LongWritable, Text, NullWritable, Record> {\n\n    private Schema schema;\n\n    @Override\n    public void setup(Context context)\n        throws IOException, InterruptedException {\n      schema = getSchema(context.getConfiguration());\n    }\n\n    @Override\n    public void map(LongWritable key, Text value, Context context)\n        throws IOException, InterruptedException {\n      NullWritable outKey = NullWritable.get();\n      Record user = new Record(schema);\n      String[] elements = value.toString().split(\";\");\n      user.put(\"name\", elements[0]);\n      user.put(\"office\", elements[1]);\n      user.put(\"favorite_color\", elements[2]);\n      context.write(null, user);\n    }\n  }\n\n  public int run(String[] args) throws Exception {\n\n    if (args.length < 3) {\n      System.err.println(\n        \"Usage: WriteParquet <input path> <output path> <schema path>\"\n      );\n      return -1;\n    }\n    Path inputPath = new Path(args[0]);\n    Path outputPath = new Path(args[1]);\n    String schemaPathName = args[2];\n\n    Configuration conf = getConf();\n    conf.set(SCHEMA_PATH_KEY, schemaPathName);\n    Schema schema = getSchema(conf);\n\n    Job job = new Job(conf);\n    job.setJarByClass(getClass());\n    job.setJobName(getClass().getName());\n\n    AvroParquetOutputFormat.setSchema(job, schema);\n\n    job.setMapperClass(WriteUserMap.class);\n    job.setNumReduceTasks(0);\n    job.setInputFormatClass(TextInputFormat.class);\n    
job.setOutputFormatClass(AvroParquetOutputFormat.class);\n\n    FileInputFormat.setInputPaths(job, inputPath);\n    FileOutputFormat.setOutputPath(job, outputPath);\n\n    job.waitForCompletion(true);\n\n    return 0;\n  }\n\n  public static void main(String[] args) throws Exception {\n    try {\n      int res = ToolRunner.run(new Configuration(),\n                               new WriteParquet(), args);\n      System.exit(res);\n    } catch (Exception e) {\n      e.printStackTrace();\n      System.exit(255);\n    }\n  }\n}\n"
  },
  {
    "path": "examples/avro/write_avro_kv",
    "content": "#!/bin/bash\n\n# args: KEY_SCHEMA_FILE, VALUE_SCHEMA_FILE, CSV_IN_FILE AVRO_OUT_FILE\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/config.sh\"\n\npushd \"${this_dir}\"\n[ -f \"${CLASS_DIR}/it/crs4/pydoop/WriteKV.class\" ] || ./build.sh\njava -cp \"${CLASS_DIR}:$(<${CP_PATH})\" it.crs4.pydoop.WriteKV $*\npopd\n"
  },
  {
    "path": "examples/c++/HadoopPipes.cc",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"hadoop/Pipes.hh\"\n#include \"hadoop/SerialUtils.hh\"\n#include \"hadoop/StringUtils.hh\"\n\n#include <map>\n#include <vector>\n\n#include <errno.h>\n#include <netinet/in.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <strings.h>\n#include <unistd.h>\n#include <sys/socket.h>\n#include <pthread.h>\n#include <iostream>\n#include <fstream>\n\n#include <openssl/hmac.h>\n#include <openssl/buffer.h>\n\nusing std::map;\nusing std::string;\nusing std::vector;\n\nusing namespace HadoopUtils;\n\nnamespace HadoopPipes {\n\n  class JobConfImpl: public JobConf {\n  private:\n    map<string, string> values;\n  public:\n    void set(const string& key, const string& value) {\n      values[key] = value;\n    }\n\n    virtual bool hasKey(const string& key) const {\n      return values.find(key) != values.end();\n    }\n\n    virtual const string& get(const string& key) const {\n      map<string,string>::const_iterator itr = values.find(key);\n      if (itr == values.end()) {\n        throw Error(\"Key \" + key + \" not found in JobConf\");\n      }\n      return itr->second;\n    }\n\n    
virtual int getInt(const string& key) const {\n      const string& val = get(key);\n      return toInt(val);\n    }\n\n    virtual float getFloat(const string& key) const {\n      const string& val = get(key);\n      return toFloat(val);\n    }\n\n    virtual bool getBoolean(const string&key) const {\n      const string& val = get(key);\n      return toBool(val);\n    }\n  };\n\n  class DownwardProtocol {\n  public:\n    virtual void start(int protocol) = 0;\n    virtual void setJobConf(vector<string> values) = 0;\n    virtual void setInputTypes(string keyType, string valueType) = 0;\n    virtual void runMap(string inputSplit, int numReduces, bool pipedInput)= 0;\n    virtual void mapItem(const string& key, const string& value) = 0;\n    virtual void runReduce(int reduce, bool pipedOutput) = 0;\n    virtual void reduceKey(const string& key) = 0;\n    virtual void reduceValue(const string& value) = 0;\n    virtual void close() = 0;\n    virtual void abort() = 0;\n    virtual ~DownwardProtocol() {}\n  };\n\n  class UpwardProtocol {\n  public:\n    virtual void output(const string& key, const string& value) = 0;\n    virtual void partitionedOutput(int reduce, const string& key,\n                                   const string& value) = 0;\n    virtual void status(const string& message) = 0;\n    virtual void progress(float progress) = 0;\n    virtual void done() = 0;\n    virtual void registerCounter(int id, const string& group, \n                                 const string& name) = 0;\n    virtual void \n      incrementCounter(const TaskContext::Counter* counter, uint64_t amount) = 0;\n    virtual ~UpwardProtocol() {}\n  };\n\n  class Protocol {\n  public:\n    virtual void nextEvent() = 0;\n    virtual UpwardProtocol* getUplink() = 0;\n    virtual ~Protocol() {}\n  };\n\n  class TextUpwardProtocol: public UpwardProtocol {\n  private:\n    FILE* stream;\n    static const char fieldSeparator = '\\t';\n    static const char lineSeparator = '\\n';\n\n    void 
writeBuffer(const string& buffer) {\n      fputs(quoteString(buffer, \"\\t\\n\").c_str(), stream);\n    }\n\n  public:\n    TextUpwardProtocol(FILE* _stream): stream(_stream) {}\n    \n    virtual void output(const string& key, const string& value) {\n      fprintf(stream, \"output%c\", fieldSeparator);\n      writeBuffer(key);\n      fprintf(stream, \"%c\", fieldSeparator);\n      writeBuffer(value);\n      fprintf(stream, \"%c\", lineSeparator);\n    }\n\n    virtual void partitionedOutput(int reduce, const string& key,\n                                   const string& value) {\n      fprintf(stream, \"parititionedOutput%c%d%c\", fieldSeparator, reduce, \n              fieldSeparator);\n      writeBuffer(key);\n      fprintf(stream, \"%c\", fieldSeparator);\n      writeBuffer(value);\n      fprintf(stream, \"%c\", lineSeparator);\n    }\n\n    virtual void status(const string& message) {\n      fprintf(stream, \"status%c%s%c\", fieldSeparator, message.c_str(), \n              lineSeparator);\n    }\n\n    virtual void progress(float progress) {\n      fprintf(stream, \"progress%c%f%c\", fieldSeparator, progress, \n              lineSeparator);\n    }\n\n    virtual void registerCounter(int id, const string& group, \n                                 const string& name) {\n      fprintf(stream, \"registerCounter%c%d%c%s%c%s%c\", fieldSeparator, id,\n              fieldSeparator, group.c_str(), fieldSeparator, name.c_str(), \n              lineSeparator);\n    }\n\n    virtual void incrementCounter(const TaskContext::Counter* counter, \n                                  uint64_t amount) {\n      fprintf(stream, \"incrCounter%c%d%c%ld%c\", fieldSeparator, counter->getId(), \n              fieldSeparator, (long)amount, lineSeparator);\n    }\n    \n    virtual void done() {\n      fprintf(stream, \"done%c\", lineSeparator);\n    }\n  };\n\n  class TextProtocol: public Protocol {\n  private:\n    FILE* downStream;\n    DownwardProtocol* handler;\n    UpwardProtocol* 
uplink;\n    string key;\n    string value;\n\n    int readUpto(string& buffer, const char* limit) {\n      int ch;\n      buffer.clear();\n      while ((ch = getc(downStream)) != -1) {\n        if (strchr(limit, ch) != NULL) {\n          return ch;\n        }\n        buffer += ch;\n      }\n      return -1;\n    }\n\n    static const char* delim;\n  public:\n\n    TextProtocol(FILE* down, DownwardProtocol* _handler, FILE* up) {\n      downStream = down;\n      uplink = new TextUpwardProtocol(up);\n      handler = _handler;\n    }\n\n    UpwardProtocol* getUplink() {\n      return uplink;\n    }\n\n    virtual void nextEvent() {\n      string command;\n      string arg;\n      int sep;\n      sep = readUpto(command, delim);\n      if (command == \"mapItem\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(key, delim);\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(value, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->mapItem(key, value);\n      } else if (command == \"reduceValue\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(value, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->reduceValue(value);\n      } else if (command == \"reduceKey\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(key, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->reduceKey(key);\n      } else if (command == \"start\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(arg, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->start(toInt(arg));\n      } else if 
(command == \"setJobConf\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(arg, delim);\n        int len = toInt(arg);\n        vector<string> values(len);\n        for(int i=0; i < len; ++i) {\n          HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n          sep = readUpto(arg, delim);\n          values.push_back(arg);\n        }\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->setJobConf(values);\n      } else if (command == \"setInputTypes\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(key, delim);\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(value, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->setInputTypes(key, value);\n      } else if (command == \"runMap\") {\n        string split;\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(split, delim);\n        string reduces;\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(reduces, delim);\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(arg, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->runMap(split, toInt(reduces), toBool(arg));\n      } else if (command == \"runReduce\") {\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        sep = readUpto(arg, delim);\n        HADOOP_ASSERT(sep == '\\t', \"Short text protocol command \" + command);\n        string piped;\n        sep = readUpto(piped, delim);\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->runReduce(toInt(arg), 
toBool(piped));\n      } else if (command == \"abort\") { \n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->abort();\n      } else if (command == \"close\") {\n        HADOOP_ASSERT(sep == '\\n', \"Long text protocol command \" + command);\n        handler->close();\n      } else {\n        throw Error(\"Illegal text protocol command \" + command);\n      }\n    }\n\n    ~TextProtocol() {\n      delete uplink;\n    }\n  };\n  const char* TextProtocol::delim = \"\\t\\n\";\n\n  enum MESSAGE_TYPE {START_MESSAGE, SET_JOB_CONF, SET_INPUT_TYPES, RUN_MAP, \n                     MAP_ITEM, RUN_REDUCE, REDUCE_KEY, REDUCE_VALUE, \n                     CLOSE, ABORT, AUTHENTICATION_REQ,\n                     OUTPUT=50, PARTITIONED_OUTPUT, STATUS, PROGRESS, DONE,\n                     REGISTER_COUNTER, INCREMENT_COUNTER, AUTHENTICATION_RESP};\n\n  class BinaryUpwardProtocol: public UpwardProtocol {\n  private:\n    FileOutStream* stream;\n  public:\n    BinaryUpwardProtocol(FILE* _stream) {\n      stream = new FileOutStream();\n      HADOOP_ASSERT(stream->open(_stream), \"problem opening stream\");\n    }\n\n    virtual void authenticate(const string &responseDigest) {\n      serializeInt(AUTHENTICATION_RESP, *stream);\n      serializeString(responseDigest, *stream);\n      stream->flush();\n    }\n\n    virtual void output(const string& key, const string& value) {\n      serializeInt(OUTPUT, *stream);\n      serializeString(key, *stream);\n      serializeString(value, *stream);\n    }\n\n    virtual void partitionedOutput(int reduce, const string& key,\n                                   const string& value) {\n      serializeInt(PARTITIONED_OUTPUT, *stream);\n      serializeInt(reduce, *stream);\n      serializeString(key, *stream);\n      serializeString(value, *stream);\n    }\n\n    virtual void status(const string& message) {\n      serializeInt(STATUS, *stream);\n      serializeString(message, *stream);\n    }\n\n    
virtual void progress(float progress) {\n      serializeInt(PROGRESS, *stream);\n      serializeFloat(progress, *stream);\n      stream->flush();\n    }\n\n    virtual void done() {\n      serializeInt(DONE, *stream);\n    }\n\n    virtual void registerCounter(int id, const string& group, \n                                 const string& name) {\n      serializeInt(REGISTER_COUNTER, *stream);\n      serializeInt(id, *stream);\n      serializeString(group, *stream);\n      serializeString(name, *stream);\n    }\n\n    virtual void incrementCounter(const TaskContext::Counter* counter, \n                                  uint64_t amount) {\n      serializeInt(INCREMENT_COUNTER, *stream);\n      serializeInt(counter->getId(), *stream);\n      serializeLong(amount, *stream);\n    }\n    \n    ~BinaryUpwardProtocol() {\n      delete stream;\n    }\n  };\n\n  class BinaryProtocol: public Protocol {\n  private:\n    FileInStream* downStream;\n    DownwardProtocol* handler;\n    BinaryUpwardProtocol * uplink;\n    string key;\n    string value;\n    string password;\n    bool authDone;\n    void getPassword(string &password) {\n      const char *passwordFile = getenv(\"hadoop.pipes.shared.secret.location\");\n      if (passwordFile == NULL) {\n        return;\n      }\n      std::ifstream fstr(passwordFile, std::fstream::binary);\n      if (fstr.fail()) {\n        std::cerr << \"Could not open the password file\" << std::endl;\n        return;\n      } \n      unsigned char * passBuff = new unsigned char [512];\n      fstr.read((char *)passBuff, 512);\n      int passwordLength = fstr.gcount();\n      fstr.close();\n      passBuff[passwordLength] = 0;\n      password.replace(0, passwordLength, (const char *) passBuff, passwordLength);\n      delete [] passBuff;\n      return; \n    }\n\n    void verifyDigestAndRespond(string& digest, string& challenge) {\n      if (password.empty()) {\n        //password can be empty if process is running in debug mode from\n        //command 
file.\n        authDone = true;\n        return;\n      }\n\n      if (!verifyDigest(password, digest, challenge)) {\n        std::cerr << \"Server failed to authenticate. Exiting\" << std::endl;\n        exit(-1);\n      }\n      authDone = true;\n      string responseDigest = createDigest(password, digest);\n      uplink->authenticate(responseDigest);\n    }\n\n    bool verifyDigest(string &password, string& digest, string& challenge) {\n      string expectedDigest = createDigest(password, challenge);\n      if (digest == expectedDigest) {\n        return true;\n      } else {\n        return false;\n      }\n    }\n\n    string createDigest(string &password, string& msg) {\n#if OPENSSL_VERSION_NUMBER < 0x10100000L\n      HMAC_CTX ctx;\n      unsigned char digest[EVP_MAX_MD_SIZE];\n      HMAC_Init(&ctx, (const unsigned char *)password.c_str(), \n          password.length(), EVP_sha1());\n      HMAC_Update(&ctx, (const unsigned char *)msg.c_str(), msg.length());\n      unsigned int digestLen;\n      HMAC_Final(&ctx, digest, &digestLen);\n      HMAC_cleanup(&ctx);\n#else\n      HMAC_CTX *ctx = HMAC_CTX_new();\n      unsigned char digest[EVP_MAX_MD_SIZE];\n      HMAC_Init_ex(ctx, (const unsigned char *)password.c_str(),\n          password.length(), EVP_sha1(), NULL);\n      HMAC_Update(ctx, (const unsigned char *)msg.c_str(), msg.length());\n      unsigned int digestLen;\n      HMAC_Final(ctx, digest, &digestLen);\n      HMAC_CTX_free(ctx);\n#endif\n      //now apply base64 encoding\n      BIO *bmem, *b64;\n      BUF_MEM *bptr;\n\n      b64 = BIO_new(BIO_f_base64());\n      bmem = BIO_new(BIO_s_mem());\n      b64 = BIO_push(b64, bmem);\n      BIO_write(b64, digest, digestLen);\n      BIO_flush(b64);\n      BIO_get_mem_ptr(b64, &bptr);\n\n      char digestBuffer[bptr->length];\n      memcpy(digestBuffer, bptr->data, bptr->length-1);\n      digestBuffer[bptr->length-1] = 0;\n      BIO_free_all(b64);\n\n      return string(digestBuffer);\n    }\n\n  public:\n    
BinaryProtocol(FILE* down, DownwardProtocol* _handler, FILE* up) {\n      downStream = new FileInStream();\n      downStream->open(down);\n      uplink = new BinaryUpwardProtocol(up);\n      handler = _handler;\n      authDone = false;\n      getPassword(password);\n    }\n\n    UpwardProtocol* getUplink() {\n      return uplink;\n    }\n\n    virtual void nextEvent() {\n      int32_t cmd;\n      cmd = deserializeInt(*downStream);\n      if (!authDone && cmd != AUTHENTICATION_REQ) {\n        //Authentication request must be the first message if\n        //authentication is not complete\n        std::cerr << \"Command:\" << cmd << \"received before authentication. \" \n            << \"Exiting..\" << std::endl;\n        exit(-1);\n      }\n      switch (cmd) {\n      case AUTHENTICATION_REQ: {\n        string digest;\n        string challenge;\n        deserializeString(digest, *downStream);\n        deserializeString(challenge, *downStream);\n        verifyDigestAndRespond(digest, challenge);\n        break;\n      }\n      case START_MESSAGE: {\n        int32_t prot;\n        prot = deserializeInt(*downStream);\n        handler->start(prot);\n        break;\n      }\n      case SET_JOB_CONF: {\n        int32_t entries;\n        entries = deserializeInt(*downStream);\n        vector<string> result(entries);\n        for(int i=0; i < entries; ++i) {\n          string item;\n          deserializeString(item, *downStream);\n          result.push_back(item);\n        }\n        handler->setJobConf(result);\n        break;\n      }\n      case SET_INPUT_TYPES: {\n        string keyType;\n        string valueType;\n        deserializeString(keyType, *downStream);\n        deserializeString(valueType, *downStream);\n        handler->setInputTypes(keyType, valueType);\n        break;\n      }\n      case RUN_MAP: {\n        string split;\n        int32_t numReduces;\n        int32_t piped;\n        deserializeString(split, *downStream);\n        numReduces = 
deserializeInt(*downStream);\n        piped = deserializeInt(*downStream);\n        handler->runMap(split, numReduces, piped);\n        break;\n      }\n      case MAP_ITEM: {\n        deserializeString(key, *downStream);\n        deserializeString(value, *downStream);\n        handler->mapItem(key, value);\n        break;\n      }\n      case RUN_REDUCE: {\n        int32_t reduce;\n        int32_t piped;\n        reduce = deserializeInt(*downStream);\n        piped = deserializeInt(*downStream);\n        handler->runReduce(reduce, piped);\n        break;\n      }\n      case REDUCE_KEY: {\n        deserializeString(key, *downStream);\n        handler->reduceKey(key);\n        break;\n      }\n      case REDUCE_VALUE: {\n        deserializeString(value, *downStream);\n        handler->reduceValue(value);\n        break;\n      }\n      case CLOSE:\n        handler->close();\n        break;\n      case ABORT:\n        handler->abort();\n        break;\n      default:\n        HADOOP_ASSERT(false, \"Unknown binary command \" + toString(cmd));\n      }\n    }\n\n    virtual ~BinaryProtocol() {\n      delete downStream;\n      delete uplink;\n    }\n  };\n\n  /**\n   * Define a context object to give to combiners that will let them\n   * go through the values and emit their results correctly.\n   */\n  class CombineContext: public ReduceContext {\n  private:\n    ReduceContext* baseContext;\n    Partitioner* partitioner;\n    int numReduces;\n    UpwardProtocol* uplink;\n    bool firstKey;\n    bool firstValue;\n    map<string, vector<string> >::iterator keyItr;\n    map<string, vector<string> >::iterator endKeyItr;\n    vector<string>::iterator valueItr;\n    vector<string>::iterator endValueItr;\n\n  public:\n    CombineContext(ReduceContext* _baseContext,\n                   Partitioner* _partitioner,\n                   int _numReduces,\n                   UpwardProtocol* _uplink,\n                   map<string, vector<string> >& data) {\n      baseContext = 
_baseContext;\n      partitioner = _partitioner;\n      numReduces = _numReduces;\n      uplink = _uplink;\n      keyItr = data.begin();\n      endKeyItr = data.end();\n      firstKey = true;\n      firstValue = true;\n    }\n\n    virtual const JobConf* getJobConf() {\n      return baseContext->getJobConf();\n    }\n\n    virtual const std::string& getInputKey() {\n      return keyItr->first;\n    }\n\n    virtual const std::string& getInputValue() {\n      return *valueItr;\n    }\n\n    virtual void emit(const std::string& key, const std::string& value) {\n      if (partitioner != NULL) {\n        uplink->partitionedOutput(partitioner->partition(key, numReduces),\n                                  key, value);\n      } else {\n        uplink->output(key, value);\n      }\n    }\n\n    virtual void progress() {\n      baseContext->progress();\n    }\n\n    virtual void setStatus(const std::string& status) {\n      baseContext->setStatus(status);\n    }\n\n    bool nextKey() {\n      if (firstKey) {\n        firstKey = false;\n      } else {\n        ++keyItr;\n      }\n      if (keyItr != endKeyItr) {\n        valueItr = keyItr->second.begin();\n        endValueItr = keyItr->second.end();\n        firstValue = true;\n        return true;\n      }\n      return false;\n    }\n\n    virtual bool nextValue() {\n      if (firstValue) {\n        firstValue = false;\n      } else {\n        ++valueItr;\n      }\n      return valueItr != endValueItr;\n    }\n    \n    virtual Counter* getCounter(const std::string& group, \n                               const std::string& name) {\n      return baseContext->getCounter(group, name);\n    }\n\n    virtual void incrementCounter(const Counter* counter, uint64_t amount) {\n      baseContext->incrementCounter(counter, amount);\n    }\n  };\n\n  /**\n   * A RecordWriter that will take the map outputs, buffer them up and then\n   * combine then when the buffer is full.\n   */\n  class CombineRunner: public RecordWriter {\n  
private:\n    map<string, vector<string> > data;\n    int64_t spillSize;\n    int64_t numBytes;\n    ReduceContext* baseContext;\n    Partitioner* partitioner;\n    int numReduces;\n    UpwardProtocol* uplink;\n    Reducer* combiner;\n  public:\n    CombineRunner(int64_t _spillSize, ReduceContext* _baseContext, \n                  Reducer* _combiner, UpwardProtocol* _uplink, \n                  Partitioner* _partitioner, int _numReduces) {\n      numBytes = 0;\n      spillSize = _spillSize;\n      baseContext = _baseContext;\n      partitioner = _partitioner;\n      numReduces = _numReduces;\n      uplink = _uplink;\n      combiner = _combiner;\n    }\n\n    virtual void emit(const std::string& key,\n                      const std::string& value) {\n      numBytes += key.length() + value.length();\n      data[key].push_back(value);\n      if (numBytes >= spillSize) {\n        spillAll();\n      }\n    }\n\n    virtual void close() {\n      spillAll();\n    }\n\n  private:\n    void spillAll() {\n      CombineContext context(baseContext, partitioner, numReduces, \n                             uplink, data);\n      while (context.nextKey()) {\n        combiner->reduce(context);\n      }\n      data.clear();\n      numBytes = 0;\n    }\n  };\n\n  class TaskContextImpl: public MapContext, public ReduceContext, \n                         public DownwardProtocol {\n  private:\n    bool done;\n    JobConf* jobConf;\n    string key;\n    const string* newKey;\n    const string* value;\n    bool hasTask;\n    bool isNewKey;\n    bool isNewValue;\n    string* inputKeyClass;\n    string* inputValueClass;\n    string status;\n    float progressFloat;\n    uint64_t lastProgress;\n    bool statusSet;\n    Protocol* protocol;\n    UpwardProtocol *uplink;\n    string* inputSplit;\n    RecordReader* reader;\n    Mapper* mapper;\n    Reducer* reducer;\n    RecordWriter* writer;\n    Partitioner* partitioner;\n    int numReduces;\n    const Factory* factory;\n    pthread_mutex_t 
mutexDone;\n    std::vector<int> registeredCounterIds;\n\n  public:\n\n    TaskContextImpl(const Factory& _factory) {\n      statusSet = false;\n      done = false;\n      newKey = NULL;\n      factory = &_factory;\n      jobConf = NULL;\n      inputKeyClass = NULL;\n      inputValueClass = NULL;\n      inputSplit = NULL;\n      mapper = NULL;\n      reducer = NULL;\n      reader = NULL;\n      writer = NULL;\n      partitioner = NULL;\n      protocol = NULL;\n      isNewKey = false;\n      isNewValue = false;\n      lastProgress = 0;\n      progressFloat = 0.0f;\n      hasTask = false;\n      pthread_mutex_init(&mutexDone, NULL);\n    }\n\n    void setProtocol(Protocol* _protocol, UpwardProtocol* _uplink) {\n\n      protocol = _protocol;\n      uplink = _uplink;\n    }\n\n    virtual void start(int protocol) {\n      if (protocol != 0) {\n        throw Error(\"Protocol version \" + toString(protocol) + \n                    \" not supported\");\n      }\n    }\n\n    virtual void setJobConf(vector<string> values) {\n      int len = values.size();\n      JobConfImpl* result = new JobConfImpl();\n      HADOOP_ASSERT(len % 2 == 0, \"Odd length of job conf values\");\n      for(int i=0; i < len; i += 2) {\n        result->set(values[i], values[i+1]);\n      }\n      jobConf = result;\n    }\n\n    virtual void setInputTypes(string keyType, string valueType) {\n      inputKeyClass = new string(keyType);\n      inputValueClass = new string(valueType);\n    }\n\n    virtual void runMap(string _inputSplit, int _numReduces, bool pipedInput) {\n      inputSplit = new string(_inputSplit);\n      reader = factory->createRecordReader(*this);\n      HADOOP_ASSERT((reader == NULL) == pipedInput,\n                    pipedInput ? 
\"RecordReader defined when not needed.\":\n                    \"RecordReader not defined\");\n      if (reader != NULL) {\n        value = new string();\n      }\n      mapper = factory->createMapper(*this);\n      numReduces = _numReduces;\n      if (numReduces != 0) { \n        reducer = factory->createCombiner(*this);\n        partitioner = factory->createPartitioner(*this);\n      }\n      if (reducer != NULL) {\n        int64_t spillSize = 100;\n        if (jobConf->hasKey(\"mapreduce.task.io.sort.mb\")) {\n          spillSize = jobConf->getInt(\"mapreduce.task.io.sort.mb\");\n        }\n        writer = new CombineRunner(spillSize * 1024 * 1024, this, reducer, \n                                   uplink, partitioner, numReduces);\n      }\n      hasTask = true;\n    }\n\n    virtual void mapItem(const string& _key, const string& _value) {\n      newKey = &_key;\n      value = &_value;\n      isNewKey = true;\n    }\n\n    virtual void runReduce(int reduce, bool pipedOutput) {\n      reducer = factory->createReducer(*this);\n      writer = factory->createRecordWriter(*this);\n      HADOOP_ASSERT((writer == NULL) == pipedOutput,\n                    pipedOutput ? 
\"RecordWriter defined when not needed.\":\n                    \"RecordWriter not defined\");\n      hasTask = true;\n    }\n\n    virtual void reduceKey(const string& _key) {\n      isNewKey = true;\n      newKey = &_key;\n    }\n\n    virtual void reduceValue(const string& _value) {\n      isNewValue = true;\n      value = &_value;\n    }\n    \n    virtual bool isDone() {\n      pthread_mutex_lock(&mutexDone);\n      bool doneCopy = done;\n      pthread_mutex_unlock(&mutexDone);\n      return doneCopy;\n    }\n\n    virtual void close() {\n      pthread_mutex_lock(&mutexDone);\n      done = true;\n      pthread_mutex_unlock(&mutexDone);\n    }\n\n    virtual void abort() {\n      throw Error(\"Aborted by driver\");\n    }\n\n    void waitForTask() {\n      while (!done && !hasTask) {\n        protocol->nextEvent();\n      }\n    }\n\n    bool nextKey() {\n      if (reader == NULL) {\n        while (!isNewKey) {\n          nextValue();\n          if (done) {\n            return false;\n          }\n        }\n        key = *newKey;\n      } else {\n        if (!reader->next(key, const_cast<string&>(*value))) {\n          pthread_mutex_lock(&mutexDone);\n          done = true;\n          pthread_mutex_unlock(&mutexDone);\n          return false;\n        }\n        progressFloat = reader->getProgress();\n      }\n      isNewKey = false;\n      if (mapper != NULL) {\n        mapper->map(*this);\n      } else {\n        reducer->reduce(*this);\n      }\n      return true;\n    }\n\n    /**\n     * Advance to the next value.\n     */\n    virtual bool nextValue() {\n      if (isNewKey || done) {\n        return false;\n      }\n      isNewValue = false;\n      progress();\n      protocol->nextEvent();\n      return isNewValue;\n    }\n\n    /**\n     * Get the JobConf for the current task.\n     */\n    virtual JobConf* getJobConf() {\n      return jobConf;\n    }\n\n    /**\n     * Get the current key. 
\n     * @return the current key or NULL if called before the first map or reduce\n     */\n    virtual const string& getInputKey() {\n      return key;\n    }\n\n    /**\n     * Get the current value. \n     * @return the current value or NULL if called before the first map or \n     *    reduce\n     */\n    virtual const string& getInputValue() {\n      return *value;\n    }\n\n    /**\n     * Mark your task as having made progress without changing the status \n     * message.\n     */\n    virtual void progress() {\n      if (uplink != 0) {\n        uint64_t now = getCurrentMillis();\n        if (now - lastProgress > 1000) {\n          lastProgress = now;\n          if (statusSet) {\n            uplink->status(status);\n            statusSet = false;\n          }\n          uplink->progress(progressFloat);\n        }\n      }\n    }\n\n    /**\n     * Set the status message and call progress.\n     */\n    virtual void setStatus(const string& status) {\n      this->status = status;\n      statusSet = true;\n      progress();\n    }\n\n    /**\n     * Get the name of the key class of the input to this task.\n     */\n    virtual const string& getInputKeyClass() {\n      return *inputKeyClass;\n    }\n\n    /**\n     * Get the name of the value class of the input to this task.\n     */\n    virtual const string& getInputValueClass() {\n      return *inputValueClass;\n    }\n\n    /**\n     * Access the InputSplit of the mapper.\n     */\n    virtual const std::string& getInputSplit() {\n      return *inputSplit;\n    }\n\n    virtual void emit(const string& key, const string& value) {\n      progress();\n      if (writer != NULL) {\n        writer->emit(key, value);\n      } else if (partitioner != NULL) {\n        int part = partitioner->partition(key, numReduces);\n        uplink->partitionedOutput(part, key, value);\n      } else {\n        uplink->output(key, value);\n      }\n    }\n\n    /**\n     * Register a counter with the given group and name.\n     
*/\n    virtual Counter* getCounter(const std::string& group, \n                               const std::string& name) {\n      int id = registeredCounterIds.size();\n      registeredCounterIds.push_back(id);\n      uplink->registerCounter(id, group, name);\n      return new Counter(id);\n    }\n\n    /**\n     * Increment the value of the counter with the given amount.\n     */\n    virtual void incrementCounter(const Counter* counter, uint64_t amount) {\n      uplink->incrementCounter(counter, amount); \n    }\n\n    void closeAll() {\n      if (reader) {\n        reader->close();\n      }\n      if (mapper) {\n        mapper->close();\n      }\n      if (reducer) {\n        reducer->close();\n      }\n      if (writer) {\n        writer->close();\n      }\n    }\n\n    virtual ~TaskContextImpl() {\n      delete jobConf;\n      delete inputKeyClass;\n      delete inputValueClass;\n      delete inputSplit;\n      if (reader) {\n        delete value;\n      }\n      delete reader;\n      delete mapper;\n      delete reducer;\n      delete writer;\n      delete partitioner;\n      pthread_mutex_destroy(&mutexDone);\n    }\n  };\n\n  /**\n   * Ping the parent every 5 seconds to know if it is alive \n   */\n  void* ping(void* ptr) {\n    TaskContextImpl* context = (TaskContextImpl*) ptr;\n    char* portStr = getenv(\"mapreduce.pipes.command.port\");\n    int MAX_RETRIES = 3;\n    int remaining_retries = MAX_RETRIES;\n    while (!context->isDone()) {\n      try{\n        sleep(5);\n        int sock = -1;\n        if (portStr) {\n          sock = socket(PF_INET, SOCK_STREAM, 0);\n          HADOOP_ASSERT(sock != - 1,\n                        string(\"problem creating socket: \") + strerror(errno));\n          sockaddr_in addr;\n          addr.sin_family = AF_INET;\n          addr.sin_port = htons(toInt(portStr));\n          addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);\n          HADOOP_ASSERT(connect(sock, (sockaddr*) &addr, sizeof(addr)) == 0,\n                       
 string(\"problem connecting command socket: \") +\n                        strerror(errno));\n\n        }\n        if (sock != -1) {\n          int result = shutdown(sock, SHUT_RDWR);\n          HADOOP_ASSERT(result == 0, \"problem shutting socket\");\n          result = close(sock);\n          HADOOP_ASSERT(result == 0, \"problem closing socket\");\n        }\n        remaining_retries = MAX_RETRIES;\n      } catch (Error& err) {\n        if (!context->isDone()) {\n          fprintf(stderr, \"Hadoop Pipes Exception: in ping %s\\n\", \n                err.getMessage().c_str());\n          remaining_retries -= 1;\n          if (remaining_retries == 0) {\n            exit(1);\n          }\n        } else {\n          return NULL;\n        }\n      }\n    }\n    return NULL;\n  }\n\n  /**\n   * Run the assigned task in the framework.\n   * The user's main function should set the various functions using the \n   * set* functions above and then call this.\n   * @return true, if the task succeeded.\n   */\n  bool runTask(const Factory& factory) {\n    try {\n      TaskContextImpl* context = new TaskContextImpl(factory);\n      Protocol* connection;\n      char* portStr = getenv(\"mapreduce.pipes.command.port\");\n      int sock = -1;\n      FILE* stream = NULL;\n      FILE* outStream = NULL;\n      char *bufin = NULL;\n      char *bufout = NULL;\n      if (portStr) {\n        sock = socket(PF_INET, SOCK_STREAM, 0);\n        HADOOP_ASSERT(sock != - 1,\n                      string(\"problem creating socket: \") + strerror(errno));\n        sockaddr_in addr;\n        addr.sin_family = AF_INET;\n        addr.sin_port = htons(toInt(portStr));\n        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);\n        HADOOP_ASSERT(connect(sock, (sockaddr*) &addr, sizeof(addr)) == 0,\n                      string(\"problem connecting command socket: \") +\n                      strerror(errno));\n\n        stream = fdopen(sock, \"r\");\n        outStream = fdopen(sock, \"w\");\n\n      
  // increase buffer size\n        int bufsize = 128*1024;\n        int setbuf;\n        bufin = new char[bufsize];\n        bufout = new char[bufsize];\n        setbuf = setvbuf(stream, bufin, _IOFBF, bufsize);\n        HADOOP_ASSERT(setbuf == 0, string(\"problem with setvbuf for inStream: \")\n                                     + strerror(errno));\n        setbuf = setvbuf(outStream, bufout, _IOFBF, bufsize);\n        HADOOP_ASSERT(setbuf == 0, string(\"problem with setvbuf for outStream: \")\n                                     + strerror(errno));\n        connection = new BinaryProtocol(stream, context, outStream);\n      } else if (getenv(\"mapreduce.pipes.commandfile\")) {\n        char* filename = getenv(\"mapreduce.pipes.commandfile\");\n        string outFilename = filename;\n        outFilename += \".out\";\n        stream = fopen(filename, \"r\");\n        outStream = fopen(outFilename.c_str(), \"w\");\n        connection = new BinaryProtocol(stream, context, outStream);\n      } else {\n        connection = new TextProtocol(stdin, context, stdout);\n      }\n      context->setProtocol(connection, connection->getUplink());\n      pthread_t pingThread;\n      pthread_create(&pingThread, NULL, ping, (void*)(context));\n      context->waitForTask();\n      while (!context->isDone()) {\n        context->nextKey();\n      }\n      context->closeAll();\n      connection->getUplink()->done();\n      pthread_join(pingThread,NULL);\n      delete context;\n      delete connection;\n      if (stream != NULL) {\n        fflush(stream);\n      }\n      if (outStream != NULL) {\n        fflush(outStream);\n      }\n      fflush(stdout);\n      if (sock != -1) {\n        int result = shutdown(sock, SHUT_RDWR);\n        HADOOP_ASSERT(result == 0, \"problem shutting socket\");\n        result = close(sock);\n        HADOOP_ASSERT(result == 0, \"problem closing socket\");\n      }\n      if (stream != NULL) {\n        //fclose(stream);\n      }\n      if (outStream != 
NULL) {\n        //fclose(outStream);\n      } \n      delete[] bufin;\n      delete[] bufout;\n      return true;\n    } catch (Error& err) {\n      fprintf(stderr, \"Hadoop Pipes Exception: %s\\n\", \n              err.getMessage().c_str());\n      return false;\n    }\n  }\n}\n\n"
  },
  {
    "path": "examples/c++/Makefile",
    "content": "# yum install openssl-devel\n\nCXXFLAGS := -pthread -g -pipe -Iinclude\nLDFLAGS := -pthread\nLDLIBS := -lcrypto\n\nall: wordcount\n\nwordcount: wordcount.o StringUtils.o SerialUtils.o HadoopPipes.o\n\t$(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)\n"
  },
  {
    "path": "examples/c++/README.txt",
    "content": "C++ word count implementation, mostly for comparison purposes. Not run\ntogether with other examples and/or tests by default. Includes the C++\npipes source so that we can just build and link everything together\ninto the executable task implementation.\n\nRequirements: openssl dev version (e.g., yum install openssl-devel).\n\nNOTE: the map function splits input values on space chars, unlike the\nJava and Python versions, which split on multiple whitespace chars. This\ncan lead to a slightly different output, depending on the input text.\n"
  },
  {
    "path": "examples/c++/SerialUtils.cc",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#include \"hadoop/SerialUtils.hh\"\n#include \"hadoop/StringUtils.hh\"\n\n#include <errno.h>\n#include <rpc/types.h>\n#include <rpc/xdr.h>\n#include <string>\n#include <string.h>\n\nusing std::string;\n\nnamespace HadoopUtils {\n\n  Error::Error(const std::string& msg): error(msg) {\n  }\n\n  Error::Error(const std::string& msg, \n               const std::string& file, int line, \n               const std::string& function) {\n    error = msg + \" at \" + file + \":\" + toString(line) + \n            \" in \" + function;\n  }\n\n  const std::string& Error::getMessage() const {\n    return error;\n  }\n\n  FileInStream::FileInStream()\n  {\n    mFile = NULL;\n    isOwned = false;\n  }\n\n  bool FileInStream::open(const std::string& name)\n  {\n    mFile = fopen(name.c_str(), \"rb\");\n    isOwned = true;\n    return (mFile != NULL);\n  }\n\n  bool FileInStream::open(FILE* file)\n  {\n    mFile = file;\n    isOwned = false;\n    return (mFile != NULL);\n  }\n\n  void FileInStream::read(void *buf, size_t len)\n  {\n    size_t result = fread(buf, len, 1, mFile);\n    if (result == 0) {\n      if (feof(mFile)) {\n        HADOOP_ASSERT(false, 
\"end of file\");\n      } else {\n        HADOOP_ASSERT(false, string(\"read error on file: \") + strerror(errno));\n      }\n    }\n  }\n\n  bool FileInStream::skip(size_t nbytes)\n  {\n    return (0==fseek(mFile, nbytes, SEEK_CUR));\n  }\n\n  bool FileInStream::close()\n  {\n    int ret = 0;\n    if (mFile != NULL && isOwned) {\n      ret = fclose(mFile);\n    }\n    mFile = NULL;\n    return (ret==0);\n  }\n\n  FileInStream::~FileInStream()\n  {\n    if (mFile != NULL) {\n      close();\n    }\n  }\n\n  FileOutStream::FileOutStream()\n  {\n    mFile = NULL;\n    isOwned = false;\n  }\n\n  bool FileOutStream::open(const std::string& name, bool overwrite)\n  {\n    if (!overwrite) {\n      mFile = fopen(name.c_str(), \"rb\");\n      if (mFile != NULL) {\n        fclose(mFile);\n        return false;\n      }\n    }\n    mFile = fopen(name.c_str(), \"wb\");\n    isOwned = true;\n    return (mFile != NULL);\n  }\n\n  bool FileOutStream::open(FILE* file)\n  {\n    mFile = file;\n    isOwned = false;\n    return (mFile != NULL);\n  }\n\n  void FileOutStream::write(const void* buf, size_t len)\n  {\n    size_t result = fwrite(buf, len, 1, mFile);\n    HADOOP_ASSERT(result == 1,\n                  string(\"write error to file: \") + strerror(errno));\n  }\n\n  bool FileOutStream::advance(size_t nbytes)\n  {\n    return (0==fseek(mFile, nbytes, SEEK_CUR));\n  }\n\n  bool FileOutStream::close()\n  {\n    int ret = 0;\n    if (mFile != NULL && isOwned) {\n      ret = fclose(mFile);\n    }\n    mFile = NULL;\n    return (ret == 0);\n  }\n\n  void FileOutStream::flush()\n  {\n    fflush(mFile);\n  }\n\n  FileOutStream::~FileOutStream()\n  {\n    if (mFile != NULL) {\n      close();\n    }\n  }\n\n  StringInStream::StringInStream(const std::string& str): buffer(str) {\n    itr = buffer.begin();\n  }\n\n  void StringInStream::read(void *buf, size_t buflen) {\n    size_t bytes = 0;\n    char* output = (char*) buf;\n    std::string::const_iterator end = buffer.end();\n    while 
(bytes < buflen) {\n      output[bytes++] = *itr;\n      ++itr;\n      if (itr == end) {\n        break;\n      }\n    }\n    HADOOP_ASSERT(bytes == buflen, \"unexpected end of string reached\");\n  }\n\n  void serializeInt(int32_t t, OutStream& stream) {\n    serializeLong(t,stream);\n  }\n\n  void serializeLong(int64_t t, OutStream& stream)\n  {\n    if (t >= -112 && t <= 127) {\n      int8_t b = t;\n      stream.write(&b, 1);\n      return;\n    }\n        \n    int8_t len = -112;\n    if (t < 0) {\n      t ^= -1ll; // reset the sign bit\n      len = -120;\n    }\n        \n    uint64_t tmp = t;\n    while (tmp != 0) {\n      tmp = tmp >> 8;\n      len--;\n    }\n  \n    stream.write(&len, 1);      \n    len = (len < -120) ? -(len + 120) : -(len + 112);\n        \n    for (uint32_t idx = len; idx != 0; idx--) {\n      uint32_t shiftbits = (idx - 1) * 8;\n      uint64_t mask = 0xFFll << shiftbits;\n      uint8_t b = (t & mask) >> shiftbits;\n      stream.write(&b, 1);\n    }\n  }\n\n  int32_t deserializeInt(InStream& stream) {\n    return deserializeLong(stream);\n  }\n\n  int64_t deserializeLong(InStream& stream)\n  {\n    int8_t b;\n    stream.read(&b, 1);\n    if (b >= -112) {\n      return b;\n    }\n    bool negative;\n    int len;\n    if (b < -120) {\n      negative = true;\n      len = -120 - b;\n    } else {\n      negative = false;\n      len = -112 - b;\n    }\n    uint8_t barr[len];\n    stream.read(barr, len);\n    int64_t t = 0;\n    for (int idx = 0; idx < len; idx++) {\n      t = t << 8;\n      t |= (barr[idx] & 0xFF);\n    }\n    if (negative) {\n      t ^= -1ll;\n    }\n    return t;\n  }\n\n  void serializeFloat(float t, OutStream& stream)\n  {\n    char buf[sizeof(float)];\n    XDR xdrs;\n    xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE);\n    xdr_float(&xdrs, &t);\n    stream.write(buf, sizeof(float));\n  }\n\n  float deserializeFloat(InStream& stream)\n  {\n    float f;\n    deserializeFloat(f, stream);\n    return f;\n  }\n\n  void 
deserializeFloat(float& t, InStream& stream)\n  {\n    char buf[sizeof(float)];\n    stream.read(buf, sizeof(float));\n    XDR xdrs;\n    xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE);\n    xdr_float(&xdrs, &t);\n  }\n\n  void serializeString(const std::string& t, OutStream& stream)\n  {\n    serializeInt(t.length(), stream);\n    if (t.length() > 0) {\n      stream.write(t.data(), t.length());\n    }\n  }\n\n  void deserializeString(std::string& t, InStream& stream)\n  {\n    int32_t len = deserializeInt(stream);\n    if (len > 0) {\n      // resize the string to the right length\n      t.resize(len);\n      // read into the string in 64k chunks\n      const int bufSize = 65536;\n      int offset = 0;\n      char buf[bufSize];\n      while (len > 0) {\n        int chunkLength = len > bufSize ? bufSize : len;\n        stream.read(buf, chunkLength);\n        t.replace(offset, chunkLength, buf, chunkLength);\n        offset += chunkLength;\n        len -= chunkLength;\n      }\n    } else {\n      t.clear();\n    }\n  }\n\n}\n"
  },
  {
    "path": "examples/c++/StringUtils.cc",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#include \"hadoop/StringUtils.hh\"\n#include \"hadoop/SerialUtils.hh\"\n\n#include <errno.h>\n#include <stdint.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <strings.h>\n#include <sys/time.h>\n\nusing std::string;\nusing std::vector;\n\nnamespace HadoopUtils {\n\n  string toString(int32_t x) {\n    char str[100];\n    sprintf(str, \"%d\", x);\n    return str;\n  }\n\n  int toInt(const string& val) {\n    int result;\n    char trash;\n    int num = sscanf(val.c_str(), \"%d%c\", &result, &trash);\n    HADOOP_ASSERT(num == 1,\n                  \"Problem converting \" + val + \" to integer.\");\n    return result;\n  }\n\n  float toFloat(const string& val) {\n    float result;\n    char trash;\n    int num = sscanf(val.c_str(), \"%f%c\", &result, &trash);\n    HADOOP_ASSERT(num == 1,\n                  \"Problem converting \" + val + \" to float.\");\n    return result;\n  }\n\n  bool toBool(const string& val) {\n    if (val == \"true\") {\n      return true;\n    } else if (val == \"false\") {\n      return false;\n    } else {\n      HADOOP_ASSERT(false,\n                    \"Problem converting \" + val + \" 
to boolean.\");\n    }\n  }\n\n  /**\n   * Get the current time in the number of milliseconds since 1970.\n   */\n  uint64_t getCurrentMillis() {\n    struct timeval tv;\n    struct timezone tz;\n    int sys = gettimeofday(&tv, &tz);\n    HADOOP_ASSERT(sys != -1, strerror(errno));\n    return tv.tv_sec * 1000 + tv.tv_usec / 1000;\n  }\n\n  vector<string> splitString(const std::string& str,\n\t\t\t     const char* separator) {\n    vector<string> result;\n    string::size_type prev_pos=0;\n    string::size_type pos=0;\n    while ((pos = str.find_first_of(separator, prev_pos)) != string::npos) {\n      if (prev_pos < pos) {\n\tresult.push_back(str.substr(prev_pos, pos-prev_pos));\n      }\n      prev_pos = pos + 1;\n    }\n    if (prev_pos < str.size()) {\n      result.push_back(str.substr(prev_pos));\n    }\n    return result;\n  }\n\n  string quoteString(const string& str,\n                     const char* deliminators) {\n    \n    string result(str);\n    for(int i=result.length() -1; i >= 0; --i) {\n      char ch = result[i];\n      if (!isprint(ch) ||\n          ch == '\\\\' || \n          strchr(deliminators, ch)) {\n        switch (ch) {\n        case '\\\\':\n          result.replace(i, 1, \"\\\\\\\\\");\n          break;\n        case '\\t':\n          result.replace(i, 1, \"\\\\t\");\n          break;\n        case '\\n':\n          result.replace(i, 1, \"\\\\n\");\n          break;\n        case ' ':\n          result.replace(i, 1, \"\\\\s\");\n          break;\n        default:\n          char buff[4];\n          sprintf(buff, \"\\\\%02x\", static_cast<unsigned char>(result[i]));\n          result.replace(i, 1, buff);\n        }\n      }\n    }\n    return result;\n  }\n\n  string unquoteString(const string& str) {\n    string result(str);\n    string::size_type current = result.find('\\\\');\n    while (current != string::npos) {\n      if (current + 1 < result.size()) {\n        char new_ch;\n        int num_chars;\n        if 
(isxdigit(result[current+1])) {\n          num_chars = 2;\n          HADOOP_ASSERT(current + num_chars < result.size(),\n                     \"escape pattern \\\\<hex><hex> is missing second digit in '\"\n                     + str + \"'\");\n          char sub_str[3];\n          sub_str[0] = result[current+1];\n          sub_str[1] = result[current+2];\n          sub_str[2] = '\\0';\n          char* end_ptr = NULL;\n          long int int_val = strtol(sub_str, &end_ptr, 16);\n          HADOOP_ASSERT(*end_ptr == '\\0' && int_val >= 0,\n                     \"escape pattern \\\\<hex><hex> is broken in '\" + str + \"'\");\n          new_ch = static_cast<char>(int_val);\n        } else {\n          num_chars = 1;\n          switch(result[current+1]) {\n          case '\\\\':\n            new_ch = '\\\\';\n            break;\n          case 't':\n            new_ch = '\\t';\n            break;\n          case 'n':\n            new_ch = '\\n';\n            break;\n          case 's':\n            new_ch = ' ';\n            break;\n          default:\n            string msg(\"unknow n escape character '\");\n            msg += result[current+1];\n            HADOOP_ASSERT(false, msg + \"' found in '\" + str + \"'\");\n          }\n        }\n        result.replace(current, 1 + num_chars, 1, new_ch);\n        current = result.find('\\\\', current+1);\n      } else {\n        HADOOP_ASSERT(false, \"trailing \\\\ in '\" + str + \"'\");\n      }\n    }\n    return result;\n  }\n\n}\n"
  },
  {
    "path": "examples/c++/include/hadoop/Pipes.hh",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#ifndef HADOOP_PIPES_HH\n#define HADOOP_PIPES_HH\n\n#ifdef SWIG\n%module (directors=\"1\") HadoopPipes\n%include \"std_string.i\"\n%feature(\"director\") Mapper;\n%feature(\"director\") Reducer;\n%feature(\"director\") Partitioner;\n%feature(\"director\") RecordReader;\n%feature(\"director\") RecordWriter;\n%feature(\"director\") Factory;\n#else\n#include <string>\n#endif\n\n#include <stdint.h>\n\nnamespace HadoopPipes {\n\n/**\n * This interface defines the interface between application code and the \n * foreign code interface to Hadoop Map/Reduce.\n */\n\n/**\n * A JobConf defines the properties for a job.\n */\nclass JobConf {\npublic:\n  virtual bool hasKey(const std::string& key) const = 0;\n  virtual const std::string& get(const std::string& key) const = 0;\n  virtual int getInt(const std::string& key) const = 0;\n  virtual float getFloat(const std::string& key) const = 0;\n  virtual bool getBoolean(const std::string&key) const = 0;\n  virtual ~JobConf() {}\n};\n\n/**\n * Task context provides the information about the task and job.\n */\nclass TaskContext {\npublic:\n  /**\n   * Counter to keep track of a property and its value.\n 
  */\n  class Counter {\n  private:\n    int id;\n  public:\n    Counter(int counterId) : id(counterId) {}\n    Counter(const Counter& counter) : id(counter.id) {}\n\n    int getId() const { return id; }\n  };\n  \n  /**\n   * Get the JobConf for the current task.\n   */\n  virtual const JobConf* getJobConf() = 0;\n\n  /**\n   * Get the current key. \n   * @return the current key\n   */\n  virtual const std::string& getInputKey() = 0;\n\n  /**\n   * Get the current value. \n   * @return the current value\n   */\n  virtual const std::string& getInputValue() = 0;\n\n  /**\n   * Generate an output record\n   */\n  virtual void emit(const std::string& key, const std::string& value) = 0;\n\n  /**\n   * Mark your task as having made progress without changing the status \n   * message.\n   */\n  virtual void progress() = 0;\n\n  /**\n   * Set the status message and call progress.\n   */\n  virtual void setStatus(const std::string& status) = 0;\n\n  /**\n   * Register a counter with the given group and name.\n   */\n  virtual Counter* \n    getCounter(const std::string& group, const std::string& name) = 0;\n\n  /**\n   * Increment the value of the counter with the given amount.\n   */\n  virtual void incrementCounter(const Counter* counter, uint64_t amount) = 0;\n  \n  virtual ~TaskContext() {}\n};\n\nclass MapContext: public TaskContext {\npublic:\n\n  /**\n   * Access the InputSplit of the mapper.\n   */\n  virtual const std::string& getInputSplit() = 0;\n\n  /**\n   * Get the name of the key class of the input to this task.\n   */\n  virtual const std::string& getInputKeyClass() = 0;\n\n  /**\n   * Get the name of the value class of the input to this task.\n   */\n  virtual const std::string& getInputValueClass() = 0;\n\n};\n\nclass ReduceContext: public TaskContext {\npublic:\n  /**\n   * Advance to the next value.\n   */\n  virtual bool nextValue() = 0;\n};\n\nclass Closable {\npublic:\n  virtual void close() {}\n  virtual ~Closable() {}\n};\n\n/**\n * The 
application's mapper class to do map.\n */\nclass Mapper: public Closable {\npublic:\n  virtual void map(MapContext& context) = 0;\n};\n\n/**\n * The application's reducer class to do reduce.\n */\nclass Reducer: public Closable {\npublic:\n  virtual void reduce(ReduceContext& context) = 0;\n};\n\n/**\n * User code to decide where each key should be sent.\n */\nclass Partitioner {\npublic:\n  virtual int partition(const std::string& key, int numOfReduces) = 0;\n  virtual ~Partitioner() {}\n};\n\n/**\n * For applications that want to read the input directly for the map function\n * they can define RecordReaders in C++.\n */\nclass RecordReader: public Closable {\npublic:\n  virtual bool next(std::string& key, std::string& value) = 0;\n\n  /**\n   * The progress of the record reader through the split as a value between\n   * 0.0 and 1.0.\n   */\n  virtual float getProgress() = 0;\n};\n\n/**\n * An object to write key/value pairs as they are emited from the reduce.\n */\nclass RecordWriter: public Closable {\npublic:\n  virtual void emit(const std::string& key,\n                    const std::string& value) = 0;\n};\n\n/**\n * A factory to create the necessary application objects.\n */\nclass Factory {\npublic:\n  virtual Mapper* createMapper(MapContext& context) const = 0;\n  virtual Reducer* createReducer(ReduceContext& context) const = 0;\n\n  /**\n   * Create a combiner, if this application has one.\n   * @return the new combiner or NULL, if one is not needed\n   */\n  virtual Reducer* createCombiner(MapContext& context) const {\n    return NULL; \n  }\n\n  /**\n   * Create an application partitioner object.\n   * @return the new partitioner or NULL, if the default partitioner should be \n   *     used.\n   */\n  virtual Partitioner* createPartitioner(MapContext& context) const {\n    return NULL;\n  }\n\n  /**\n   * Create an application record reader.\n   * @return the new RecordReader or NULL, if the Java RecordReader should be\n   *    used.\n   */\n  virtual 
RecordReader* createRecordReader(MapContext& context) const {\n    return NULL; \n  }\n\n  /**\n   * Create an application record writer.\n   * @return the new RecordWriter or NULL, if the Java RecordWriter should be\n   *    used.\n   */\n  virtual RecordWriter* createRecordWriter(ReduceContext& context) const {\n    return NULL;\n  }\n\n  virtual ~Factory() {}\n};\n\n/**\n * Run the assigned task in the framework.\n * The user's main function should set the various functions using the \n * set* functions above and then call this.\n * @return true, if the task succeeded.\n */\nbool runTask(const Factory& factory);\n\n}\n\n#endif\n"
  },
  {
    "path": "examples/c++/include/hadoop/SerialUtils.hh",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#ifndef HADOOP_SERIAL_UTILS_HH\n#define HADOOP_SERIAL_UTILS_HH\n\n#include <string>\n#include <stdint.h>\n\nnamespace HadoopUtils {\n\n  /**\n   * A simple exception class that records a message for the user.\n   */\n  class Error {\n  private:\n    std::string error;\n  public:\n\n    /**\n     * Create an error object with the given message.\n     */\n    Error(const std::string& msg);\n\n    /**\n     * Construct an error object with the given message that was created on\n     * the given file, line, and functino.\n     */\n    Error(const std::string& msg, \n          const std::string& file, int line, const std::string& function);\n\n    /**\n     * Get the error message.\n     */\n    const std::string& getMessage() const;\n  };\n\n  /**\n   * Check to make sure that the condition is true, and throw an exception\n   * if it is not. 
The exception will contain the message and a description\n   * of the source location.\n   */\n  #define HADOOP_ASSERT(CONDITION, MESSAGE) \\\n    { \\\n      if (!(CONDITION)) { \\\n        throw HadoopUtils::Error((MESSAGE), __FILE__, __LINE__, \\\n                                    __func__); \\\n      } \\\n    }\n\n  /**\n   * An interface for an input stream.\n   */\n  class InStream {\n  public:\n    /**\n     * Reads len bytes from the stream into the buffer.\n     * @param buf the buffer to read into\n     * @param buflen the length of the buffer\n     * @throws Error if there are problems reading\n     */\n    virtual void read(void *buf, size_t len) = 0;\n    virtual ~InStream() {}\n  };\n\n  /**\n   * An interface for an output stream.\n   */\n  class OutStream {\n  public:\n    /**\n     * Write the given buffer to the stream.\n     * @param buf the data to write\n     * @param len the number of bytes to write\n     * @throws Error if there are problems writing\n     */\n    virtual void write(const void *buf, size_t len) = 0;\n    /**\n     * Flush the data to the underlying store.\n     */\n    virtual void flush() = 0;\n    virtual ~OutStream() {}\n  };\n\n  /**\n   * A class to read a file as a stream.\n   */\n  class FileInStream : public InStream {\n  public:\n    FileInStream();\n    bool open(const std::string& name);\n    bool open(FILE* file);\n    void read(void *buf, size_t buflen);\n    bool skip(size_t nbytes);\n    bool close();\n    virtual ~FileInStream();\n  private:\n    /**\n     * The file to write to.\n     */\n    FILE *mFile;\n    /**\n     * Does is this class responsible for closing the FILE*?\n     */\n    bool isOwned;\n  };\n\n  /**\n   * A class to write a stream to a file.\n   */\n  class FileOutStream: public OutStream {\n  public:\n\n    /**\n     * Create a stream that isn't bound to anything.\n     */\n    FileOutStream();\n\n    /**\n     * Create the given file, potentially overwriting an existing file.\n     */\n  
  bool open(const std::string& name, bool overwrite);\n    bool open(FILE* file);\n    void write(const void* buf, size_t len);\n    bool advance(size_t nbytes);\n    void flush();\n    bool close();\n    virtual ~FileOutStream();\n  private:\n    FILE *mFile;\n    bool isOwned;\n  };\n\n  /**\n   * A stream that reads from a string.\n   */\n  class StringInStream: public InStream {\n  public:\n    StringInStream(const std::string& str);\n    virtual void read(void *buf, size_t buflen);\n  private:\n    const std::string& buffer;\n    std::string::const_iterator itr;\n  };\n\n  void serializeInt(int32_t t, OutStream& stream);\n  int32_t deserializeInt(InStream& stream);\n  void serializeLong(int64_t t, OutStream& stream);\n  int64_t deserializeLong(InStream& stream);\n  void serializeFloat(float t, OutStream& stream);\n  void deserializeFloat(float& t, InStream& stream);\n  float deserializeFloat(InStream& stream);\n  void serializeString(const std::string& t, OutStream& stream);\n  void deserializeString(std::string& t, InStream& stream);\n}\n\n#endif\n"
  },
  {
    "path": "examples/c++/include/hadoop/StringUtils.hh",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#ifndef HADOOP_STRING_UTILS_HH\n#define HADOOP_STRING_UTILS_HH\n\n#include <stdint.h>\n#include <string>\n#include <vector>\n\nnamespace HadoopUtils {\n\n  /**\n   * Convert an integer to a string.\n   */\n  std::string toString(int32_t x);\n\n  /**\n   * Convert a string to an integer.\n   * @throws Error if the string is not a valid integer\n   */\n  int32_t toInt(const std::string& val);\n\n  /**\n   * Convert the string to a float.\n   * @throws Error if the string is not a valid float\n   */\n  float toFloat(const std::string& val);\n\n  /**\n   * Convert the string to a boolean.\n   * @throws Error if the string is not a valid boolean value\n   */\n  bool toBool(const std::string& val);\n\n  /**\n   * Get the current time in the number of milliseconds since 1970.\n   */\n  uint64_t getCurrentMillis();\n\n  /**\n   * Split a string into \"words\". 
Multiple deliminators are treated as a single\n   * word break, so no zero-length words are returned.\n   * @param str the string to split\n   * @param separator a list of characters that divide words\n   */\n  std::vector<std::string> splitString(const std::string& str,\n                                       const char* separator);\n\n  /**\n   * Quote a string to avoid \"\\\", non-printable characters, and the \n   * deliminators.\n   * @param str the string to quote\n   * @param deliminators the set of characters to always quote\n   */\n  std::string quoteString(const std::string& str,\n                          const char* deliminators);\n\n  /**\n   * Unquote the given string to return the original string.\n   * @param str the string to unquote\n   */\n  std::string unquoteString(const std::string& str);\n\n}\n\n#endif\n"
  },
  {
    "path": "examples/c++/include/hadoop/TemplateFactory.hh",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#ifndef HADOOP_PIPES_TEMPLATE_FACTORY_HH\n#define HADOOP_PIPES_TEMPLATE_FACTORY_HH\n\nnamespace HadoopPipes {\n\n  template <class mapper, class reducer>\n  class TemplateFactory2: public Factory {\n  public:\n    Mapper* createMapper(MapContext& context) const {\n      return new mapper(context);\n    }\n    Reducer* createReducer(ReduceContext& context) const {\n      return new reducer(context);\n    }\n  };\n\n  template <class mapper, class reducer, class partitioner>\n  class TemplateFactory3: public TemplateFactory2<mapper,reducer> {\n  public:\n    Partitioner* createPartitioner(MapContext& context) const {\n      return new partitioner(context);\n    }\n  };\n\n  template <class mapper, class reducer>\n  class TemplateFactory3<mapper, reducer, void>\n      : public TemplateFactory2<mapper,reducer> {\n  };\n\n  template <class mapper, class reducer, class partitioner, class combiner>\n  class TemplateFactory4\n   : public TemplateFactory3<mapper,reducer,partitioner>{\n  public:\n    Reducer* createCombiner(MapContext& context) const {\n      return new combiner(context);\n    }\n  };\n\n  template <class mapper, class reducer, 
class partitioner>\n  class TemplateFactory4<mapper,reducer,partitioner,void>\n   : public TemplateFactory3<mapper,reducer,partitioner>{\n  };\n\n  template <class mapper, class reducer, class partitioner, \n            class combiner, class recordReader>\n  class TemplateFactory5\n   : public TemplateFactory4<mapper,reducer,partitioner,combiner>{\n  public:\n    RecordReader* createRecordReader(MapContext& context) const {\n      return new recordReader(context);\n    }\n  };\n\n  template <class mapper, class reducer, class partitioner,class combiner>\n  class TemplateFactory5<mapper,reducer,partitioner,combiner,void>\n   : public TemplateFactory4<mapper,reducer,partitioner,combiner>{\n  };\n\n  template <class mapper, class reducer, class partitioner=void, \n            class combiner=void, class recordReader=void, \n            class recordWriter=void> \n  class TemplateFactory\n   : public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{\n  public:\n    RecordWriter* createRecordWriter(ReduceContext& context) const {\n      return new recordWriter(context);\n    }\n  };\n\n  template <class mapper, class reducer, class partitioner, \n            class combiner, class recordReader>\n  class TemplateFactory<mapper, reducer, partitioner, combiner, recordReader, \n                        void>\n   : public TemplateFactory5<mapper,reducer,partitioner,combiner,recordReader>{\n  };\n\n}\n\n#endif\n"
  },
  {
    "path": "examples/c++/wordcount.cc",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n#include <iostream>\n#include <sstream>\n#include <stdexcept>\n#include <string>\n\n#include \"hadoop/Pipes.hh\"\n#include \"hadoop/TemplateFactory.hh\"\n#include \"hadoop/StringUtils.hh\"\n\n# define INT64_SIZE sizeof(int64_t)\n\n\nint64_t deserializeLongWritable(std::string s) {\n  int64_t rval = 0;\n  if (s.size() < INT64_SIZE) {\n    throw std::invalid_argument(\"not enough bytes\");\n  }\n  for (std::size_t i = 0; i < INT64_SIZE; ++i) {\n    rval = (rval << INT64_SIZE) | static_cast<unsigned char>(s[i]);\n  }\n  return rval;\n}\n\n\nclass Mapper: public HadoopPipes::Mapper {\n\npublic:\n  Mapper(HadoopPipes::TaskContext &context) { }\n\n  void map(HadoopPipes::MapContext &context) {\n    int64_t key = deserializeLongWritable(context.getInputKey());\n    std::cerr << \"key (ignored): \" << key << \"\\n\";\n    std::stringstream ss(context.getInputValue());\n    std::string item;\n    while (std::getline(ss, item, ' ')) {\n      context.emit(item, \"1\");\n    }\n  }\n\n};\n\n\nclass Reducer: public HadoopPipes::Reducer {\n\npublic:\n  Reducer(HadoopPipes::TaskContext &context) { }\n\n  void reduce(HadoopPipes::ReduceContext &context) {\n    int sum = 0;\n    while (context.nextValue()) {\n      sum += HadoopUtils::toInt(context.getInputValue());\n    }\n    context.emit(context.getInputKey(), 
HadoopUtils::toString(sum));\n  }\n};\n\n\nint main(int argc, char *argv[]) {\n  return HadoopPipes::runTask(HadoopPipes::TemplateFactory<Mapper, Reducer>());\n}\n"
  },
  {
    "path": "examples/config.sh",
    "content": "[ -n \"${PYDOOP_EXAMPLES:-}\" ] && return || readonly PYDOOP_EXAMPLES=1\n\ndie() {\n    echo \"$1\" 1>&2\n    exit 1\n}\n\nexport USER=\"${USER:-$(whoami)}\"\nexport HADOOP=\"${HADOOP:-hadoop}\"\nexport HDFS=\"${HDFS:-hdfs}\"\nexport MAPRED=\"${MAPRED:-mapred}\"\nexport YARN=\"${YARN:-yarn}\"\nexport PYTHON=\"${PYTHON:-python}\"\nexport PY_VER=$(\"${PYTHON}\" -c 'import sys; print(sys.version_info[0])')\nexport PYDOOP=\"pydoop${PY_VER}\"\n\nensure_dfs_home() {\n    ${HDFS} dfs -mkdir -p /user/${USER}\n}\n\nhadoop_fs() {\n    ${HDFS} getconf -confKey fs.defaultFS | cut -d : -f 1\n}\n\nexport -f die ensure_dfs_home hadoop_fs\n"
  },
  {
    "path": "examples/hdfs/common.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\n\nMB = 2**20\nTEST_ROOT = os.getenv(\"TEST_ROOT\", \"pydoop_test_tree\")\n\n\ndef isdir(fs, d):\n    try:\n        info = fs.get_path_info(d)\n    except IOError:\n        return False\n    return info['kind'] == 'directory'\n"
  },
  {
    "path": "examples/hdfs/repl_session.py",
    "content": "\"\"\"\\\n# DOCS_INCLUDE_START\n>>> import pydoop.hdfs as hdfs\n>>> hdfs.mkdir('test')\n>>> hdfs.dump('hello, world', 'test/hello.txt')\n>>> hdfs.load('test/hello.txt')\nb'hello, world'\n>>> hdfs.load('test/hello.txt', mode='rt')\n'hello, world'\n>>> [hdfs.path.basename(_) for _ in hdfs.ls('test')]\n['hello.txt']\n>>> hdfs.stat('test/hello.txt').st_size\n12\n>>> hdfs.path.isdir('test')\nTrue\n>>> hdfs.path.isfile('test')\nFalse\n>>> hdfs.path.basename('test/hello.txt')\n'hello.txt'\n>>> hdfs.cp('test', 'test.copy')\n>>> [hdfs.path.basename(_) for _ in hdfs.ls('test.copy')]\n['hello.txt']\n>>> hdfs.get('test/hello.txt', '/tmp/hello.txt')\n>>> with open('/tmp/hello.txt') as f:\n...     f.read()\n...\n'hello, world'\n>>> hdfs.put('/tmp/hello.txt', 'test.copy/hello.txt.copy')\n>>> for x in sorted(hdfs.ls('test.copy')): print(repr(hdfs.path.basename(x)))\n...\n'hello.txt'\n'hello.txt.copy'\n>>> with hdfs.open('test/hello.txt', 'r') as fi:\n...     fi.read(3)\n...\nb'hel'\n>>> with hdfs.open('test/hello.txt', 'rt') as fi:\n...     fi.read(3)\n...\n'hel'\n\n# DOCS_INCLUDE_END\n\"\"\"\n\n\ndef clean():\n    for path in \"test\", \"test.copy\", \"file:/tmp/hello.txt\":\n        try:\n            hdfs.rm(path)\n        except OSError:\n            pass\n\n\nif __name__ == \"__main__\":\n    import doctest\n    import pydoop.hdfs as hdfs\n    clean()\n    doctest.testmod(verbose=True)\n    clean()\n"
  },
  {
    "path": "examples/hdfs/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n# \n# Copyright 2009-2026 CRS4.\n# \n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n# \n#   http://www.apache.org/licenses/LICENSE-2.0\n# \n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n# \n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nexport TEST_ROOT=\"${TEST_ROOT:-pydoop_test_tree}\"\nDEPTH=${1:-3}\nSPAN=${2:-4}\n\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    echo \"Waiting for HDFS to exit safe mode...\"\n    \"${HDFS}\" dfsadmin -safemode wait\nfi\n\nWD=$(mktemp -d)\npushd \"${WD}\"\n\necho \"Generating tree (depth=${DEPTH}, span=${SPAN})...\"\n\"${PYTHON}\" \"${this_dir}\"/treegen.py ${DEPTH} ${SPAN}\n\necho \"Computing usage by block size...\"\n\"${PYTHON}\" \"${this_dir}\"/treewalk.py\n\necho \"Cleaning up...\"\n${HDFS} dfs -rm -r -f \"${TEST_ROOT}\"\n\nif (( ${PY_VER} >= 3 )); then\n    echo \"Checking REPL example...\"\n    ${PYTHON} \"${this_dir}\"/repl_session.py\nfi\n\npopd\nrm -rf \"${WD}\"\n"
  },
  {
    "path": "examples/hdfs/treegen.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nGenerate an HDFS tree containing files of different block size.\n\"\"\"\n\nimport sys\nimport random\n\nimport pydoop.hdfs as hdfs\n\nfrom common import isdir, MB, TEST_ROOT\n\n\nBS_RANGE = [_ * MB for _ in range(50, 101, 10)]\n\n\ndef treegen(fs, root, depth, span):\n    if isdir(fs, root) and depth > 0:\n        for i in range(span):\n            path = u\"%s/%d_%d\" % (root, depth, i)\n            kind = 'file' if i else 'directory'\n            if kind == 'file':\n                bs = random.sample(BS_RANGE, 1)[0]\n                sys.stderr.write(\n                    \"%s %s %d\\n\" % (kind[0].upper(), path, (bs / MB))\n                )\n                with fs.open_file(path, \"wt\", blocksize=bs) as f:\n                    f.write(path)\n            else:\n                sys.stderr.write(\"%s %s 0\\n\" % (kind[0].upper(), path))\n                fs.create_directory(path)\n                treegen(fs, path, depth - 1, span)\n\n\ndef main(argv):\n\n    try:\n        depth = int(argv[1])\n        span = int(argv[2])\n    except IndexError:\n        print(\"Usage: python %s DEPTH SPAN\" % argv[0])\n        sys.exit(2)\n\n    fs = hdfs.hdfs()\n    try:\n        root = \"%s/%s\" % (fs.working_directory(), TEST_ROOT)\n        try:\n            fs.delete(root)\n        except IOError:\n            pass\n        
fs.create_directory(root)\n        treegen(fs, root, depth, span)\n    finally:\n        fs.close()\n\n\nif __name__ == \"__main__\":\n    main(sys.argv)\n"
  },
  {
    "path": "examples/hdfs/treewalk.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nTraverse an HDFS tree and output disk space usage by block size.\n\"\"\"\n# DOCS_INCLUDE_START\nimport pydoop.hdfs as hdfs\nfrom common import MB, TEST_ROOT\n\n\ndef usage_by_bs(fs, root):\n    stats = {}\n    for info in fs.walk(root):\n        if info['kind'] == 'directory':\n            continue\n        bs = int(info['block_size'])\n        size = int(info['size'])\n        stats[bs] = stats.get(bs, 0) + size\n    return stats\n\n\nif __name__ == \"__main__\":\n    with hdfs.hdfs() as fs:\n        root = \"%s/%s\" % (fs.working_directory(), TEST_ROOT)\n        print(\"BS(MB)\\tBYTES\")\n        for k, v in usage_by_bs(fs, root).items():\n            print(\"%.1f\\t%d\" % (k / float(MB), v))\n"
  },
  {
    "path": "examples/input/alice_1.txt",
    "content": "Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll\r\n\r\nThis eBook is for the use of anyone anywhere at no cost and with\r\nalmost no restrictions whatsoever.  You may copy it, give it away or\r\nre-use it under the terms of the Project Gutenberg License included\r\nwith this eBook or online at www.gutenberg.org\r\n\r\n\r\nTitle: Alice's Adventures in Wonderland\r\n\r\nAuthor: Lewis Carroll\r\n\r\nPosting Date: June 25, 2008 [EBook #11]\r\nRelease Date: March, 1994\r\n\r\nLanguage: English\r\n\r\nCharacter set encoding: ASCII\r\n\r\n*** START OF THIS PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN WONDERLAND ***\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nALICE'S ADVENTURES IN WONDERLAND\r\n\r\nLewis Carroll\r\n\r\nTHE MILLENNIUM FULCRUM EDITION 3.0\r\n\r\n\r\n\r\n\r\nCHAPTER I. Down the Rabbit-Hole\r\n\r\nAlice was beginning to get very tired of sitting by her sister on the\r\nbank, and of having nothing to do: once or twice she had peeped into the\r\nbook her sister was reading, but it had no pictures or conversations in\r\nit, 'and what is the use of a book,' thought Alice 'without pictures or\r\nconversation?'\r\n\r\nSo she was considering in her own mind (as well as she could, for the\r\nhot day made her feel very sleepy and stupid), whether the pleasure\r\nof making a daisy-chain would be worth the trouble of getting up and\r\npicking the daisies, when suddenly a White Rabbit with pink eyes ran\r\nclose by her.\r\n\r\nThere was nothing so VERY remarkable in that; nor did Alice think it so\r\nVERY much out of the way to hear the Rabbit say to itself, 'Oh dear!\r\nOh dear! I shall be late!' 
(when she thought it over afterwards, it\r\noccurred to her that she ought to have wondered at this, but at the time\r\nit all seemed quite natural); but when the Rabbit actually TOOK A WATCH\r\nOUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on,\r\nAlice started to her feet, for it flashed across her mind that she had\r\nnever before seen a rabbit with either a waistcoat-pocket, or a watch\r\nto take out of it, and burning with curiosity, she ran across the field\r\nafter it, and fortunately was just in time to see it pop down a large\r\nrabbit-hole under the hedge.\r\n\r\nIn another moment down went Alice after it, never once considering how\r\nin the world she was to get out again.\r\n\r\nThe rabbit-hole went straight on like a tunnel for some way, and then\r\ndipped suddenly down, so suddenly that Alice had not a moment to think\r\nabout stopping herself before she found herself falling down a very deep\r\nwell.\r\n\r\nEither the well was very deep, or she fell very slowly, for she had\r\nplenty of time as she went down to look about her and to wonder what was\r\ngoing to happen next. First, she tried to look down and make out what\r\nshe was coming to, but it was too dark to see anything; then she\r\nlooked at the sides of the well, and noticed that they were filled with\r\ncupboards and book-shelves; here and there she saw maps and pictures\r\nhung upon pegs. She took down a jar from one of the shelves as\r\nshe passed; it was labelled 'ORANGE MARMALADE', but to her great\r\ndisappointment it was empty: she did not like to drop the jar for fear\r\nof killing somebody, so managed to put it into one of the cupboards as\r\nshe fell past it.\r\n\r\n'Well!' thought Alice to herself, 'after such a fall as this, I shall\r\nthink nothing of tumbling down stairs! How brave they'll all think me at\r\nhome! Why, I wouldn't say anything about it, even if I fell off the top\r\nof the house!' (Which was very likely true.)\r\n\r\nDown, down, down. 
Would the fall NEVER come to an end! 'I wonder how\r\nmany miles I've fallen by this time?' she said aloud. 'I must be getting\r\nsomewhere near the centre of the earth. Let me see: that would be four\r\nthousand miles down, I think--' (for, you see, Alice had learnt several\r\nthings of this sort in her lessons in the schoolroom, and though this\r\nwas not a VERY good opportunity for showing off her knowledge, as there\r\nwas no one to listen to her, still it was good practice to say it over)\r\n'--yes, that's about the right distance--but then I wonder what Latitude\r\nor Longitude I've got to?' (Alice had no idea what Latitude was, or\r\nLongitude either, but thought they were nice grand words to say.)\r\n\r\nPresently she began again. 'I wonder if I shall fall right THROUGH the\r\nearth! How funny it'll seem to come out among the people that walk with\r\ntheir heads downward! The Antipathies, I think--' (she was rather glad\r\nthere WAS no one listening, this time, as it didn't sound at all the\r\nright word) '--but I shall have to ask them what the name of the country\r\nis, you know. Please, Ma'am, is this New Zealand or Australia?' (and\r\nshe tried to curtsey as she spoke--fancy CURTSEYING as you're falling\r\nthrough the air! Do you think you could manage it?) 'And what an\r\nignorant little girl she'll think me for asking! No, it'll never do to\r\nask: perhaps I shall see it written up somewhere.'\r\n\r\nDown, down, down. There was nothing else to do, so Alice soon began\r\ntalking again. 'Dinah'll miss me very much to-night, I should think!'\r\n(Dinah was the cat.) 'I hope they'll remember her saucer of milk at\r\ntea-time. Dinah my dear! I wish you were down here with me! There are no\r\nmice in the air, I'm afraid, but you might catch a bat, and that's very\r\nlike a mouse, you know. But do cats eat bats, I wonder?' And here Alice\r\nbegan to get rather sleepy, and went on saying to herself, in a dreamy\r\nsort of way, 'Do cats eat bats? 
Do cats eat bats?' and sometimes, 'Do\r\nbats eat cats?' for, you see, as she couldn't answer either question,\r\nit didn't much matter which way she put it. She felt that she was dozing\r\noff, and had just begun to dream that she was walking hand in hand with\r\nDinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth:\r\ndid you ever eat a bat?' when suddenly, thump! thump! down she came upon\r\na heap of sticks and dry leaves, and the fall was over.\r\n\r\nAlice was not a bit hurt, and she jumped up on to her feet in a moment:\r\nshe looked up, but it was all dark overhead; before her was another\r\nlong passage, and the White Rabbit was still in sight, hurrying down it.\r\nThere was not a moment to be lost: away went Alice like the wind, and\r\nwas just in time to hear it say, as it turned a corner, 'Oh my ears\r\nand whiskers, how late it's getting!' She was close behind it when she\r\nturned the corner, but the Rabbit was no longer to be seen: she found\r\nherself in a long, low hall, which was lit up by a row of lamps hanging\r\nfrom the roof.\r\n\r\nThere were doors all round the hall, but they were all locked; and when\r\nAlice had been all the way down one side and up the other, trying every\r\ndoor, she walked sadly down the middle, wondering how she was ever to\r\nget out again.\r\n\r\nSuddenly she came upon a little three-legged table, all made of solid\r\nglass; there was nothing on it except a tiny golden key, and Alice's\r\nfirst thought was that it might belong to one of the doors of the hall;\r\nbut, alas! either the locks were too large, or the key was too small,\r\nbut at any rate it would not open any of them. 
However, on the second\r\ntime round, she came upon a low curtain she had not noticed before, and\r\nbehind it was a little door about fifteen inches high: she tried the\r\nlittle golden key in the lock, and to her great delight it fitted!\r\n\r\nAlice opened the door and found that it led into a small passage, not\r\nmuch larger than a rat-hole: she knelt down and looked along the passage\r\ninto the loveliest garden you ever saw. How she longed to get out of\r\nthat dark hall, and wander about among those beds of bright flowers and\r\nthose cool fountains, but she could not even get her head through the\r\ndoorway; 'and even if my head would go through,' thought poor Alice, 'it\r\nwould be of very little use without my shoulders. Oh, how I wish I could\r\nshut up like a telescope! I think I could, if I only know how to begin.'\r\nFor, you see, so many out-of-the-way things had happened lately,\r\nthat Alice had begun to think that very few things indeed were really\r\nimpossible.\r\n\r\nThere seemed to be no use in waiting by the little door, so she went\r\nback to the table, half hoping she might find another key on it, or at\r\nany rate a book of rules for shutting people up like telescopes: this\r\ntime she found a little bottle on it, ('which certainly was not here\r\nbefore,' said Alice,) and round the neck of the bottle was a paper\r\nlabel, with the words 'DRINK ME' beautifully printed on it in large\r\nletters.\r\n\r\nIt was all very well to say 'Drink me,' but the wise little Alice was\r\nnot going to do THAT in a hurry. 
'No, I'll look first,' she said, 'and\r\nsee whether it's marked \"poison\" or not'; for she had read several nice\r\nlittle histories about children who had got burnt, and eaten up by wild\r\nbeasts and other unpleasant things, all because they WOULD not remember\r\nthe simple rules their friends had taught them: such as, that a red-hot\r\npoker will burn you if you hold it too long; and that if you cut your\r\nfinger VERY deeply with a knife, it usually bleeds; and she had never\r\nforgotten that, if you drink much from a bottle marked 'poison,' it is\r\nalmost certain to disagree with you, sooner or later.\r\n\r\nHowever, this bottle was NOT marked 'poison,' so Alice ventured to taste\r\nit, and finding it very nice, (it had, in fact, a sort of mixed flavour\r\nof cherry-tart, custard, pine-apple, roast turkey, toffee, and hot\r\nbuttered toast,) she very soon finished it off.\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n    *    *    *    *    *    *\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n'What a curious feeling!' said Alice; 'I must be shutting up like a\r\ntelescope.'\r\n\r\nAnd so it was indeed: she was now only ten inches high, and her face\r\nbrightened up at the thought that she was now the right size for going\r\nthrough the little door into that lovely garden. First, however, she\r\nwaited for a few minutes to see if she was going to shrink any further:\r\nshe felt a little nervous about this; 'for it might end, you know,' said\r\nAlice to herself, 'in my going out altogether, like a candle. I wonder\r\nwhat I should be like then?' And she tried to fancy what the flame of a\r\ncandle is like after the candle is blown out, for she could not remember\r\never having seen such a thing.\r\n\r\nAfter a while, finding that nothing more happened, she decided on going\r\ninto the garden at once; but, alas for poor Alice! 
when she got to the\r\ndoor, she found she had forgotten the little golden key, and when she\r\nwent back to the table for it, she found she could not possibly reach\r\nit: she could see it quite plainly through the glass, and she tried her\r\nbest to climb up one of the legs of the table, but it was too slippery;\r\nand when she had tired herself out with trying, the poor little thing\r\nsat down and cried.\r\n\r\n'Come, there's no use in crying like that!' said Alice to herself,\r\nrather sharply; 'I advise you to leave off this minute!' She generally\r\ngave herself very good advice, (though she very seldom followed it),\r\nand sometimes she scolded herself so severely as to bring tears into\r\nher eyes; and once she remembered trying to box her own ears for having\r\ncheated herself in a game of croquet she was playing against herself,\r\nfor this curious child was very fond of pretending to be two people.\r\n'But it's no use now,' thought poor Alice, 'to pretend to be two people!\r\nWhy, there's hardly enough of me left to make ONE respectable person!'\r\n\r\nSoon her eye fell on a little glass box that was lying under the table:\r\nshe opened it, and found in it a very small cake, on which the words\r\n'EAT ME' were beautifully marked in currants. 'Well, I'll eat it,' said\r\nAlice, 'and if it makes me grow larger, I can reach the key; and if it\r\nmakes me grow smaller, I can creep under the door; so either way I'll\r\nget into the garden, and I don't care which happens!'\r\n\r\nShe ate a little bit, and said anxiously to herself, 'Which way? 
Which\r\nway?', holding her hand on the top of her head to feel which way it was\r\ngrowing, and she was quite surprised to find that she remained the same\r\nsize: to be sure, this generally happens when one eats cake, but Alice\r\nhad got so much into the way of expecting nothing but out-of-the-way\r\nthings to happen, that it seemed quite dull and stupid for life to go on\r\nin the common way.\r\n\r\nSo she set to work, and very soon finished off the cake.\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n    *    *    *    *    *    *\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n\r\n\r\n\r\nCHAPTER II. The Pool of Tears\r\n\r\n'Curiouser and curiouser!' cried Alice (she was so much surprised, that\r\nfor the moment she quite forgot how to speak good English); 'now I'm\r\nopening out like the largest telescope that ever was! Good-bye, feet!'\r\n(for when she looked down at her feet, they seemed to be almost out of\r\nsight, they were getting so far off). 'Oh, my poor little feet, I wonder\r\nwho will put on your shoes and stockings for you now, dears? I'm sure\r\n_I_ shan't be able! I shall be a great deal too far off to trouble\r\nmyself about you: you must manage the best way you can;--but I must be\r\nkind to them,' thought Alice, 'or perhaps they won't walk the way I want\r\nto go! Let me see: I'll give them a new pair of boots every Christmas.'\r\n\r\nAnd she went on planning to herself how she would manage it. 'They must\r\ngo by the carrier,' she thought; 'and how funny it'll seem, sending\r\npresents to one's own feet! And how odd the directions will look!\r\n\r\n     ALICE'S RIGHT FOOT, ESQ.\r\n       HEARTHRUG,\r\n         NEAR THE FENDER,\r\n           (WITH ALICE'S LOVE).\r\n\r\nOh dear, what nonsense I'm talking!'\r\n\r\nJust then her head struck against the roof of the hall: in fact she was\r\nnow more than nine feet high, and she at once took up the little golden\r\nkey and hurried off to the garden door.\r\n\r\nPoor Alice! 
It was as much as she could do, lying down on one side, to\r\nlook through into the garden with one eye; but to get through was more\r\nhopeless than ever: she sat down and began to cry again.\r\n\r\n'You ought to be ashamed of yourself,' said Alice, 'a great girl like\r\nyou,' (she might well say this), 'to go on crying in this way! Stop this\r\nmoment, I tell you!' But she went on all the same, shedding gallons of\r\ntears, until there was a large pool all round her, about four inches\r\ndeep and reaching half down the hall.\r\n\r\nAfter a time she heard a little pattering of feet in the distance, and\r\nshe hastily dried her eyes to see what was coming. It was the White\r\nRabbit returning, splendidly dressed, with a pair of white kid gloves in\r\none hand and a large fan in the other: he came trotting along in a great\r\nhurry, muttering to himself as he came, 'Oh! the Duchess, the Duchess!\r\nOh! won't she be savage if I've kept her waiting!' Alice felt so\r\ndesperate that she was ready to ask help of any one; so, when the Rabbit\r\ncame near her, she began, in a low, timid voice, 'If you please, sir--'\r\nThe Rabbit started violently, dropped the white kid gloves and the fan,\r\nand skurried away into the darkness as hard as he could go.\r\n\r\nAlice took up the fan and gloves, and, as the hall was very hot, she\r\nkept fanning herself all the time she went on talking: 'Dear, dear! How\r\nqueer everything is to-day! And yesterday things went on just as usual.\r\nI wonder if I've been changed in the night? Let me think: was I the\r\nsame when I got up this morning? I almost think I can remember feeling a\r\nlittle different. But if I'm not the same, the next question is, Who\r\nin the world am I? Ah, THAT'S the great puzzle!' 
And she began thinking\r\nover all the children she knew that were of the same age as herself, to\r\nsee if she could have been changed for any of them.\r\n\r\n'I'm sure I'm not Ada,' she said, 'for her hair goes in such long\r\nringlets, and mine doesn't go in ringlets at all; and I'm sure I can't\r\nbe Mabel, for I know all sorts of things, and she, oh! she knows such a\r\nvery little! Besides, SHE'S she, and I'm I, and--oh dear, how puzzling\r\nit all is! I'll try if I know all the things I used to know. Let me\r\nsee: four times five is twelve, and four times six is thirteen, and\r\nfour times seven is--oh dear! I shall never get to twenty at that rate!\r\nHowever, the Multiplication Table doesn't signify: let's try Geography.\r\nLondon is the capital of Paris, and Paris is the capital of Rome, and\r\nRome--no, THAT'S all wrong, I'm certain! I must have been changed for\r\nMabel! I'll try and say \"How doth the little--\"' and she crossed her\r\nhands on her lap as if she were saying lessons, and began to repeat it,\r\nbut her voice sounded hoarse and strange, and the words did not come the\r\nsame as they used to do:--\r\n\r\n     'How doth the little crocodile\r\n      Improve his shining tail,\r\n     And pour the waters of the Nile\r\n      On every golden scale!\r\n\r\n     'How cheerfully he seems to grin,\r\n      How neatly spread his claws,\r\n     And welcome little fishes in\r\n      With gently smiling jaws!'\r\n\r\n'I'm sure those are not the right words,' said poor Alice, and her eyes\r\nfilled with tears again as she went on, 'I must be Mabel after all, and\r\nI shall have to go and live in that poky little house, and have next to\r\nno toys to play with, and oh! ever so many lessons to learn! No, I've\r\nmade up my mind about it; if I'm Mabel, I'll stay down here! It'll be no\r\nuse their putting their heads down and saying \"Come up again, dear!\" I\r\nshall only look up and say \"Who am I then? 
Tell me that first, and then,\r\nif I like being that person, I'll come up: if not, I'll stay down here\r\ntill I'm somebody else\"--but, oh dear!' cried Alice, with a sudden burst\r\nof tears, 'I do wish they WOULD put their heads down! I am so VERY tired\r\nof being all alone here!'\r\n\r\nAs she said this she looked down at her hands, and was surprised to see\r\nthat she had put on one of the Rabbit's little white kid gloves while\r\nshe was talking. 'How CAN I have done that?' she thought. 'I must\r\nbe growing small again.' She got up and went to the table to measure\r\nherself by it, and found that, as nearly as she could guess, she was now\r\nabout two feet high, and was going on shrinking rapidly: she soon found\r\nout that the cause of this was the fan she was holding, and she dropped\r\nit hastily, just in time to avoid shrinking away altogether.\r\n\r\n'That WAS a narrow escape!' said Alice, a good deal frightened at the\r\nsudden change, but very glad to find herself still in existence; 'and\r\nnow for the garden!' and she ran with all speed back to the little door:\r\nbut, alas! the little door was shut again, and the little golden key was\r\nlying on the glass table as before, 'and things are worse than ever,'\r\nthought the poor child, 'for I never was so small as this before, never!\r\nAnd I declare it's too bad, that it is!'\r\n\r\nAs she said these words her foot slipped, and in another moment, splash!\r\nshe was up to her chin in salt water. Her first idea was that she\r\nhad somehow fallen into the sea, 'and in that case I can go back by\r\nrailway,' she said to herself. (Alice had been to the seaside once in\r\nher life, and had come to the general conclusion, that wherever you go\r\nto on the English coast you find a number of bathing machines in the\r\nsea, some children digging in the sand with wooden spades, then a row\r\nof lodging houses, and behind them a railway station.) 
However, she soon\r\nmade out that she was in the pool of tears which she had wept when she\r\nwas nine feet high.\r\n\r\n'I wish I hadn't cried so much!' said Alice, as she swam about, trying\r\nto find her way out. 'I shall be punished for it now, I suppose, by\r\nbeing drowned in my own tears! That WILL be a queer thing, to be sure!\r\nHowever, everything is queer to-day.'\r\n\r\nJust then she heard something splashing about in the pool a little way\r\noff, and she swam nearer to make out what it was: at first she thought\r\nit must be a walrus or hippopotamus, but then she remembered how small\r\nshe was now, and she soon made out that it was only a mouse that had\r\nslipped in like herself.\r\n\r\n'Would it be of any use, now,' thought Alice, 'to speak to this mouse?\r\nEverything is so out-of-the-way down here, that I should think very\r\nlikely it can talk: at any rate, there's no harm in trying.' So she\r\nbegan: 'O Mouse, do you know the way out of this pool? I am very tired\r\nof swimming about here, O Mouse!' (Alice thought this must be the right\r\nway of speaking to a mouse: she had never done such a thing before, but\r\nshe remembered having seen in her brother's Latin Grammar, 'A mouse--of\r\na mouse--to a mouse--a mouse--O mouse!') The Mouse looked at her rather\r\ninquisitively, and seemed to her to wink with one of its little eyes,\r\nbut it said nothing.\r\n\r\n'Perhaps it doesn't understand English,' thought Alice; 'I daresay it's\r\na French mouse, come over with William the Conqueror.' (For, with all\r\nher knowledge of history, Alice had no very clear notion how long ago\r\nanything had happened.) So she began again: 'Ou est ma chatte?' which\r\nwas the first sentence in her French lesson-book. The Mouse gave a\r\nsudden leap out of the water, and seemed to quiver all over with fright.\r\n'Oh, I beg your pardon!' cried Alice hastily, afraid that she had hurt\r\nthe poor animal's feelings. 
'I quite forgot you didn't like cats.'\r\n\r\n'Not like cats!' cried the Mouse, in a shrill, passionate voice. 'Would\r\nYOU like cats if you were me?'\r\n\r\n'Well, perhaps not,' said Alice in a soothing tone: 'don't be angry\r\nabout it. And yet I wish I could show you our cat Dinah: I think you'd\r\ntake a fancy to cats if you could only see her. She is such a dear quiet\r\nthing,' Alice went on, half to herself, as she swam lazily about in the\r\npool, 'and she sits purring so nicely by the fire, licking her paws and\r\nwashing her face--and she is such a nice soft thing to nurse--and she's\r\nsuch a capital one for catching mice--oh, I beg your pardon!' cried\r\nAlice again, for this time the Mouse was bristling all over, and she\r\nfelt certain it must be really offended. 'We won't talk about her any\r\nmore if you'd rather not.'\r\n\r\n'We indeed!' cried the Mouse, who was trembling down to the end of his\r\ntail. 'As if I would talk on such a subject! Our family always HATED\r\ncats: nasty, low, vulgar things! Don't let me hear the name again!'\r\n\r\n'I won't indeed!' said Alice, in a great hurry to change the subject of\r\nconversation. 'Are you--are you fond--of--of dogs?' The Mouse did not\r\nanswer, so Alice went on eagerly: 'There is such a nice little dog near\r\nour house I should like to show you! A little bright-eyed terrier, you\r\nknow, with oh, such long curly brown hair! And it'll fetch things when\r\nyou throw them, and it'll sit up and beg for its dinner, and all sorts\r\nof things--I can't remember half of them--and it belongs to a farmer,\r\nyou know, and he says it's so useful, it's worth a hundred pounds! He\r\nsays it kills all the rats and--oh dear!' cried Alice in a sorrowful\r\ntone, 'I'm afraid I've offended it again!' For the Mouse was swimming\r\naway from her as hard as it could go, and making quite a commotion in\r\nthe pool as it went.\r\n\r\nSo she called softly after it, 'Mouse dear! 
Do come back again, and we\r\nwon't talk about cats or dogs either, if you don't like them!' When the\r\nMouse heard this, it turned round and swam slowly back to her: its\r\nface was quite pale (with passion, Alice thought), and it said in a low\r\ntrembling voice, 'Let us get to the shore, and then I'll tell you my\r\nhistory, and you'll understand why it is I hate cats and dogs.'\r\n\r\nIt was high time to go, for the pool was getting quite crowded with the\r\nbirds and animals that had fallen into it: there were a Duck and a Dodo,\r\na Lory and an Eaglet, and several other curious creatures. Alice led the\r\nway, and the whole party swam to the shore.\r\n\r\n\r\n\r\n\r\nCHAPTER III. A Caucus-Race and a Long Tale\r\n\r\nThey were indeed a queer-looking party that assembled on the bank--the\r\nbirds with draggled feathers, the animals with their fur clinging close\r\nto them, and all dripping wet, cross, and uncomfortable.\r\n\r\nThe first question of course was, how to get dry again: they had a\r\nconsultation about this, and after a few minutes it seemed quite natural\r\nto Alice to find herself talking familiarly with them, as if she had\r\nknown them all her life. Indeed, she had quite a long argument with the\r\nLory, who at last turned sulky, and would only say, 'I am older than\r\nyou, and must know better'; and this Alice would not allow without\r\nknowing how old it was, and, as the Lory positively refused to tell its\r\nage, there was no more to be said.\r\n\r\nAt last the Mouse, who seemed to be a person of authority among them,\r\ncalled out, 'Sit down, all of you, and listen to me! I'LL soon make you\r\ndry enough!' They all sat down at once, in a large ring, with the Mouse\r\nin the middle. Alice kept her eyes anxiously fixed on it, for she felt\r\nsure she would catch a bad cold if she did not get dry very soon.\r\n\r\n'Ahem!' said the Mouse with an important air, 'are you all ready? This\r\nis the driest thing I know. 
Silence all round, if you please! \"William\r\nthe Conqueror, whose cause was favoured by the pope, was soon submitted\r\nto by the English, who wanted leaders, and had been of late much\r\naccustomed to usurpation and conquest. Edwin and Morcar, the earls of\r\nMercia and Northumbria--\"'\r\n\r\n'Ugh!' said the Lory, with a shiver.\r\n\r\n'I beg your pardon!' said the Mouse, frowning, but very politely: 'Did\r\nyou speak?'\r\n\r\n'Not I!' said the Lory hastily.\r\n\r\n'I thought you did,' said the Mouse. '--I proceed. \"Edwin and Morcar,\r\nthe earls of Mercia and Northumbria, declared for him: and even Stigand,\r\nthe patriotic archbishop of Canterbury, found it advisable--\"'\r\n\r\n'Found WHAT?' said the Duck.\r\n\r\n'Found IT,' the Mouse replied rather crossly: 'of course you know what\r\n\"it\" means.'\r\n\r\n'I know what \"it\" means well enough, when I find a thing,' said the\r\nDuck: 'it's generally a frog or a worm. The question is, what did the\r\narchbishop find?'\r\n\r\nThe Mouse did not notice this question, but hurriedly went on, '\"--found\r\nit advisable to go with Edgar Atheling to meet William and offer him the\r\ncrown. William's conduct at first was moderate. But the insolence of his\r\nNormans--\" How are you getting on now, my dear?' it continued, turning\r\nto Alice as it spoke.\r\n\r\n'As wet as ever,' said Alice in a melancholy tone: 'it doesn't seem to\r\ndry me at all.'\r\n\r\n'In that case,' said the Dodo solemnly, rising to its feet, 'I move\r\nthat the meeting adjourn, for the immediate adoption of more energetic\r\nremedies--'\r\n\r\n'Speak English!' said the Eaglet. 'I don't know the meaning of half\r\nthose long words, and, what's more, I don't believe you do either!' And\r\nthe Eaglet bent down its head to hide a smile: some of the other birds\r\ntittered audibly.\r\n\r\n'What I was going to say,' said the Dodo in an offended tone, 'was, that\r\nthe best thing to get us dry would be a Caucus-race.'\r\n\r\n'What IS a Caucus-race?' 
said Alice; not that she wanted much to know,\r\nbut the Dodo had paused as if it thought that SOMEBODY ought to speak,\r\nand no one else seemed inclined to say anything.\r\n\r\n'Why,' said the Dodo, 'the best way to explain it is to do it.' (And, as\r\nyou might like to try the thing yourself, some winter day, I will tell\r\nyou how the Dodo managed it.)\r\n\r\nFirst it marked out a race-course, in a sort of circle, ('the exact\r\nshape doesn't matter,' it said,) and then all the party were placed\r\nalong the course, here and there. There was no 'One, two, three, and\r\naway,' but they began running when they liked, and left off when they\r\nliked, so that it was not easy to know when the race was over. However,\r\nwhen they had been running half an hour or so, and were quite dry again,\r\nthe Dodo suddenly called out 'The race is over!' and they all crowded\r\nround it, panting, and asking, 'But who has won?'\r\n\r\nThis question the Dodo could not answer without a great deal of thought,\r\nand it sat for a long time with one finger pressed upon its forehead\r\n(the position in which you usually see Shakespeare, in the pictures\r\nof him), while the rest waited in silence. At last the Dodo said,\r\n'EVERYBODY has won, and all must have prizes.'\r\n\r\n'But who is to give the prizes?' quite a chorus of voices asked.\r\n\r\n'Why, SHE, of course,' said the Dodo, pointing to Alice with one finger;\r\nand the whole party at once crowded round her, calling out in a confused\r\nway, 'Prizes! Prizes!'\r\n\r\nAlice had no idea what to do, and in despair she put her hand in her\r\npocket, and pulled out a box of comfits, (luckily the salt water had\r\nnot got into it), and handed them round as prizes. There was exactly one\r\na-piece all round.\r\n\r\n'But she must have a prize herself, you know,' said the Mouse.\r\n\r\n'Of course,' the Dodo replied very gravely. 'What else have you got in\r\nyour pocket?' 
he went on, turning to Alice.\r\n\r\n'Only a thimble,' said Alice sadly.\r\n\r\n'Hand it over here,' said the Dodo.\r\n\r\nThen they all crowded round her once more, while the Dodo solemnly\r\npresented the thimble, saying 'We beg your acceptance of this elegant\r\nthimble'; and, when it had finished this short speech, they all cheered.\r\n\r\nAlice thought the whole thing very absurd, but they all looked so grave\r\nthat she did not dare to laugh; and, as she could not think of anything\r\nto say, she simply bowed, and took the thimble, looking as solemn as she\r\ncould.\r\n\r\nThe next thing was to eat the comfits: this caused some noise and\r\nconfusion, as the large birds complained that they could not taste\r\ntheirs, and the small ones choked and had to be patted on the back.\r\nHowever, it was over at last, and they sat down again in a ring, and\r\nbegged the Mouse to tell them something more.\r\n\r\n'You promised to tell me your history, you know,' said Alice, 'and why\r\nit is you hate--C and D,' she added in a whisper, half afraid that it\r\nwould be offended again.\r\n\r\n'Mine is a long and a sad tale!' said the Mouse, turning to Alice, and\r\nsighing.\r\n\r\n'It IS a long tail, certainly,' said Alice, looking down with wonder at\r\nthe Mouse's tail; 'but why do you call it sad?' 
And she kept on puzzling\r\nabout it while the Mouse was speaking, so that her idea of the tale was\r\nsomething like this:--\r\n\r\n         'Fury said to a\r\n         mouse, That he\r\n        met in the\r\n       house,\r\n     \"Let us\r\n      both go to\r\n       law: I will\r\n        prosecute\r\n         YOU.--Come,\r\n           I'll take no\r\n           denial; We\r\n          must have a\r\n        trial: For\r\n      really this\r\n     morning I've\r\n    nothing\r\n    to do.\"\r\n     Said the\r\n      mouse to the\r\n       cur, \"Such\r\n        a trial,\r\n         dear Sir,\r\n            With\r\n          no jury\r\n        or judge,\r\n       would be\r\n      wasting\r\n      our\r\n      breath.\"\r\n       \"I'll be\r\n        judge, I'll\r\n         be jury,\"\r\n            Said\r\n         cunning\r\n          old Fury:\r\n          \"I'll\r\n          try the\r\n            whole\r\n            cause,\r\n              and\r\n           condemn\r\n           you\r\n          to\r\n           death.\"'\r\n\r\n\r\n'You are not attending!' said the Mouse to Alice severely. 'What are you\r\nthinking of?'\r\n\r\n'I beg your pardon,' said Alice very humbly: 'you had got to the fifth\r\nbend, I think?'\r\n\r\n'I had NOT!' cried the Mouse, sharply and very angrily.\r\n\r\n'A knot!' said Alice, always ready to make herself useful, and looking\r\nanxiously about her. 'Oh, do let me help to undo it!'\r\n\r\n'I shall do nothing of the sort,' said the Mouse, getting up and walking\r\naway. 'You insult me by talking such nonsense!'\r\n\r\n'I didn't mean it!' pleaded poor Alice. 'But you're so easily offended,\r\nyou know!'\r\n\r\nThe Mouse only growled in reply.\r\n\r\n'Please come back and finish your story!' Alice called after it; and the\r\nothers all joined in chorus, 'Yes, please do!' but the Mouse only shook\r\nits head impatiently, and walked a little quicker.\r\n\r\n'What a pity it wouldn't stay!' 
sighed the Lory, as soon as it was quite\r\nout of sight; and an old Crab took the opportunity of saying to her\r\ndaughter 'Ah, my dear! Let this be a lesson to you never to lose\r\nYOUR temper!' 'Hold your tongue, Ma!' said the young Crab, a little\r\nsnappishly. 'You're enough to try the patience of an oyster!'\r\n\r\n'I wish I had our Dinah here, I know I do!' said Alice aloud, addressing\r\nnobody in particular. 'She'd soon fetch it back!'\r\n\r\n'And who is Dinah, if I might venture to ask the question?' said the\r\nLory.\r\n\r\nAlice replied eagerly, for she was always ready to talk about her pet:\r\n'Dinah's our cat. And she's such a capital one for catching mice you\r\ncan't think! And oh, I wish you could see her after the birds! Why,\r\nshe'll eat a little bird as soon as look at it!'\r\n\r\nThis speech caused a remarkable sensation among the party. Some of the\r\nbirds hurried off at once: one old Magpie began wrapping itself up very\r\ncarefully, remarking, 'I really must be getting home; the night-air\r\ndoesn't suit my throat!' and a Canary called out in a trembling voice to\r\nits children, 'Come away, my dears! It's high time you were all in bed!'\r\nOn various pretexts they all moved off, and Alice was soon left alone.\r\n\r\n'I wish I hadn't mentioned Dinah!' she said to herself in a melancholy\r\ntone. 'Nobody seems to like her, down here, and I'm sure she's the best\r\ncat in the world! Oh, my dear Dinah! I wonder if I shall ever see you\r\nany more!' And here poor Alice began to cry again, for she felt very\r\nlonely and low-spirited. In a little while, however, she again heard\r\na little pattering of footsteps in the distance, and she looked up\r\neagerly, half hoping that the Mouse had changed his mind, and was coming\r\nback to finish his story.\r\n\r\n\r\n\r\n\r\nCHAPTER IV. 
The Rabbit Sends in a Little Bill\r\n\r\nIt was the White Rabbit, trotting slowly back again, and looking\r\nanxiously about as it went, as if it had lost something; and she heard\r\nit muttering to itself 'The Duchess! The Duchess! Oh my dear paws! Oh\r\nmy fur and whiskers! She'll get me executed, as sure as ferrets are\r\nferrets! Where CAN I have dropped them, I wonder?' Alice guessed in a\r\nmoment that it was looking for the fan and the pair of white kid gloves,\r\nand she very good-naturedly began hunting about for them, but they were\r\nnowhere to be seen--everything seemed to have changed since her swim in\r\nthe pool, and the great hall, with the glass table and the little door,\r\nhad vanished completely.\r\n\r\nVery soon the Rabbit noticed Alice, as she went hunting about, and\r\ncalled out to her in an angry tone, 'Why, Mary Ann, what ARE you doing\r\nout here? Run home this moment, and fetch me a pair of gloves and a fan!\r\nQuick, now!' And Alice was so much frightened that she ran off at once\r\nin the direction it pointed to, without trying to explain the mistake it\r\nhad made.\r\n\r\n'He took me for his housemaid,' she said to herself as she ran. 'How\r\nsurprised he'll be when he finds out who I am! But I'd better take him\r\nhis fan and gloves--that is, if I can find them.' As she said this, she\r\ncame upon a neat little house, on the door of which was a bright brass\r\nplate with the name 'W. RABBIT' engraved upon it. She went in without\r\nknocking, and hurried upstairs, in great fear lest she should meet the\r\nreal Mary Ann, and be turned out of the house before she had found the\r\nfan and gloves.\r\n\r\n'How queer it seems,' Alice said to herself, 'to be going messages for\r\na rabbit! I suppose Dinah'll be sending me on messages next!' And she\r\nbegan fancying the sort of thing that would happen: '\"Miss Alice! Come\r\nhere directly, and get ready for your walk!\" \"Coming in a minute,\r\nnurse! 
But I've got to see that the mouse doesn't get out.\" Only I don't\r\nthink,' Alice went on, 'that they'd let Dinah stop in the house if it\r\nbegan ordering people about like that!'\r\n\r\nBy this time she had found her way into a tidy little room with a table\r\nin the window, and on it (as she had hoped) a fan and two or three pairs\r\nof tiny white kid gloves: she took up the fan and a pair of the gloves,\r\nand was just going to leave the room, when her eye fell upon a little\r\nbottle that stood near the looking-glass. There was no label this time\r\nwith the words 'DRINK ME,' but nevertheless she uncorked it and put it\r\nto her lips. 'I know SOMETHING interesting is sure to happen,' she said\r\nto herself, 'whenever I eat or drink anything; so I'll just see what\r\nthis bottle does. I do hope it'll make me grow large again, for really\r\nI'm quite tired of being such a tiny little thing!'\r\n\r\nIt did so indeed, and much sooner than she had expected: before she had\r\ndrunk half the bottle, she found her head pressing against the ceiling,\r\nand had to stoop to save her neck from being broken. She hastily put\r\ndown the bottle, saying to herself 'That's quite enough--I hope I shan't\r\ngrow any more--As it is, I can't get out at the door--I do wish I hadn't\r\ndrunk quite so much!'\r\n\r\nAlas! it was too late to wish that! She went on growing, and growing,\r\nand very soon had to kneel down on the floor: in another minute there\r\nwas not even room for this, and she tried the effect of lying down with\r\none elbow against the door, and the other arm curled round her head.\r\nStill she went on growing, and, as a last resource, she put one arm out\r\nof the window, and one foot up the chimney, and said to herself 'Now I\r\ncan do no more, whatever happens. 
What WILL become of me?'\r\n\r\nLuckily for Alice, the little magic bottle had now had its full effect,\r\nand she grew no larger: still it was very uncomfortable, and, as there\r\nseemed to be no sort of chance of her ever getting out of the room\r\nagain, no wonder she felt unhappy.\r\n\r\n'It was much pleasanter at home,' thought poor Alice, 'when one wasn't\r\nalways growing larger and smaller, and being ordered about by mice and\r\nrabbits. I almost wish I hadn't gone down that rabbit-hole--and yet--and\r\nyet--it's rather curious, you know, this sort of life! I do wonder what\r\nCAN have happened to me! When I used to read fairy-tales, I fancied that\r\nkind of thing never happened, and now here I am in the middle of one!\r\nThere ought to be a book written about me, that there ought! And when I\r\ngrow up, I'll write one--but I'm grown up now,' she added in a sorrowful\r\ntone; 'at least there's no room to grow up any more HERE.'\r\n\r\n'But then,' thought Alice, 'shall I NEVER get any older than I am\r\nnow? That'll be a comfort, one way--never to be an old woman--but\r\nthen--always to have lessons to learn! Oh, I shouldn't like THAT!'\r\n\r\n'Oh, you foolish Alice!' she answered herself. 'How can you learn\r\nlessons in here? Why, there's hardly room for YOU, and no room at all\r\nfor any lesson-books!'\r\n\r\nAnd so she went on, taking first one side and then the other, and making\r\nquite a conversation of it altogether; but after a few minutes she heard\r\na voice outside, and stopped to listen.\r\n\r\n'Mary Ann! Mary Ann!' said the voice. 'Fetch me my gloves this moment!'\r\nThen came a little pattering of feet on the stairs. 
Alice knew it was\r\nthe Rabbit coming to look for her, and she trembled till she shook the\r\nhouse, quite forgetting that she was now about a thousand times as large\r\nas the Rabbit, and had no reason to be afraid of it.\r\n\r\nPresently the Rabbit came up to the door, and tried to open it; but, as\r\nthe door opened inwards, and Alice's elbow was pressed hard against it,\r\nthat attempt proved a failure. Alice heard it say to itself 'Then I'll\r\ngo round and get in at the window.'\r\n\r\n'THAT you won't' thought Alice, and, after waiting till she fancied\r\nshe heard the Rabbit just under the window, she suddenly spread out her\r\nhand, and made a snatch in the air. She did not get hold of anything,\r\nbut she heard a little shriek and a fall, and a crash of broken glass,\r\nfrom which she concluded that it was just possible it had fallen into a\r\ncucumber-frame, or something of the sort.\r\n\r\nNext came an angry voice--the Rabbit's--'Pat! Pat! Where are you?' And\r\nthen a voice she had never heard before, 'Sure then I'm here! Digging\r\nfor apples, yer honour!'\r\n\r\n'Digging for apples, indeed!' said the Rabbit angrily. 'Here! Come and\r\nhelp me out of THIS!' (Sounds of more broken glass.)\r\n\r\n'Now tell me, Pat, what's that in the window?'\r\n\r\n'Sure, it's an arm, yer honour!' (He pronounced it 'arrum.')\r\n\r\n'An arm, you goose! Who ever saw one that size? Why, it fills the whole\r\nwindow!'\r\n\r\n'Sure, it does, yer honour: but it's an arm for all that.'\r\n\r\n'Well, it's got no business there, at any rate: go and take it away!'\r\n\r\nThere was a long silence after this, and Alice could only hear whispers\r\nnow and then; such as, 'Sure, I don't like it, yer honour, at all, at\r\nall!' 'Do as I tell you, you coward!' and at last she spread out her\r\nhand again, and made another snatch in the air. This time there were\r\nTWO little shrieks, and more sounds of broken glass. 'What a number of\r\ncucumber-frames there must be!' thought Alice. 
'I wonder what they'll do\r\nnext! As for pulling me out of the window, I only wish they COULD! I'm\r\nsure I don't want to stay in here any longer!'\r\n\r\nShe waited for some time without hearing anything more: at last came a\r\nrumbling of little cartwheels, and the sound of a good many voices\r\nall talking together: she made out the words: 'Where's the other\r\nladder?--Why, I hadn't to bring but one; Bill's got the other--Bill!\r\nfetch it here, lad!--Here, put 'em up at this corner--No, tie 'em\r\ntogether first--they don't reach half high enough yet--Oh! they'll\r\ndo well enough; don't be particular--Here, Bill! catch hold of this\r\nrope--Will the roof bear?--Mind that loose slate--Oh, it's coming\r\ndown! Heads below!' (a loud crash)--'Now, who did that?--It was Bill, I\r\nfancy--Who's to go down the chimney?--Nay, I shan't! YOU do it!--That I\r\nwon't, then!--Bill's to go down--Here, Bill! the master says you're to\r\ngo down the chimney!'\r\n\r\n'Oh! So Bill's got to come down the chimney, has he?' said Alice to\r\nherself. 'Why, they seem to put everything upon Bill! I wouldn't be in\r\nBill's place for a good deal: this fireplace is narrow, to be sure; but\r\nI THINK I can kick a little!'\r\n\r\nShe drew her foot as far down the chimney as she could, and waited\r\ntill she heard a little animal (she couldn't guess of what sort it was)\r\nscratching and scrambling about in the chimney close above her: then,\r\nsaying to herself 'This is Bill,' she gave one sharp kick, and waited to\r\nsee what would happen next.\r\n\r\nThe first thing she heard was a general chorus of 'There goes Bill!'\r\nthen the Rabbit's voice alone--'Catch him, you by the hedge!' then\r\nsilence, and then another confusion of voices--'Hold up his head--Brandy\r\nnow--Don't choke him--How was it, old fellow? What happened to you? 
Tell\r\nus all about it!'\r\n\r\nLast came a little feeble, squeaking voice, ('That's Bill,' thought\r\nAlice,) 'Well, I hardly know--No more, thank ye; I'm better now--but I'm\r\na deal too flustered to tell you--all I know is, something comes at me\r\nlike a Jack-in-the-box, and up I goes like a sky-rocket!'\r\n\r\n'So you did, old fellow!' said the others.\r\n\r\n'We must burn the house down!' said the Rabbit's voice; and Alice called\r\nout as loud as she could, 'If you do, I'll set Dinah at you!'\r\n\r\nThere was a dead silence instantly, and Alice thought to herself, 'I\r\nwonder what they WILL do next! If they had any sense, they'd take the\r\nroof off.' After a minute or two, they began moving about again, and\r\nAlice heard the Rabbit say, 'A barrowful will do, to begin with.'\r\n\r\n'A barrowful of WHAT?' thought Alice; but she had not long to doubt,\r\nfor the next moment a shower of little pebbles came rattling in at the\r\nwindow, and some of them hit her in the face. 'I'll put a stop to this,'\r\nshe said to herself, and shouted out, 'You'd better not do that again!'\r\nwhich produced another dead silence.\r\n\r\nAlice noticed with some surprise that the pebbles were all turning into\r\nlittle cakes as they lay on the floor, and a bright idea came into her\r\nhead. 'If I eat one of these cakes,' she thought, 'it's sure to make\r\nSOME change in my size; and as it can't possibly make me larger, it must\r\nmake me smaller, I suppose.'\r\n\r\nSo she swallowed one of the cakes, and was delighted to find that she\r\nbegan shrinking directly. As soon as she was small enough to get through\r\nthe door, she ran out of the house, and found quite a crowd of little\r\nanimals and birds waiting outside. The poor little Lizard, Bill, was\r\nin the middle, being held up by two guinea-pigs, who were giving it\r\nsomething out of a bottle. 
They all made a rush at Alice the moment she\r\nappeared; but she ran off as hard as she could, and soon found herself\r\nsafe in a thick wood.\r\n\r\n'The first thing I've got to do,' said Alice to herself, as she wandered\r\nabout in the wood, 'is to grow to my right size again; and the second\r\nthing is to find my way into that lovely garden. I think that will be\r\nthe best plan.'\r\n\r\nIt sounded an excellent plan, no doubt, and very neatly and simply\r\narranged; the only difficulty was, that she had not the smallest idea\r\nhow to set about it; and while she was peering about anxiously among\r\nthe trees, a little sharp bark just over her head made her look up in a\r\ngreat hurry.\r\n\r\nAn enormous puppy was looking down at her with large round eyes, and\r\nfeebly stretching out one paw, trying to touch her. 'Poor little thing!'\r\nsaid Alice, in a coaxing tone, and she tried hard to whistle to it; but\r\nshe was terribly frightened all the time at the thought that it might be\r\nhungry, in which case it would be very likely to eat her up in spite of\r\nall her coaxing.\r\n\r\nHardly knowing what she did, she picked up a little bit of stick, and\r\nheld it out to the puppy; whereupon the puppy jumped into the air off\r\nall its feet at once, with a yelp of delight, and rushed at the stick,\r\nand made believe to worry it; then Alice dodged behind a great thistle,\r\nto keep herself from being run over; and the moment she appeared on the\r\nother side, the puppy made another rush at the stick, and tumbled head\r\nover heels in its hurry to get hold of it; then Alice, thinking it was\r\nvery like having a game of play with a cart-horse, and expecting every\r\nmoment to be trampled under its feet, ran round the thistle again; then\r\nthe puppy began a series of short charges at the stick, running a very\r\nlittle way forwards each time and a long way back, and barking hoarsely\r\nall the while, till at last it sat down a good way off, panting, with\r\nits 
tongue hanging out of its mouth, and its great eyes half shut.\r\n\r\nThis seemed to Alice a good opportunity for making her escape; so she\r\nset off at once, and ran till she was quite tired and out of breath, and\r\ntill the puppy's bark sounded quite faint in the distance.\r\n\r\n'And yet what a dear little puppy it was!' said Alice, as she leant\r\nagainst a buttercup to rest herself, and fanned herself with one of the\r\nleaves: 'I should have liked teaching it tricks very much, if--if I'd\r\nonly been the right size to do it! Oh dear! I'd nearly forgotten that\r\nI've got to grow up again! Let me see--how IS it to be managed? I\r\nsuppose I ought to eat or drink something or other; but the great\r\nquestion is, what?'\r\n\r\nThe great question certainly was, what? Alice looked all round her at\r\nthe flowers and the blades of grass, but she did not see anything that\r\nlooked like the right thing to eat or drink under the circumstances.\r\nThere was a large mushroom growing near her, about the same height as\r\nherself; and when she had looked under it, and on both sides of it, and\r\nbehind it, it occurred to her that she might as well look and see what\r\nwas on the top of it.\r\n\r\nShe stretched herself up on tiptoe, and peeped over the edge of the\r\nmushroom, and her eyes immediately met those of a large caterpillar,\r\nthat was sitting on the top with its arms folded, quietly smoking a long\r\nhookah, and taking not the smallest notice of her or of anything else.\r\n\r\n\r\n\r\n\r\nCHAPTER V. Advice from a Caterpillar\r\n\r\nThe Caterpillar and Alice looked at each other for some time in silence:\r\nat last the Caterpillar took the hookah out of its mouth, and addressed\r\nher in a languid, sleepy voice.\r\n\r\n'Who are YOU?' said the Caterpillar.\r\n\r\nThis was not an encouraging opening for a conversation. 
Alice replied,\r\nrather shyly, 'I--I hardly know, sir, just at present--at least I know\r\nwho I WAS when I got up this morning, but I think I must have been\r\nchanged several times since then.'\r\n\r\n'What do you mean by that?' said the Caterpillar sternly. 'Explain\r\nyourself!'\r\n\r\n'I can't explain MYSELF, I'm afraid, sir' said Alice, 'because I'm not\r\nmyself, you see.'\r\n\r\n'I don't see,' said the Caterpillar.\r\n\r\n'I'm afraid I can't put it more clearly,' Alice replied very politely,\r\n'for I can't understand it myself to begin with; and being so many\r\ndifferent sizes in a day is very confusing.'\r\n\r\n'It isn't,' said the Caterpillar.\r\n\r\n'Well, perhaps you haven't found it so yet,' said Alice; 'but when you\r\nhave to turn into a chrysalis--you will some day, you know--and then\r\nafter that into a butterfly, I should think you'll feel it a little\r\nqueer, won't you?'\r\n\r\n'Not a bit,' said the Caterpillar.\r\n\r\n'Well, perhaps your feelings may be different,' said Alice; 'all I know\r\nis, it would feel very queer to ME.'\r\n\r\n'You!' said the Caterpillar contemptuously. 'Who are YOU?'\r\n\r\nWhich brought them back again to the beginning of the conversation.\r\nAlice felt a little irritated at the Caterpillar's making such VERY\r\nshort remarks, and she drew herself up and said, very gravely, 'I think,\r\nyou ought to tell me who YOU are, first.'\r\n\r\n'Why?' said the Caterpillar.\r\n\r\nHere was another puzzling question; and as Alice could not think of any\r\ngood reason, and as the Caterpillar seemed to be in a VERY unpleasant\r\nstate of mind, she turned away.\r\n\r\n'Come back!' the Caterpillar called after her. 'I've something important\r\nto say!'\r\n\r\nThis sounded promising, certainly: Alice turned and came back again.\r\n\r\n'Keep your temper,' said the Caterpillar.\r\n\r\n'Is that all?' 
said Alice, swallowing down her anger as well as she\r\ncould.\r\n\r\n'No,' said the Caterpillar.\r\n\r\nAlice thought she might as well wait, as she had nothing else to do, and\r\nperhaps after all it might tell her something worth hearing. For some\r\nminutes it puffed away without speaking, but at last it unfolded its\r\narms, took the hookah out of its mouth again, and said, 'So you think\r\nyou're changed, do you?'\r\n\r\n'I'm afraid I am, sir,' said Alice; 'I can't remember things as I\r\nused--and I don't keep the same size for ten minutes together!'\r\n\r\n'Can't remember WHAT things?' said the Caterpillar.\r\n\r\n'Well, I've tried to say \"HOW DOTH THE LITTLE BUSY BEE,\" but it all came\r\ndifferent!' Alice replied in a very melancholy voice.\r\n\r\n'Repeat, \"YOU ARE OLD, FATHER WILLIAM,\"' said the Caterpillar.\r\n\r\nAlice folded her hands, and began:--\r\n\r\n   'You are old, Father William,' the young man said,\r\n    'And your hair has become very white;\r\n   And yet you incessantly stand on your head--\r\n    Do you think, at your age, it is right?'\r\n\r\n   'In my youth,' Father William replied to his son,\r\n    'I feared it might injure the brain;\r\n   But, now that I'm perfectly sure I have none,\r\n    Why, I do it again and again.'\r\n\r\n   'You are old,' said the youth, 'as I mentioned before,\r\n    And have grown most uncommonly fat;\r\n   Yet you turned a back-somersault in at the door--\r\n    Pray, what is the reason of that?'\r\n\r\n   'In my youth,' said the sage, as he shook his grey locks,\r\n    'I kept all my limbs very supple\r\n   By the use of this ointment--one shilling the box--\r\n    Allow me to sell you a couple?'\r\n\r\n   'You are old,' said the youth, 'and your jaws are too weak\r\n    For anything tougher than suet;\r\n   Yet you finished the goose, with the bones and the beak--\r\n    Pray how did you manage to do it?'\r\n\r\n   'In my youth,' said his father, 'I took to the law,\r\n    And argued each case with my 
wife;\r\n   And the muscular strength, which it gave to my jaw,\r\n    Has lasted the rest of my life.'\r\n\r\n   'You are old,' said the youth, 'one would hardly suppose\r\n    That your eye was as steady as ever;\r\n   Yet you balanced an eel on the end of your nose--\r\n    What made you so awfully clever?'\r\n\r\n   'I have answered three questions, and that is enough,'\r\n    Said his father; 'don't give yourself airs!\r\n   Do you think I can listen all day to such stuff?\r\n    Be off, or I'll kick you down stairs!'\r\n\r\n\r\n'That is not said right,' said the Caterpillar.\r\n\r\n'Not QUITE right, I'm afraid,' said Alice, timidly; 'some of the words\r\nhave got altered.'\r\n\r\n'It is wrong from beginning to end,' said the Caterpillar decidedly, and\r\nthere was silence for some minutes.\r\n\r\nThe Caterpillar was the first to speak.\r\n\r\n'What size do you want to be?' it asked.\r\n\r\n'Oh, I'm not particular as to size,' Alice hastily replied; 'only one\r\ndoesn't like changing so often, you know.'\r\n\r\n'I DON'T know,' said the Caterpillar.\r\n\r\nAlice said nothing: she had never been so much contradicted in her life\r\nbefore, and she felt that she was losing her temper.\r\n\r\n'Are you content now?' said the Caterpillar.\r\n\r\n'Well, I should like to be a LITTLE larger, sir, if you wouldn't mind,'\r\nsaid Alice: 'three inches is such a wretched height to be.'\r\n\r\n'It is a very good height indeed!' said the Caterpillar angrily, rearing\r\nitself upright as it spoke (it was exactly three inches high).\r\n\r\n'But I'm not used to it!' pleaded poor Alice in a piteous tone. And\r\nshe thought to herself, 'I wish the creatures wouldn't be so easily\r\noffended!'\r\n\r\n'You'll get used to it in time,' said the Caterpillar; and it put the\r\nhookah into its mouth and began smoking again.\r\n\r\nThis time Alice waited patiently until it chose to speak again. 
In\r\na minute or two the Caterpillar took the hookah out of its mouth\r\nand yawned once or twice, and shook itself. Then it got down off the\r\nmushroom, and crawled away in the grass, merely remarking as it went,\r\n'One side will make you grow taller, and the other side will make you\r\ngrow shorter.'\r\n\r\n'One side of WHAT? The other side of WHAT?' thought Alice to herself.\r\n\r\n'Of the mushroom,' said the Caterpillar, just as if she had asked it\r\naloud; and in another moment it was out of sight.\r\n\r\nAlice remained looking thoughtfully at the mushroom for a minute, trying\r\nto make out which were the two sides of it; and as it was perfectly\r\nround, she found this a very difficult question. However, at last she\r\nstretched her arms round it as far as they would go, and broke off a bit\r\nof the edge with each hand.\r\n\r\n'And now which is which?' she said to herself, and nibbled a little of\r\nthe right-hand bit to try the effect: the next moment she felt a violent\r\nblow underneath her chin: it had struck her foot!\r\n\r\nShe was a good deal frightened by this very sudden change, but she felt\r\nthat there was no time to be lost, as she was shrinking rapidly; so she\r\nset to work at once to eat some of the other bit. Her chin was pressed\r\nso closely against her foot, that there was hardly room to open her\r\nmouth; but she did it at last, and managed to swallow a morsel of the\r\nlefthand bit.\r\n\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n    *    *    *    *    *    *\r\n\r\n  *    *    *    *    *    *    *\r\n\r\n'Come, my head's free at last!' said Alice in a tone of delight, which\r\nchanged into alarm in another moment, when she found that her shoulders\r\nwere nowhere to be found: all she could see, when she looked down, was\r\nan immense length of neck, which seemed to rise like a stalk out of a\r\nsea of green leaves that lay far below her.\r\n\r\n'What CAN all that green stuff be?' said Alice. 
'And where HAVE my\r\nshoulders got to? And oh, my poor hands, how is it I can't see you?'\r\nShe was moving them about as she spoke, but no result seemed to follow,\r\nexcept a little shaking among the distant green leaves.\r\n\r\nAs there seemed to be no chance of getting her hands up to her head, she\r\ntried to get her head down to them, and was delighted to find that her\r\nneck would bend about easily in any direction, like a serpent. She had\r\njust succeeded in curving it down into a graceful zigzag, and was going\r\nto dive in among the leaves, which she found to be nothing but the tops\r\nof the trees under which she had been wandering, when a sharp hiss made\r\nher draw back in a hurry: a large pigeon had flown into her face, and\r\nwas beating her violently with its wings.\r\n\r\n'Serpent!' screamed the Pigeon.\r\n\r\n'I'm NOT a serpent!' said Alice indignantly. 'Let me alone!'\r\n\r\n'Serpent, I say again!' repeated the Pigeon, but in a more subdued tone,\r\nand added with a kind of sob, 'I've tried every way, and nothing seems\r\nto suit them!'\r\n\r\n'I haven't the least idea what you're talking about,' said Alice.\r\n\r\n'I've tried the roots of trees, and I've tried banks, and I've tried\r\nhedges,' the Pigeon went on, without attending to her; 'but those\r\nserpents! There's no pleasing them!'\r\n\r\nAlice was more and more puzzled, but she thought there was no use in\r\nsaying anything more till the Pigeon had finished.\r\n\r\n'As if it wasn't trouble enough hatching the eggs,' said the Pigeon;\r\n'but I must be on the look-out for serpents night and day! Why, I\r\nhaven't had a wink of sleep these three weeks!'\r\n\r\n'I'm very sorry you've been annoyed,' said Alice, who was beginning to\r\nsee its meaning.\r\n\r\n'And just as I'd taken the highest tree in the wood,' continued the\r\nPigeon, raising its voice to a shriek, 'and just as I was thinking I\r\nshould be free of them at last, they must needs come wriggling down from\r\nthe sky! 
Ugh, Serpent!'\r\n\r\n'But I'm NOT a serpent, I tell you!' said Alice. 'I'm a--I'm a--'\r\n\r\n'Well! WHAT are you?' said the Pigeon. 'I can see you're trying to\r\ninvent something!'\r\n\r\n'I--I'm a little girl,' said Alice, rather doubtfully, as she remembered\r\nthe number of changes she had gone through that day.\r\n\r\n'A likely story indeed!' said the Pigeon in a tone of the deepest\r\ncontempt. 'I've seen a good many little girls in my time, but never ONE\r\nwith such a neck as that! No, no! You're a serpent; and there's no use\r\ndenying it. I suppose you'll be telling me next that you never tasted an\r\negg!'\r\n\r\n'I HAVE tasted eggs, certainly,' said Alice, who was a very truthful\r\nchild; 'but little girls eat eggs quite as much as serpents do, you\r\nknow.'\r\n\r\n'I don't believe it,' said the Pigeon; 'but if they do, why then they're\r\na kind of serpent, that's all I can say.'\r\n\r\nThis was such a new idea to Alice, that she was quite silent for a\r\nminute or two, which gave the Pigeon the opportunity of adding, 'You're\r\nlooking for eggs, I know THAT well enough; and what does it matter to me\r\nwhether you're a little girl or a serpent?'\r\n\r\n'It matters a good deal to ME,' said Alice hastily; 'but I'm not looking\r\nfor eggs, as it happens; and if I was, I shouldn't want YOURS: I don't\r\nlike them raw.'\r\n\r\n'Well, be off, then!' said the Pigeon in a sulky tone, as it settled\r\ndown again into its nest. Alice crouched down among the trees as well as\r\nshe could, for her neck kept getting entangled among the branches, and\r\nevery now and then she had to stop and untwist it. 
After a while she\r\nremembered that she still held the pieces of mushroom in her hands, and\r\nshe set to work very carefully, nibbling first at one and then at the\r\nother, and growing sometimes taller and sometimes shorter, until she had\r\nsucceeded in bringing herself down to her usual height.\r\n\r\nIt was so long since she had been anything near the right size, that it\r\nfelt quite strange at first; but she got used to it in a few minutes,\r\nand began talking to herself, as usual. 'Come, there's half my plan done\r\nnow! How puzzling all these changes are! I'm never sure what I'm going\r\nto be, from one minute to another! However, I've got back to my right\r\nsize: the next thing is, to get into that beautiful garden--how IS that\r\nto be done, I wonder?' As she said this, she came suddenly upon an open\r\nplace, with a little house in it about four feet high. 'Whoever lives\r\nthere,' thought Alice, 'it'll never do to come upon them THIS size: why,\r\nI should frighten them out of their wits!' So she began nibbling at the\r\nrighthand bit again, and did not venture to go near the house till she\r\nhad brought herself down to nine inches high.\r\n\r\n\r\n\r\n\r\nCHAPTER VI. Pig and Pepper\r\n\r\nFor a minute or two she stood looking at the house, and wondering what\r\nto do next, when suddenly a footman in livery came running out of the\r\nwood--(she considered him to be a footman because he was in livery:\r\notherwise, judging by his face only, she would have called him a\r\nfish)--and rapped loudly at the door with his knuckles. It was opened\r\nby another footman in livery, with a round face, and large eyes like a\r\nfrog; and both footmen, Alice noticed, had powdered hair that curled all\r\nover their heads. 
She felt very curious to know what it was all about,\r\nand crept a little way out of the wood to listen.\r\n\r\nThe Fish-Footman began by producing from under his arm a great letter,\r\nnearly as large as himself, and this he handed over to the other,\r\nsaying, in a solemn tone, 'For the Duchess. An invitation from the Queen\r\nto play croquet.' The Frog-Footman repeated, in the same solemn tone,\r\nonly changing the order of the words a little, 'From the Queen. An\r\ninvitation for the Duchess to play croquet.'\r\n\r\nThen they both bowed low, and their curls got entangled together.\r\n\r\nAlice laughed so much at this, that she had to run back into the\r\nwood for fear of their hearing her; and when she next peeped out the\r\nFish-Footman was gone, and the other was sitting on the ground near the\r\ndoor, staring stupidly up into the sky.\r\n\r\nAlice went timidly up to the door, and knocked.\r\n\r\n'There's no sort of use in knocking,' said the Footman, 'and that for\r\ntwo reasons. First, because I'm on the same side of the door as you\r\nare; secondly, because they're making such a noise inside, no one could\r\npossibly hear you.' And certainly there was a most extraordinary noise\r\ngoing on within--a constant howling and sneezing, and every now and then\r\na great crash, as if a dish or kettle had been broken to pieces.\r\n\r\n'Please, then,' said Alice, 'how am I to get in?'\r\n\r\n'There might be some sense in your knocking,' the Footman went on\r\nwithout attending to her, 'if we had the door between us. For instance,\r\nif you were INSIDE, you might knock, and I could let you out, you know.'\r\nHe was looking up into the sky all the time he was speaking, and this\r\nAlice thought decidedly uncivil. 'But perhaps he can't help it,' she\r\nsaid to herself; 'his eyes are so VERY nearly at the top of his head.\r\nBut at any rate he might answer questions.--How am I to get in?' 
she\r\nrepeated, aloud.\r\n\r\n'I shall sit here,' the Footman remarked, 'till tomorrow--'\r\n\r\nAt this moment the door of the house opened, and a large plate came\r\nskimming out, straight at the Footman's head: it just grazed his nose,\r\nand broke to pieces against one of the trees behind him.\r\n\r\n'--or next day, maybe,' the Footman continued in the same tone, exactly\r\nas if nothing had happened.\r\n\r\n'How am I to get in?' asked Alice again, in a louder tone.\r\n\r\n'ARE you to get in at all?' said the Footman. 'That's the first\r\nquestion, you know.'\r\n\r\nIt was, no doubt: only Alice did not like to be told so. 'It's really\r\ndreadful,' she muttered to herself, 'the way all the creatures argue.\r\nIt's enough to drive one crazy!'\r\n\r\nThe Footman seemed to think this a good opportunity for repeating his\r\nremark, with variations. 'I shall sit here,' he said, 'on and off, for\r\ndays and days.'\r\n\r\n'But what am I to do?' said Alice.\r\n\r\n'Anything you like,' said the Footman, and began whistling.\r\n\r\n'Oh, there's no use in talking to him,' said Alice desperately: 'he's\r\nperfectly idiotic!' And she opened the door and went in.\r\n\r\nThe door led right into a large kitchen, which was full of smoke from\r\none end to the other: the Duchess was sitting on a three-legged stool in\r\nthe middle, nursing a baby; the cook was leaning over the fire, stirring\r\na large cauldron which seemed to be full of soup.\r\n\r\n'There's certainly too much pepper in that soup!' Alice said to herself,\r\nas well as she could for sneezing.\r\n\r\nThere was certainly too much of it in the air. Even the Duchess\r\nsneezed occasionally; and as for the baby, it was sneezing and howling\r\nalternately without a moment's pause. 
The only things in the kitchen\r\nthat did not sneeze, were the cook, and a large cat which was sitting on\r\nthe hearth and grinning from ear to ear.\r\n\r\n'Please would you tell me,' said Alice, a little timidly, for she was\r\nnot quite sure whether it was good manners for her to speak first, 'why\r\nyour cat grins like that?'\r\n\r\n'It's a Cheshire cat,' said the Duchess, 'and that's why. Pig!'\r\n\r\nShe said the last word with such sudden violence that Alice quite\r\njumped; but she saw in another moment that it was addressed to the baby,\r\nand not to her, so she took courage, and went on again:--\r\n\r\n'I didn't know that Cheshire cats always grinned; in fact, I didn't know\r\nthat cats COULD grin.'\r\n\r\n'They all can,' said the Duchess; 'and most of 'em do.'\r\n\r\n'I don't know of any that do,' Alice said very politely, feeling quite\r\npleased to have got into a conversation.\r\n\r\n'You don't know much,' said the Duchess; 'and that's a fact.'\r\n\r\nAlice did not at all like the tone of this remark, and thought it would\r\nbe as well to introduce some other subject of conversation. While she\r\nwas trying to fix on one, the cook took the cauldron of soup off the\r\nfire, and at once set to work throwing everything within her reach at\r\nthe Duchess and the baby--the fire-irons came first; then followed a\r\nshower of saucepans, plates, and dishes. The Duchess took no notice of\r\nthem even when they hit her; and the baby was howling so much already,\r\nthat it was quite impossible to say whether the blows hurt it or not.\r\n\r\n'Oh, PLEASE mind what you're doing!' cried Alice, jumping up and down in\r\nan agony of terror. 
'Oh, there goes his PRECIOUS nose'; as an unusually\r\nlarge saucepan flew close by it, and very nearly carried it off.\r\n\r\n'If everybody minded their own business,' the Duchess said in a hoarse\r\ngrowl, 'the world would go round a deal faster than it does.'\r\n\r\n'Which would NOT be an advantage,' said Alice, who felt very glad to get\r\nan opportunity of showing off a little of her knowledge. 'Just think of\r\nwhat work it would make with the day and night! You see the earth takes\r\ntwenty-four hours to turn round on its axis--'\r\n\r\n'Talking of axes,' said the Duchess, 'chop off her head!'\r\n\r\nAlice glanced rather anxiously at the cook, to see if she meant to take\r\nthe hint; but the cook was busily stirring the soup, and seemed not to\r\nbe listening, so she went on again: 'Twenty-four hours, I THINK; or is\r\nit twelve? I--'\r\n\r\n'Oh, don't bother ME,' said the Duchess; 'I never could abide figures!'\r\nAnd with that she began nursing her child again, singing a sort of\r\nlullaby to it as she did so, and giving it a violent shake at the end of\r\nevery line:\r\n\r\n   'Speak roughly to your little boy,\r\n    And beat him when he sneezes:\r\n   He only does it to annoy,\r\n    Because he knows it teases.'\r\n\r\n         CHORUS.\r\n\r\n (In which the cook and the baby joined):--\r\n\r\n       'Wow! wow! wow!'\r\n\r\nWhile the Duchess sang the second verse of the song, she kept tossing\r\nthe baby violently up and down, and the poor little thing howled so,\r\nthat Alice could hardly hear the words:--\r\n\r\n   'I speak severely to my boy,\r\n    I beat him when he sneezes;\r\n   For he can thoroughly enjoy\r\n    The pepper when he pleases!'\r\n\r\n         CHORUS.\r\n\r\n       'Wow! wow! wow!'\r\n\r\n'Here! you may nurse it a bit, if you like!' the Duchess said to Alice,\r\nflinging the baby at her as she spoke. 'I must go and get ready to play\r\ncroquet with the Queen,' and she hurried out of the room. 
The cook threw\r\na frying-pan after her as she went out, but it just missed her.\r\n\r\nAlice caught the baby with some difficulty, as it was a queer-shaped\r\nlittle creature, and held out its arms and legs in all directions, 'just\r\nlike a star-fish,' thought Alice. The poor little thing was snorting\r\nlike a steam-engine when she caught it, and kept doubling itself up and\r\nstraightening itself out again, so that altogether, for the first minute\r\nor two, it was as much as she could do to hold it.\r\n\r\nAs soon as she had made out the proper way of nursing it, (which was to\r\ntwist it up into a sort of knot, and then keep tight hold of its right\r\near and left foot, so as to prevent its undoing itself,) she carried\r\nit out into the open air. 'IF I don't take this child away with me,'\r\nthought Alice, 'they're sure to kill it in a day or two: wouldn't it be\r\nmurder to leave it behind?' She said the last words out loud, and the\r\nlittle thing grunted in reply (it had left off sneezing by this time).\r\n'Don't grunt,' said Alice; 'that's not at all a proper way of expressing\r\nyourself.'\r\n\r\nThe baby grunted again, and Alice looked very anxiously into its face to\r\nsee what was the matter with it. There could be no doubt that it had\r\na VERY turn-up nose, much more like a snout than a real nose; also its\r\neyes were getting extremely small for a baby: altogether Alice did not\r\nlike the look of the thing at all. 'But perhaps it was only sobbing,'\r\nshe thought, and looked into its eyes again, to see if there were any\r\ntears.\r\n\r\nNo, there were no tears. 'If you're going to turn into a pig, my dear,'\r\nsaid Alice, seriously, 'I'll have nothing more to do with you. Mind\r\nnow!' The poor little thing sobbed again (or grunted, it was impossible\r\nto say which), and they went on for some while in silence.\r\n\r\nAlice was just beginning to think to herself, 'Now, what am I to do with\r\nthis creature when I get it home?' 
when it grunted again, so violently,\r\nthat she looked down into its face in some alarm. This time there could\r\nbe NO mistake about it: it was neither more nor less than a pig, and she\r\nfelt that it would be quite absurd for her to carry it further.\r\n\r\nSo she set the little creature down, and felt quite relieved to see\r\nit trot away quietly into the wood. 'If it had grown up,' she said\r\nto herself, 'it would have made a dreadfully ugly child: but it makes\r\nrather a handsome pig, I think.' And she began thinking over other\r\nchildren she knew, who might do very well as pigs, and was just saying\r\nto herself, 'if one only knew the right way to change them--' when she\r\nwas a little startled by seeing the Cheshire Cat sitting on a bough of a\r\ntree a few yards off.\r\n\r\nThe Cat only grinned when it saw Alice. It looked good-natured, she\r\nthought: still it had VERY long claws and a great many teeth, so she\r\nfelt that it ought to be treated with respect.\r\n\r\n'Cheshire Puss,' she began, rather timidly, as she did not at all know\r\nwhether it would like the name: however, it only grinned a little wider.\r\n'Come, it's pleased so far,' thought Alice, and she went on. 'Would you\r\ntell me, please, which way I ought to go from here?'\r\n\r\n'That depends a good deal on where you want to get to,' said the Cat.\r\n\r\n'I don't much care where--' said Alice.\r\n\r\n'Then it doesn't matter which way you go,' said the Cat.\r\n\r\n'--so long as I get SOMEWHERE,' Alice added as an explanation.\r\n\r\n'Oh, you're sure to do that,' said the Cat, 'if you only walk long\r\nenough.'\r\n\r\nAlice felt that this could not be denied, so she tried another question.\r\n'What sort of people live about here?'\r\n\r\n'In THAT direction,' the Cat said, waving its right paw round, 'lives\r\na Hatter: and in THAT direction,' waving the other paw, 'lives a March\r\nHare. 
Visit either you like: they're both mad.'\r\n\r\n'But I don't want to go among mad people,' Alice remarked.\r\n\r\n'Oh, you can't help that,' said the Cat: 'we're all mad here. I'm mad.\r\nYou're mad.'\r\n\r\n'How do you know I'm mad?' said Alice.\r\n\r\n'You must be,' said the Cat, 'or you wouldn't have come here.'\r\n\r\nAlice didn't think that proved it at all; however, she went on 'And how\r\ndo you know that you're mad?'\r\n\r\n'To begin with,' said the Cat, 'a dog's not mad. You grant that?'\r\n\r\n'I suppose so,' said Alice.\r\n\r\n'Well, then,' the Cat went on, 'you see, a dog growls when it's angry,\r\nand wags its tail when it's pleased. Now I growl when I'm pleased, and\r\nwag my tail when I'm angry. Therefore I'm mad.'\r\n\r\n'I call it purring, not growling,' said Alice.\r\n\r\n'Call it what you like,' said the Cat. 'Do you play croquet with the\r\nQueen to-day?'\r\n\r\n'I should like it very much,' said Alice, 'but I haven't been invited\r\nyet.'\r\n\r\n'You'll see me there,' said the Cat, and vanished.\r\n\r\nAlice was not much surprised at this, she was getting so used to queer\r\nthings happening. While she was looking at the place where it had been,\r\nit suddenly appeared again.\r\n\r\n'By-the-bye, what became of the baby?' said the Cat. 'I'd nearly\r\nforgotten to ask.'\r\n\r\n'It turned into a pig,' Alice quietly said, just as if it had come back\r\nin a natural way.\r\n\r\n'I thought it would,' said the Cat, and vanished again.\r\n\r\nAlice waited a little, half expecting to see it again, but it did not\r\nappear, and after a minute or two she walked on in the direction in\r\nwhich the March Hare was said to live. 'I've seen hatters before,' she\r\nsaid to herself; 'the March Hare will be much the most interesting, and\r\nperhaps as this is May it won't be raving mad--at least not so mad as\r\nit was in March.' As she said this, she looked up, and there was the Cat\r\nagain, sitting on a branch of a tree.\r\n\r\n'Did you say pig, or fig?' 
said the Cat.\r\n\r\n'I said pig,' replied Alice; 'and I wish you wouldn't keep appearing and\r\nvanishing so suddenly: you make one quite giddy.'\r\n\r\n'All right,' said the Cat; and this time it vanished quite slowly,\r\nbeginning with the end of the tail, and ending with the grin, which\r\nremained some time after the rest of it had gone.\r\n\r\n'Well! I've often seen a cat without a grin,' thought Alice; 'but a grin\r\nwithout a cat! It's the most curious thing I ever saw in my life!'\r\n\r\nShe had not gone much farther before she came in sight of the house\r\nof the March Hare: she thought it must be the right house, because the\r\nchimneys were shaped like ears and the roof was thatched with fur. It\r\nwas so large a house, that she did not like to go nearer till she had\r\nnibbled some more of the lefthand bit of mushroom, and raised herself to\r\nabout two feet high: even then she walked up towards it rather timidly,\r\nsaying to herself 'Suppose it should be raving mad after all! I almost\r\nwish I'd gone to see the Hatter instead!'\r\n\r\n\r\n\r\n\r\nCHAPTER VII. A Mad Tea-Party\r\n\r\nThere was a table set out under a tree in front of the house, and the\r\nMarch Hare and the Hatter were having tea at it: a Dormouse was sitting\r\nbetween them, fast asleep, and the other two were using it as a\r\ncushion, resting their elbows on it, and talking over its head. 'Very\r\nuncomfortable for the Dormouse,' thought Alice; 'only, as it's asleep, I\r\nsuppose it doesn't mind.'\r\n\r\nThe table was a large one, but the three were all crowded together at\r\none corner of it: 'No room! No room!' they cried out when they saw Alice\r\ncoming. 'There's PLENTY of room!' 
said Alice indignantly, and she sat\r\ndown in a large arm-chair at one end of the table.\r\n\r\n'Have some wine,' the March Hare said in an encouraging tone.\r\n\r\nAlice looked all round the table, but there was nothing on it but tea.\r\n'I don't see any wine,' she remarked.\r\n\r\n'There isn't any,' said the March Hare.\r\n\r\n'Then it wasn't very civil of you to offer it,' said Alice angrily.\r\n\r\n'It wasn't very civil of you to sit down without being invited,' said\r\nthe March Hare.\r\n\r\n'I didn't know it was YOUR table,' said Alice; 'it's laid for a great\r\nmany more than three.'\r\n\r\n'Your hair wants cutting,' said the Hatter. He had been looking at Alice\r\nfor some time with great curiosity, and this was his first speech.\r\n\r\n'You should learn not to make personal remarks,' Alice said with some\r\nseverity; 'it's very rude.'\r\n\r\nThe Hatter opened his eyes very wide on hearing this; but all he SAID\r\nwas, 'Why is a raven like a writing-desk?'\r\n\r\n'Come, we shall have some fun now!' thought Alice. 'I'm glad they've\r\nbegun asking riddles.--I believe I can guess that,' she added aloud.\r\n\r\n'Do you mean that you think you can find out the answer to it?' said the\r\nMarch Hare.\r\n\r\n'Exactly so,' said Alice.\r\n\r\n'Then you should say what you mean,' the March Hare went on.\r\n\r\n'I do,' Alice hastily replied; 'at least--at least I mean what I\r\nsay--that's the same thing, you know.'\r\n\r\n'Not the same thing a bit!' said the Hatter. 
'You might just as well say\r\nthat \"I see what I eat\" is the same thing as \"I eat what I see\"!'\r\n\r\n'You might just as well say,' added the March Hare, 'that \"I like what I\r\nget\" is the same thing as \"I get what I like\"!'\r\n\r\n'You might just as well say,' added the Dormouse, who seemed to be\r\ntalking in his sleep, 'that \"I breathe when I sleep\" is the same thing\r\nas \"I sleep when I breathe\"!'\r\n\r\n'It IS the same thing with you,' said the Hatter, and here the\r\nconversation dropped, and the party sat silent for a minute, while Alice\r\nthought over all she could remember about ravens and writing-desks,\r\nwhich wasn't much.\r\n\r\nThe Hatter was the first to break the silence. 'What day of the month\r\nis it?' he said, turning to Alice: he had taken his watch out of his\r\npocket, and was looking at it uneasily, shaking it every now and then,\r\nand holding it to his ear.\r\n\r\nAlice considered a little, and then said 'The fourth.'\r\n\r\n'Two days wrong!' sighed the Hatter. 'I told you butter wouldn't suit\r\nthe works!' he added looking angrily at the March Hare.\r\n\r\n'It was the BEST butter,' the March Hare meekly replied.\r\n\r\n'Yes, but some crumbs must have got in as well,' the Hatter grumbled:\r\n'you shouldn't have put it in with the bread-knife.'\r\n\r\nThe March Hare took the watch and looked at it gloomily: then he dipped\r\nit into his cup of tea, and looked at it again: but he could think of\r\nnothing better to say than his first remark, 'It was the BEST butter,\r\nyou know.'\r\n\r\nAlice had been looking over his shoulder with some curiosity. 'What a\r\nfunny watch!' she remarked. 'It tells the day of the month, and doesn't\r\ntell what o'clock it is!'\r\n\r\n'Why should it?' muttered the Hatter. 
'Does YOUR watch tell you what\r\nyear it is?'\r\n\r\n'Of course not,' Alice replied very readily: 'but that's because it\r\nstays the same year for such a long time together.'\r\n\r\n'Which is just the case with MINE,' said the Hatter.\r\n\r\nAlice felt dreadfully puzzled. The Hatter's remark seemed to have no\r\nsort of meaning in it, and yet it was certainly English. 'I don't quite\r\nunderstand you,' she said, as politely as she could.\r\n\r\n'The Dormouse is asleep again,' said the Hatter, and he poured a little\r\nhot tea upon its nose.\r\n\r\nThe Dormouse shook its head impatiently, and said, without opening its\r\neyes, 'Of course, of course; just what I was going to remark myself.'\r\n\r\n'Have you guessed the riddle yet?' the Hatter said, turning to Alice\r\nagain.\r\n\r\n'No, I give it up,' Alice replied: 'what's the answer?'\r\n\r\n'I haven't the slightest idea,' said the Hatter.\r\n\r\n'Nor I,' said the March Hare.\r\n\r\nAlice sighed wearily. 'I think you might do something better with the\r\ntime,' she said, 'than waste it in asking riddles that have no answers.'\r\n\r\n'If you knew Time as well as I do,' said the Hatter, 'you wouldn't talk\r\nabout wasting IT. It's HIM.'\r\n\r\n'I don't know what you mean,' said Alice.\r\n\r\n'Of course you don't!' the Hatter said, tossing his head contemptuously.\r\n'I dare say you never even spoke to Time!'\r\n\r\n'Perhaps not,' Alice cautiously replied: 'but I know I have to beat time\r\nwhen I learn music.'\r\n\r\n'Ah! that accounts for it,' said the Hatter. 'He won't stand beating.\r\nNow, if you only kept on good terms with him, he'd do almost anything\r\nyou liked with the clock. For instance, suppose it were nine o'clock in\r\nthe morning, just time to begin lessons: you'd only have to whisper a\r\nhint to Time, and round goes the clock in a twinkling! 
Half-past one,\r\ntime for dinner!'\r\n\r\n('I only wish it was,' the March Hare said to itself in a whisper.)\r\n\r\n'That would be grand, certainly,' said Alice thoughtfully: 'but then--I\r\nshouldn't be hungry for it, you know.'\r\n\r\n'Not at first, perhaps,' said the Hatter: 'but you could keep it to\r\nhalf-past one as long as you liked.'\r\n\r\n'Is that the way YOU manage?' Alice asked.\r\n\r\nThe Hatter shook his head mournfully. 'Not I!' he replied. 'We\r\nquarrelled last March--just before HE went mad, you know--' (pointing\r\nwith his tea spoon at the March Hare,) '--it was at the great concert\r\ngiven by the Queen of Hearts, and I had to sing\r\n\r\n     \"Twinkle, twinkle, little bat!\r\n     How I wonder what you're at!\"\r\n\r\nYou know the song, perhaps?'\r\n\r\n'I've heard something like it,' said Alice.\r\n\r\n'It goes on, you know,' the Hatter continued, 'in this way:--\r\n\r\n     \"Up above the world you fly,\r\n     Like a tea-tray in the sky.\r\n         Twinkle, twinkle--\"'\r\n\r\nHere the Dormouse shook itself, and began singing in its sleep 'Twinkle,\r\ntwinkle, twinkle, twinkle--' and went on so long that they had to pinch\r\nit to make it stop.\r\n\r\n'Well, I'd hardly finished the first verse,' said the Hatter, 'when the\r\nQueen jumped up and bawled out, \"He's murdering the time! Off with his\r\nhead!\"'\r\n\r\n'How dreadfully savage!' exclaimed Alice.\r\n\r\n'And ever since that,' the Hatter went on in a mournful tone, 'he won't\r\ndo a thing I ask! It's always six o'clock now.'\r\n\r\nA bright idea came into Alice's head. 'Is that the reason so many\r\ntea-things are put out here?' she asked.\r\n\r\n'Yes, that's it,' said the Hatter with a sigh: 'it's always tea-time,\r\nand we've no time to wash the things between whiles.'\r\n\r\n'Then you keep moving round, I suppose?' said Alice.\r\n\r\n'Exactly so,' said the Hatter: 'as the things get used up.'\r\n\r\n'But what happens when you come to the beginning again?' 
Alice ventured\r\nto ask.\r\n\r\n'Suppose we change the subject,' the March Hare interrupted, yawning.\r\n'I'm getting tired of this. I vote the young lady tells us a story.'\r\n\r\n'I'm afraid I don't know one,' said Alice, rather alarmed at the\r\nproposal.\r\n\r\n'Then the Dormouse shall!' they both cried. 'Wake up, Dormouse!' And\r\nthey pinched it on both sides at once.\r\n\r\nThe Dormouse slowly opened his eyes. 'I wasn't asleep,' he said in a\r\nhoarse, feeble voice: 'I heard every word you fellows were saying.'\r\n\r\n'Tell us a story!' said the March Hare.\r\n\r\n'Yes, please do!' pleaded Alice.\r\n\r\n'And be quick about it,' added the Hatter, 'or you'll be asleep again\r\nbefore it's done.'\r\n\r\n'Once upon a time there were three little sisters,' the Dormouse began\r\nin a great hurry; 'and their names were Elsie, Lacie, and Tillie; and\r\nthey lived at the bottom of a well--'\r\n\r\n'What did they live on?' said Alice, who always took a great interest in\r\nquestions of eating and drinking.\r\n\r\n'They lived on treacle,' said the Dormouse, after thinking a minute or\r\ntwo.\r\n\r\n'They couldn't have done that, you know,' Alice gently remarked; 'they'd\r\nhave been ill.'\r\n\r\n'So they were,' said the Dormouse; 'VERY ill.'\r\n\r\nAlice tried to fancy to herself what such an extraordinary ways of\r\nliving would be like, but it puzzled her too much, so she went on: 'But\r\nwhy did they live at the bottom of a well?'\r\n\r\n'Take some more tea,' the March Hare said to Alice, very earnestly.\r\n\r\n'I've had nothing yet,' Alice replied in an offended tone, 'so I can't\r\ntake more.'\r\n\r\n'You mean you can't take LESS,' said the Hatter: 'it's very easy to take\r\nMORE than nothing.'\r\n\r\n'Nobody asked YOUR opinion,' said Alice.\r\n\r\n'Who's making personal remarks now?' 
the Hatter asked triumphantly.\r\n\r\nAlice did not quite know what to say to this: so she helped herself\r\nto some tea and bread-and-butter, and then turned to the Dormouse, and\r\nrepeated her question. 'Why did they live at the bottom of a well?'\r\n\r\nThe Dormouse again took a minute or two to think about it, and then\r\nsaid, 'It was a treacle-well.'\r\n\r\n'There's no such thing!' Alice was beginning very angrily, but the\r\nHatter and the March Hare went 'Sh! sh!' and the Dormouse sulkily\r\nremarked, 'If you can't be civil, you'd better finish the story for\r\nyourself.'\r\n\r\n'No, please go on!' Alice said very humbly; 'I won't interrupt again. I\r\ndare say there may be ONE.'\r\n\r\n'One, indeed!' said the Dormouse indignantly. However, he consented to\r\n"
  },
  {
    "path": "examples/input/alice_2.txt",
    "content": "go on. 'And so these three little sisters--they were learning to draw,\r\nyou know--'\r\n\r\n'What did they draw?' said Alice, quite forgetting her promise.\r\n\r\n'Treacle,' said the Dormouse, without considering at all this time.\r\n\r\n'I want a clean cup,' interrupted the Hatter: 'let's all move one place\r\non.'\r\n\r\nHe moved on as he spoke, and the Dormouse followed him: the March Hare\r\nmoved into the Dormouse's place, and Alice rather unwillingly took\r\nthe place of the March Hare. The Hatter was the only one who got any\r\nadvantage from the change: and Alice was a good deal worse off than\r\nbefore, as the March Hare had just upset the milk-jug into his plate.\r\n\r\nAlice did not wish to offend the Dormouse again, so she began very\r\ncautiously: 'But I don't understand. Where did they draw the treacle\r\nfrom?'\r\n\r\n'You can draw water out of a water-well,' said the Hatter; 'so I should\r\nthink you could draw treacle out of a treacle-well--eh, stupid?'\r\n\r\n'But they were IN the well,' Alice said to the Dormouse, not choosing to\r\nnotice this last remark.\r\n\r\n'Of course they were', said the Dormouse; '--well in.'\r\n\r\nThis answer so confused poor Alice, that she let the Dormouse go on for\r\nsome time without interrupting it.\r\n\r\n'They were learning to draw,' the Dormouse went on, yawning and rubbing\r\nits eyes, for it was getting very sleepy; 'and they drew all manner of\r\nthings--everything that begins with an M--'\r\n\r\n'Why with an M?' said Alice.\r\n\r\n'Why not?' 
said the March Hare.\r\n\r\nAlice was silent.\r\n\r\nThe Dormouse had closed its eyes by this time, and was going off into\r\na doze; but, on being pinched by the Hatter, it woke up again with\r\na little shriek, and went on: '--that begins with an M, such as\r\nmouse-traps, and the moon, and memory, and muchness--you know you say\r\nthings are \"much of a muchness\"--did you ever see such a thing as a\r\ndrawing of a muchness?'\r\n\r\n'Really, now you ask me,' said Alice, very much confused, 'I don't\r\nthink--'\r\n\r\n'Then you shouldn't talk,' said the Hatter.\r\n\r\nThis piece of rudeness was more than Alice could bear: she got up in\r\ngreat disgust, and walked off; the Dormouse fell asleep instantly, and\r\nneither of the others took the least notice of her going, though she\r\nlooked back once or twice, half hoping that they would call after her:\r\nthe last time she saw them, they were trying to put the Dormouse into\r\nthe teapot.\r\n\r\n'At any rate I'll never go THERE again!' said Alice as she picked her\r\nway through the wood. 'It's the stupidest tea-party I ever was at in all\r\nmy life!'\r\n\r\nJust as she said this, she noticed that one of the trees had a door\r\nleading right into it. 'That's very curious!' she thought. 'But\r\neverything's curious today. I think I may as well go in at once.' And in\r\nshe went.\r\n\r\nOnce more she found herself in the long hall, and close to the little\r\nglass table. 'Now, I'll manage better this time,' she said to herself,\r\nand began by taking the little golden key, and unlocking the door that\r\nled into the garden. Then she went to work nibbling at the mushroom (she\r\nhad kept a piece of it in her pocket) till she was about a foot high:\r\nthen she walked down the little passage: and THEN--she found herself at\r\nlast in the beautiful garden, among the bright flower-beds and the cool\r\nfountains.\r\n\r\n\r\n\r\n\r\nCHAPTER VIII. 
The Queen's Croquet-Ground\r\n\r\nA large rose-tree stood near the entrance of the garden: the roses\r\ngrowing on it were white, but there were three gardeners at it, busily\r\npainting them red. Alice thought this a very curious thing, and she went\r\nnearer to watch them, and just as she came up to them she heard one of\r\nthem say, 'Look out now, Five! Don't go splashing paint over me like\r\nthat!'\r\n\r\n'I couldn't help it,' said Five, in a sulky tone; 'Seven jogged my\r\nelbow.'\r\n\r\nOn which Seven looked up and said, 'That's right, Five! Always lay the\r\nblame on others!'\r\n\r\n'YOU'D better not talk!' said Five. 'I heard the Queen say only\r\nyesterday you deserved to be beheaded!'\r\n\r\n'What for?' said the one who had spoken first.\r\n\r\n'That's none of YOUR business, Two!' said Seven.\r\n\r\n'Yes, it IS his business!' said Five, 'and I'll tell him--it was for\r\nbringing the cook tulip-roots instead of onions.'\r\n\r\nSeven flung down his brush, and had just begun 'Well, of all the unjust\r\nthings--' when his eye chanced to fall upon Alice, as she stood watching\r\nthem, and he checked himself suddenly: the others looked round also, and\r\nall of them bowed low.\r\n\r\n'Would you tell me,' said Alice, a little timidly, 'why you are painting\r\nthose roses?'\r\n\r\nFive and Seven said nothing, but looked at Two. Two began in a low\r\nvoice, 'Why the fact is, you see, Miss, this here ought to have been a\r\nRED rose-tree, and we put a white one in by mistake; and if the Queen\r\nwas to find it out, we should all have our heads cut off, you know.\r\nSo you see, Miss, we're doing our best, afore she comes, to--' At this\r\nmoment Five, who had been anxiously looking across the garden, called\r\nout 'The Queen! The Queen!' and the three gardeners instantly threw\r\nthemselves flat upon their faces. 
There was a sound of many footsteps,\r\nand Alice looked round, eager to see the Queen.\r\n\r\nFirst came ten soldiers carrying clubs; these were all shaped like\r\nthe three gardeners, oblong and flat, with their hands and feet at the\r\ncorners: next the ten courtiers; these were ornamented all over with\r\ndiamonds, and walked two and two, as the soldiers did. After these came\r\nthe royal children; there were ten of them, and the little dears came\r\njumping merrily along hand in hand, in couples: they were all ornamented\r\nwith hearts. Next came the guests, mostly Kings and Queens, and among\r\nthem Alice recognised the White Rabbit: it was talking in a hurried\r\nnervous manner, smiling at everything that was said, and went by without\r\nnoticing her. Then followed the Knave of Hearts, carrying the King's\r\ncrown on a crimson velvet cushion; and, last of all this grand\r\nprocession, came THE KING AND QUEEN OF HEARTS.\r\n\r\nAlice was rather doubtful whether she ought not to lie down on her face\r\nlike the three gardeners, but she could not remember ever having heard\r\nof such a rule at processions; 'and besides, what would be the use of\r\na procession,' thought she, 'if people had all to lie down upon their\r\nfaces, so that they couldn't see it?' So she stood still where she was,\r\nand waited.\r\n\r\nWhen the procession came opposite to Alice, they all stopped and looked\r\nat her, and the Queen said severely 'Who is this?' She said it to the\r\nKnave of Hearts, who only bowed and smiled in reply.\r\n\r\n'Idiot!' said the Queen, tossing her head impatiently; and, turning to\r\nAlice, she went on, 'What's your name, child?'\r\n\r\n'My name is Alice, so please your Majesty,' said Alice very politely;\r\nbut she added, to herself, 'Why, they're only a pack of cards, after\r\nall. I needn't be afraid of them!'\r\n\r\n'And who are THESE?' 
said the Queen, pointing to the three gardeners who\r\nwere lying round the rosetree; for, you see, as they were lying on their\r\nfaces, and the pattern on their backs was the same as the rest of the\r\npack, she could not tell whether they were gardeners, or soldiers, or\r\ncourtiers, or three of her own children.\r\n\r\n'How should I know?' said Alice, surprised at her own courage. 'It's no\r\nbusiness of MINE.'\r\n\r\nThe Queen turned crimson with fury, and, after glaring at her for a\r\nmoment like a wild beast, screamed 'Off with her head! Off--'\r\n\r\n'Nonsense!' said Alice, very loudly and decidedly, and the Queen was\r\nsilent.\r\n\r\nThe King laid his hand upon her arm, and timidly said 'Consider, my\r\ndear: she is only a child!'\r\n\r\nThe Queen turned angrily away from him, and said to the Knave 'Turn them\r\nover!'\r\n\r\nThe Knave did so, very carefully, with one foot.\r\n\r\n'Get up!' said the Queen, in a shrill, loud voice, and the three\r\ngardeners instantly jumped up, and began bowing to the King, the Queen,\r\nthe royal children, and everybody else.\r\n\r\n'Leave off that!' screamed the Queen. 'You make me giddy.' And then,\r\nturning to the rose-tree, she went on, 'What HAVE you been doing here?'\r\n\r\n'May it please your Majesty,' said Two, in a very humble tone, going\r\ndown on one knee as he spoke, 'we were trying--'\r\n\r\n'I see!' said the Queen, who had meanwhile been examining the roses.\r\n'Off with their heads!' and the procession moved on, three of the\r\nsoldiers remaining behind to execute the unfortunate gardeners, who ran\r\nto Alice for protection.\r\n\r\n'You shan't be beheaded!' said Alice, and she put them into a large\r\nflower-pot that stood near. The three soldiers wandered about for a\r\nminute or two, looking for them, and then quietly marched off after the\r\nothers.\r\n\r\n'Are their heads off?' shouted the Queen.\r\n\r\n'Their heads are gone, if it please your Majesty!' 
the soldiers shouted\r\nin reply.\r\n\r\n'That's right!' shouted the Queen. 'Can you play croquet?'\r\n\r\nThe soldiers were silent, and looked at Alice, as the question was\r\nevidently meant for her.\r\n\r\n'Yes!' shouted Alice.\r\n\r\n'Come on, then!' roared the Queen, and Alice joined the procession,\r\nwondering very much what would happen next.\r\n\r\n'It's--it's a very fine day!' said a timid voice at her side. She was\r\nwalking by the White Rabbit, who was peeping anxiously into her face.\r\n\r\n'Very,' said Alice: '--where's the Duchess?'\r\n\r\n'Hush! Hush!' said the Rabbit in a low, hurried tone. He looked\r\nanxiously over his shoulder as he spoke, and then raised himself upon\r\ntiptoe, put his mouth close to her ear, and whispered 'She's under\r\nsentence of execution.'\r\n\r\n'What for?' said Alice.\r\n\r\n'Did you say \"What a pity!\"?' the Rabbit asked.\r\n\r\n'No, I didn't,' said Alice: 'I don't think it's at all a pity. I said\r\n\"What for?\"'\r\n\r\n'She boxed the Queen's ears--' the Rabbit began. Alice gave a little\r\nscream of laughter. 'Oh, hush!' the Rabbit whispered in a frightened\r\ntone. 'The Queen will hear you! You see, she came rather late, and the\r\nQueen said--'\r\n\r\n'Get to your places!' shouted the Queen in a voice of thunder, and\r\npeople began running about in all directions, tumbling up against each\r\nother; however, they got settled down in a minute or two, and the game\r\nbegan. 
Alice thought she had never seen such a curious croquet-ground in\r\nher life; it was all ridges and furrows; the balls were live hedgehogs,\r\nthe mallets live flamingoes, and the soldiers had to double themselves\r\nup and to stand on their hands and feet, to make the arches.\r\n\r\nThe chief difficulty Alice found at first was in managing her flamingo:\r\nshe succeeded in getting its body tucked away, comfortably enough, under\r\nher arm, with its legs hanging down, but generally, just as she had got\r\nits neck nicely straightened out, and was going to give the hedgehog a\r\nblow with its head, it WOULD twist itself round and look up in her face,\r\nwith such a puzzled expression that she could not help bursting out\r\nlaughing: and when she had got its head down, and was going to begin\r\nagain, it was very provoking to find that the hedgehog had unrolled\r\nitself, and was in the act of crawling away: besides all this, there was\r\ngenerally a ridge or furrow in the way wherever she wanted to send the\r\nhedgehog to, and, as the doubled-up soldiers were always getting up\r\nand walking off to other parts of the ground, Alice soon came to the\r\nconclusion that it was a very difficult game indeed.\r\n\r\nThe players all played at once without waiting for turns, quarrelling\r\nall the while, and fighting for the hedgehogs; and in a very short\r\ntime the Queen was in a furious passion, and went stamping about, and\r\nshouting 'Off with his head!' or 'Off with her head!' about once in a\r\nminute.\r\n\r\nAlice began to feel very uneasy: to be sure, she had not as yet had any\r\ndispute with the Queen, but she knew that it might happen any minute,\r\n'and then,' thought she, 'what would become of me? 
They're dreadfully\r\nfond of beheading people here; the great wonder is, that there's any one\r\nleft alive!'\r\n\r\nShe was looking about for some way of escape, and wondering whether she\r\ncould get away without being seen, when she noticed a curious appearance\r\nin the air: it puzzled her very much at first, but, after watching it\r\na minute or two, she made it out to be a grin, and she said to herself\r\n'It's the Cheshire Cat: now I shall have somebody to talk to.'\r\n\r\n'How are you getting on?' said the Cat, as soon as there was mouth\r\nenough for it to speak with.\r\n\r\nAlice waited till the eyes appeared, and then nodded. 'It's no use\r\nspeaking to it,' she thought, 'till its ears have come, or at least one\r\nof them.' In another minute the whole head appeared, and then Alice put\r\ndown her flamingo, and began an account of the game, feeling very glad\r\nshe had someone to listen to her. The Cat seemed to think that there was\r\nenough of it now in sight, and no more of it appeared.\r\n\r\n'I don't think they play at all fairly,' Alice began, in rather a\r\ncomplaining tone, 'and they all quarrel so dreadfully one can't hear\r\noneself speak--and they don't seem to have any rules in particular;\r\nat least, if there are, nobody attends to them--and you've no idea how\r\nconfusing it is all the things being alive; for instance, there's the\r\narch I've got to go through next walking about at the other end of the\r\nground--and I should have croqueted the Queen's hedgehog just now, only\r\nit ran away when it saw mine coming!'\r\n\r\n'How do you like the Queen?' said the Cat in a low voice.\r\n\r\n'Not at all,' said Alice: 'she's so extremely--' Just then she noticed\r\nthat the Queen was close behind her, listening: so she went on,\r\n'--likely to win, that it's hardly worth while finishing the game.'\r\n\r\nThe Queen smiled and passed on.\r\n\r\n'Who ARE you talking to?' 
said the King, going up to Alice, and looking\r\nat the Cat's head with great curiosity.\r\n\r\n'It's a friend of mine--a Cheshire Cat,' said Alice: 'allow me to\r\nintroduce it.'\r\n\r\n'I don't like the look of it at all,' said the King: 'however, it may\r\nkiss my hand if it likes.'\r\n\r\n'I'd rather not,' the Cat remarked.\r\n\r\n'Don't be impertinent,' said the King, 'and don't look at me like that!'\r\nHe got behind Alice as he spoke.\r\n\r\n'A cat may look at a king,' said Alice. 'I've read that in some book,\r\nbut I don't remember where.'\r\n\r\n'Well, it must be removed,' said the King very decidedly, and he called\r\nthe Queen, who was passing at the moment, 'My dear! I wish you would\r\nhave this cat removed!'\r\n\r\nThe Queen had only one way of settling all difficulties, great or small.\r\n'Off with his head!' she said, without even looking round.\r\n\r\n'I'll fetch the executioner myself,' said the King eagerly, and he\r\nhurried off.\r\n\r\nAlice thought she might as well go back, and see how the game was going\r\non, as she heard the Queen's voice in the distance, screaming with\r\npassion. She had already heard her sentence three of the players to be\r\nexecuted for having missed their turns, and she did not like the look\r\nof things at all, as the game was in such confusion that she never knew\r\nwhether it was her turn or not. So she went in search of her hedgehog.\r\n\r\nThe hedgehog was engaged in a fight with another hedgehog, which seemed\r\nto Alice an excellent opportunity for croqueting one of them with the\r\nother: the only difficulty was, that her flamingo was gone across to the\r\nother side of the garden, where Alice could see it trying in a helpless\r\nsort of way to fly up into a tree.\r\n\r\nBy the time she had caught the flamingo and brought it back, the fight\r\nwas over, and both the hedgehogs were out of sight: 'but it doesn't\r\nmatter much,' thought Alice, 'as all the arches are gone from this side\r\nof the ground.' 
So she tucked it away under her arm, that it might not\r\nescape again, and went back for a little more conversation with her\r\nfriend.\r\n\r\nWhen she got back to the Cheshire Cat, she was surprised to find quite a\r\nlarge crowd collected round it: there was a dispute going on between\r\nthe executioner, the King, and the Queen, who were all talking at once,\r\nwhile all the rest were quite silent, and looked very uncomfortable.\r\n\r\nThe moment Alice appeared, she was appealed to by all three to settle\r\nthe question, and they repeated their arguments to her, though, as they\r\nall spoke at once, she found it very hard indeed to make out exactly\r\nwhat they said.\r\n\r\nThe executioner's argument was, that you couldn't cut off a head unless\r\nthere was a body to cut it off from: that he had never had to do such a\r\nthing before, and he wasn't going to begin at HIS time of life.\r\n\r\nThe King's argument was, that anything that had a head could be\r\nbeheaded, and that you weren't to talk nonsense.\r\n\r\nThe Queen's argument was, that if something wasn't done about it in less\r\nthan no time she'd have everybody executed, all round. (It was this last\r\nremark that had made the whole party look so grave and anxious.)\r\n\r\nAlice could think of nothing else to say but 'It belongs to the Duchess:\r\nyou'd better ask HER about it.'\r\n\r\n'She's in prison,' the Queen said to the executioner: 'fetch her here.'\r\nAnd the executioner went off like an arrow.\r\n\r\n The Cat's head began fading away the moment he was gone, and,\r\nby the time he had come back with the Duchess, it had entirely\r\ndisappeared; so the King and the executioner ran wildly up and down\r\nlooking for it, while the rest of the party went back to the game.\r\n\r\n\r\n\r\n\r\nCHAPTER IX. 
The Mock Turtle's Story\r\n\r\n'You can't think how glad I am to see you again, you dear old thing!'\r\nsaid the Duchess, as she tucked her arm affectionately into Alice's, and\r\nthey walked off together.\r\n\r\nAlice was very glad to find her in such a pleasant temper, and thought\r\nto herself that perhaps it was only the pepper that had made her so\r\nsavage when they met in the kitchen.\r\n\r\n'When I'M a Duchess,' she said to herself, (not in a very hopeful tone\r\nthough), 'I won't have any pepper in my kitchen AT ALL. Soup does very\r\nwell without--Maybe it's always pepper that makes people hot-tempered,'\r\nshe went on, very much pleased at having found out a new kind of\r\nrule, 'and vinegar that makes them sour--and camomile that makes\r\nthem bitter--and--and barley-sugar and such things that make children\r\nsweet-tempered. I only wish people knew that: then they wouldn't be so\r\nstingy about it, you know--'\r\n\r\nShe had quite forgotten the Duchess by this time, and was a little\r\nstartled when she heard her voice close to her ear. 'You're thinking\r\nabout something, my dear, and that makes you forget to talk. I can't\r\ntell you just now what the moral of that is, but I shall remember it in\r\na bit.'\r\n\r\n'Perhaps it hasn't one,' Alice ventured to remark.\r\n\r\n'Tut, tut, child!' said the Duchess. 'Everything's got a moral, if only\r\nyou can find it.' And she squeezed herself up closer to Alice's side as\r\nshe spoke.\r\n\r\nAlice did not much like keeping so close to her: first, because the\r\nDuchess was VERY ugly; and secondly, because she was exactly the\r\nright height to rest her chin upon Alice's shoulder, and it was an\r\nuncomfortably sharp chin. 
However, she did not like to be rude, so she\r\nbore it as well as she could.\r\n\r\n'The game's going on rather better now,' she said, by way of keeping up\r\nthe conversation a little.\r\n\r\n''Tis so,' said the Duchess: 'and the moral of that is--\"Oh, 'tis love,\r\n'tis love, that makes the world go round!\"'\r\n\r\n'Somebody said,' Alice whispered, 'that it's done by everybody minding\r\ntheir own business!'\r\n\r\n'Ah, well! It means much the same thing,' said the Duchess, digging her\r\nsharp little chin into Alice's shoulder as she added, 'and the moral\r\nof THAT is--\"Take care of the sense, and the sounds will take care of\r\nthemselves.\"'\r\n\r\n'How fond she is of finding morals in things!' Alice thought to herself.\r\n\r\n'I dare say you're wondering why I don't put my arm round your waist,'\r\nthe Duchess said after a pause: 'the reason is, that I'm doubtful about\r\nthe temper of your flamingo. Shall I try the experiment?'\r\n\r\n'HE might bite,' Alice cautiously replied, not feeling at all anxious to\r\nhave the experiment tried.\r\n\r\n'Very true,' said the Duchess: 'flamingoes and mustard both bite. And\r\nthe moral of that is--\"Birds of a feather flock together.\"'\r\n\r\n'Only mustard isn't a bird,' Alice remarked.\r\n\r\n'Right, as usual,' said the Duchess: 'what a clear way you have of\r\nputting things!'\r\n\r\n'It's a mineral, I THINK,' said Alice.\r\n\r\n'Of course it is,' said the Duchess, who seemed ready to agree to\r\neverything that Alice said; 'there's a large mustard-mine near here. And\r\nthe moral of that is--\"The more there is of mine, the less there is of\r\nyours.\"'\r\n\r\n'Oh, I know!' exclaimed Alice, who had not attended to this last remark,\r\n'it's a vegetable. 
It doesn't look like one, but it is.'\r\n\r\n'I quite agree with you,' said the Duchess; 'and the moral of that\r\nis--\"Be what you would seem to be\"--or if you'd like it put more\r\nsimply--\"Never imagine yourself not to be otherwise than what it might\r\nappear to others that what you were or might have been was not otherwise\r\nthan what you had been would have appeared to them to be otherwise.\"'\r\n\r\n'I think I should understand that better,' Alice said very politely, 'if\r\nI had it written down: but I can't quite follow it as you say it.'\r\n\r\n'That's nothing to what I could say if I chose,' the Duchess replied, in\r\na pleased tone.\r\n\r\n'Pray don't trouble yourself to say it any longer than that,' said\r\nAlice.\r\n\r\n'Oh, don't talk about trouble!' said the Duchess. 'I make you a present\r\nof everything I've said as yet.'\r\n\r\n'A cheap sort of present!' thought Alice. 'I'm glad they don't give\r\nbirthday presents like that!' But she did not venture to say it out\r\nloud.\r\n\r\n'Thinking again?' the Duchess asked, with another dig of her sharp\r\nlittle chin.\r\n\r\n'I've a right to think,' said Alice sharply, for she was beginning to\r\nfeel a little worried.\r\n\r\n'Just about as much right,' said the Duchess, 'as pigs have to fly; and\r\nthe m--'\r\n\r\nBut here, to Alice's great surprise, the Duchess's voice died away, even\r\nin the middle of her favourite word 'moral,' and the arm that was linked\r\ninto hers began to tremble. Alice looked up, and there stood the Queen\r\nin front of them, with her arms folded, frowning like a thunderstorm.\r\n\r\n'A fine day, your Majesty!' the Duchess began in a low, weak voice.\r\n\r\n'Now, I give you fair warning,' shouted the Queen, stamping on the\r\nground as she spoke; 'either you or your head must be off, and that in\r\nabout half no time! 
Take your choice!'\r\n\r\nThe Duchess took her choice, and was gone in a moment.\r\n\r\n'Let's go on with the game,' the Queen said to Alice; and Alice was\r\ntoo much frightened to say a word, but slowly followed her back to the\r\ncroquet-ground.\r\n\r\nThe other guests had taken advantage of the Queen's absence, and were\r\nresting in the shade: however, the moment they saw her, they hurried\r\nback to the game, the Queen merely remarking that a moment's delay would\r\ncost them their lives.\r\n\r\nAll the time they were playing the Queen never left off quarrelling with\r\nthe other players, and shouting 'Off with his head!' or 'Off with her\r\nhead!' Those whom she sentenced were taken into custody by the soldiers,\r\nwho of course had to leave off being arches to do this, so that by\r\nthe end of half an hour or so there were no arches left, and all the\r\nplayers, except the King, the Queen, and Alice, were in custody and\r\nunder sentence of execution.\r\n\r\nThen the Queen left off, quite out of breath, and said to Alice, 'Have\r\nyou seen the Mock Turtle yet?'\r\n\r\n'No,' said Alice. 'I don't even know what a Mock Turtle is.'\r\n\r\n'It's the thing Mock Turtle Soup is made from,' said the Queen.\r\n\r\n'I never saw one, or heard of one,' said Alice.\r\n\r\n'Come on, then,' said the Queen, 'and he shall tell you his history,'\r\n\r\nAs they walked off together, Alice heard the King say in a low voice,\r\nto the company generally, 'You are all pardoned.' 'Come, THAT'S a good\r\nthing!' she said to herself, for she had felt quite unhappy at the\r\nnumber of executions the Queen had ordered.\r\n\r\nThey very soon came upon a Gryphon, lying fast asleep in the sun.\r\n(IF you don't know what a Gryphon is, look at the picture.) 'Up, lazy\r\nthing!' said the Queen, 'and take this young lady to see the Mock\r\nTurtle, and to hear his history. 
I must go back and see after some\r\nexecutions I have ordered'; and she walked off, leaving Alice alone with\r\nthe Gryphon. Alice did not quite like the look of the creature, but on\r\nthe whole she thought it would be quite as safe to stay with it as to go\r\nafter that savage Queen: so she waited.\r\n\r\nThe Gryphon sat up and rubbed its eyes: then it watched the Queen till\r\nshe was out of sight: then it chuckled. 'What fun!' said the Gryphon,\r\nhalf to itself, half to Alice.\r\n\r\n'What IS the fun?' said Alice.\r\n\r\n'Why, SHE,' said the Gryphon. 'It's all her fancy, that: they never\r\nexecutes nobody, you know. Come on!'\r\n\r\n'Everybody says \"come on!\" here,' thought Alice, as she went slowly\r\nafter it: 'I never was so ordered about in all my life, never!'\r\n\r\nThey had not gone far before they saw the Mock Turtle in the distance,\r\nsitting sad and lonely on a little ledge of rock, and, as they came\r\nnearer, Alice could hear him sighing as if his heart would break. She\r\npitied him deeply. 'What is his sorrow?' she asked the Gryphon, and the\r\nGryphon answered, very nearly in the same words as before, 'It's all his\r\nfancy, that: he hasn't got no sorrow, you know. Come on!'\r\n\r\nSo they went up to the Mock Turtle, who looked at them with large eyes\r\nfull of tears, but said nothing.\r\n\r\n'This here young lady,' said the Gryphon, 'she wants for to know your\r\nhistory, she do.'\r\n\r\n'I'll tell it her,' said the Mock Turtle in a deep, hollow tone: 'sit\r\ndown, both of you, and don't speak a word till I've finished.'\r\n\r\nSo they sat down, and nobody spoke for some minutes. Alice thought to\r\nherself, 'I don't see how he can EVEN finish, if he doesn't begin.' But\r\nshe waited patiently.\r\n\r\n'Once,' said the Mock Turtle at last, with a deep sigh, 'I was a real\r\nTurtle.'\r\n\r\nThese words were followed by a very long silence, broken only by an\r\noccasional exclamation of 'Hjckrrh!' 
from the Gryphon, and the constant\r\nheavy sobbing of the Mock Turtle. Alice was very nearly getting up and\r\nsaying, 'Thank you, sir, for your interesting story,' but she could\r\nnot help thinking there MUST be more to come, so she sat still and said\r\nnothing.\r\n\r\n'When we were little,' the Mock Turtle went on at last, more calmly,\r\nthough still sobbing a little now and then, 'we went to school in the\r\nsea. The master was an old Turtle--we used to call him Tortoise--'\r\n\r\n'Why did you call him Tortoise, if he wasn't one?' Alice asked.\r\n\r\n'We called him Tortoise because he taught us,' said the Mock Turtle\r\nangrily: 'really you are very dull!'\r\n\r\n'You ought to be ashamed of yourself for asking such a simple question,'\r\nadded the Gryphon; and then they both sat silent and looked at poor\r\nAlice, who felt ready to sink into the earth. At last the Gryphon said\r\nto the Mock Turtle, 'Drive on, old fellow! Don't be all day about it!'\r\nand he went on in these words:\r\n\r\n'Yes, we went to school in the sea, though you mayn't believe it--'\r\n\r\n'I never said I didn't!' interrupted Alice.\r\n\r\n'You did,' said the Mock Turtle.\r\n\r\n'Hold your tongue!' added the Gryphon, before Alice could speak again.\r\nThe Mock Turtle went on.\r\n\r\n'We had the best of educations--in fact, we went to school every day--'\r\n\r\n'I'VE been to a day-school, too,' said Alice; 'you needn't be so proud\r\nas all that.'\r\n\r\n'With extras?' asked the Mock Turtle a little anxiously.\r\n\r\n'Yes,' said Alice, 'we learned French and music.'\r\n\r\n'And washing?' said the Mock Turtle.\r\n\r\n'Certainly not!' said Alice indignantly.\r\n\r\n'Ah! then yours wasn't a really good school,' said the Mock Turtle in\r\na tone of great relief. 'Now at OURS they had at the end of the bill,\r\n\"French, music, AND WASHING--extra.\"'\r\n\r\n'You couldn't have wanted it much,' said Alice; 'living at the bottom of\r\nthe sea.'\r\n\r\n'I couldn't afford to learn it.' 
said the Mock Turtle with a sigh. 'I\r\nonly took the regular course.'\r\n\r\n'What was that?' inquired Alice.\r\n\r\n'Reeling and Writhing, of course, to begin with,' the Mock Turtle\r\nreplied; 'and then the different branches of Arithmetic--Ambition,\r\nDistraction, Uglification, and Derision.'\r\n\r\n'I never heard of \"Uglification,\"' Alice ventured to say. 'What is it?'\r\n\r\nThe Gryphon lifted up both its paws in surprise. 'What! Never heard of\r\nuglifying!' it exclaimed. 'You know what to beautify is, I suppose?'\r\n\r\n'Yes,' said Alice doubtfully: 'it means--to--make--anything--prettier.'\r\n\r\n'Well, then,' the Gryphon went on, 'if you don't know what to uglify is,\r\nyou ARE a simpleton.'\r\n\r\nAlice did not feel encouraged to ask any more questions about it, so she\r\nturned to the Mock Turtle, and said 'What else had you to learn?'\r\n\r\n'Well, there was Mystery,' the Mock Turtle replied, counting off\r\nthe subjects on his flappers, '--Mystery, ancient and modern, with\r\nSeaography: then Drawling--the Drawling-master was an old conger-eel,\r\nthat used to come once a week: HE taught us Drawling, Stretching, and\r\nFainting in Coils.'\r\n\r\n'What was THAT like?' said Alice.\r\n\r\n'Well, I can't show it you myself,' the Mock Turtle said: 'I'm too\r\nstiff. And the Gryphon never learnt it.'\r\n\r\n'Hadn't time,' said the Gryphon: 'I went to the Classics master, though.\r\nHe was an old crab, HE was.'\r\n\r\n'I never went to him,' the Mock Turtle said with a sigh: 'he taught\r\nLaughing and Grief, they used to say.'\r\n\r\n'So he did, so he did,' said the Gryphon, sighing in his turn; and both\r\ncreatures hid their faces in their paws.\r\n\r\n'And how many hours a day did you do lessons?' said Alice, in a hurry to\r\nchange the subject.\r\n\r\n'Ten hours the first day,' said the Mock Turtle: 'nine the next, and so\r\non.'\r\n\r\n'What a curious plan!' 
exclaimed Alice.\r\n\r\n'That's the reason they're called lessons,' the Gryphon remarked:\r\n'because they lessen from day to day.'\r\n\r\nThis was quite a new idea to Alice, and she thought it over a little\r\nbefore she made her next remark. 'Then the eleventh day must have been a\r\nholiday?'\r\n\r\n'Of course it was,' said the Mock Turtle.\r\n\r\n'And how did you manage on the twelfth?' Alice went on eagerly.\r\n\r\n'That's enough about lessons,' the Gryphon interrupted in a very decided\r\ntone: 'tell her something about the games now.'\r\n\r\n\r\n\r\n\r\nCHAPTER X. The Lobster Quadrille\r\n\r\nThe Mock Turtle sighed deeply, and drew the back of one flapper across\r\nhis eyes. He looked at Alice, and tried to speak, but for a minute or\r\ntwo sobs choked his voice. 'Same as if he had a bone in his throat,'\r\nsaid the Gryphon: and it set to work shaking him and punching him in\r\nthe back. At last the Mock Turtle recovered his voice, and, with tears\r\nrunning down his cheeks, he went on again:--\r\n\r\n'You may not have lived much under the sea--' ('I haven't,' said\r\nAlice)--'and perhaps you were never even introduced to a lobster--'\r\n(Alice began to say 'I once tasted--' but checked herself hastily, and\r\nsaid 'No, never') '--so you can have no idea what a delightful thing a\r\nLobster Quadrille is!'\r\n\r\n'No, indeed,' said Alice. 'What sort of a dance is it?'\r\n\r\n'Why,' said the Gryphon, 'you first form into a line along the\r\nsea-shore--'\r\n\r\n'Two lines!' cried the Mock Turtle. 'Seals, turtles, salmon, and so on;\r\nthen, when you've cleared all the jelly-fish out of the way--'\r\n\r\n'THAT generally takes some time,' interrupted the Gryphon.\r\n\r\n'--you advance twice--'\r\n\r\n'Each with a lobster as a partner!' 
cried the Gryphon.\r\n\r\n'Of course,' the Mock Turtle said: 'advance twice, set to partners--'\r\n\r\n'--change lobsters, and retire in same order,' continued the Gryphon.\r\n\r\n'Then, you know,' the Mock Turtle went on, 'you throw the--'\r\n\r\n'The lobsters!' shouted the Gryphon, with a bound into the air.\r\n\r\n'--as far out to sea as you can--'\r\n\r\n'Swim after them!' screamed the Gryphon.\r\n\r\n'Turn a somersault in the sea!' cried the Mock Turtle, capering wildly\r\nabout.\r\n\r\n'Change lobsters again!' yelled the Gryphon at the top of its voice.\r\n\r\n'Back to land again, and that's all the first figure,' said the Mock\r\nTurtle, suddenly dropping his voice; and the two creatures, who had been\r\njumping about like mad things all this time, sat down again very sadly\r\nand quietly, and looked at Alice.\r\n\r\n'It must be a very pretty dance,' said Alice timidly.\r\n\r\n'Would you like to see a little of it?' said the Mock Turtle.\r\n\r\n'Very much indeed,' said Alice.\r\n\r\n'Come, let's try the first figure!' said the Mock Turtle to the Gryphon.\r\n'We can do without lobsters, you know. Which shall sing?'\r\n\r\n'Oh, YOU sing,' said the Gryphon. 
'I've forgotten the words.'\r\n\r\nSo they began solemnly dancing round and round Alice, every now and\r\nthen treading on her toes when they passed too close, and waving their\r\nforepaws to mark the time, while the Mock Turtle sang this, very slowly\r\nand sadly:--\r\n\r\n '\"Will you walk a little faster?\" said a whiting to a snail.\r\n \"There's a porpoise close behind us, and he's treading on my tail.\r\n\r\n See how eagerly the lobsters and the turtles all advance!\r\n They are waiting on the shingle--will you come and join the dance?\r\n\r\n Will you, won't you, will you, won't you, will you join the dance?\r\n Will you, won't you, will you, won't you, won't you join the dance?\r\n\r\n \"You can really have no notion how delightful it will be\r\n When they take us up and throw us, with the lobsters, out to sea!\"\r\n But the snail replied \"Too far, too far!\" and gave a look askance--\r\n Said he thanked the whiting kindly, but he would not join the dance.\r\n\r\n Would not, could not, would not, could not, would not join the dance.\r\n Would not, could not, would not, could not, could not join the dance.\r\n\r\n '\"What matters it how far we go?\" his scaly friend replied.\r\n \"There is another shore, you know, upon the other side.\r\n The further off from England the nearer is to France--\r\n Then turn not pale, beloved snail, but come and join the dance.\r\n\r\n Will you, won't you, will you, won't you, will you join the dance?\r\n Will you, won't you, will you, won't you, won't you join the dance?\"'\r\n\r\n'Thank you, it's a very interesting dance to watch,' said Alice, feeling\r\nvery glad that it was over at last: 'and I do so like that curious song\r\nabout the whiting!'\r\n\r\n'Oh, as to the whiting,' said the Mock Turtle, 'they--you've seen them,\r\nof course?'\r\n\r\n'Yes,' said Alice, 'I've often seen them at dinn--' she checked herself\r\nhastily.\r\n\r\n'I don't know where Dinn may be,' said the Mock Turtle, 'but if you've\r\nseen them so 
often, of course you know what they're like.'\r\n\r\n'I believe so,' Alice replied thoughtfully. 'They have their tails in\r\ntheir mouths--and they're all over crumbs.'\r\n\r\n'You're wrong about the crumbs,' said the Mock Turtle: 'crumbs would all\r\nwash off in the sea. But they HAVE their tails in their mouths; and the\r\nreason is--' here the Mock Turtle yawned and shut his eyes.--'Tell her\r\nabout the reason and all that,' he said to the Gryphon.\r\n\r\n'The reason is,' said the Gryphon, 'that they WOULD go with the lobsters\r\nto the dance. So they got thrown out to sea. So they had to fall a long\r\nway. So they got their tails fast in their mouths. So they couldn't get\r\nthem out again. That's all.'\r\n\r\n'Thank you,' said Alice, 'it's very interesting. I never knew so much\r\nabout a whiting before.'\r\n\r\n'I can tell you more than that, if you like,' said the Gryphon. 'Do you\r\nknow why it's called a whiting?'\r\n\r\n'I never thought about it,' said Alice. 'Why?'\r\n\r\n'IT DOES THE BOOTS AND SHOES.' the Gryphon replied very solemnly.\r\n\r\nAlice was thoroughly puzzled. 'Does the boots and shoes!' she repeated\r\nin a wondering tone.\r\n\r\n'Why, what are YOUR shoes done with?' said the Gryphon. 'I mean, what\r\nmakes them so shiny?'\r\n\r\nAlice looked down at them, and considered a little before she gave her\r\nanswer. 'They're done with blacking, I believe.'\r\n\r\n'Boots and shoes under the sea,' the Gryphon went on in a deep voice,\r\n'are done with a whiting. Now you know.'\r\n\r\n'And what are they made of?' 
Alice asked in a tone of great curiosity.\r\n\r\n'Soles and eels, of course,' the Gryphon replied rather impatiently:\r\n'any shrimp could have told you that.'\r\n\r\n'If I'd been the whiting,' said Alice, whose thoughts were still running\r\non the song, 'I'd have said to the porpoise, \"Keep back, please: we\r\ndon't want YOU with us!\"'\r\n\r\n'They were obliged to have him with them,' the Mock Turtle said: 'no\r\nwise fish would go anywhere without a porpoise.'\r\n\r\n'Wouldn't it really?' said Alice in a tone of great surprise.\r\n\r\n'Of course not,' said the Mock Turtle: 'why, if a fish came to ME, and\r\ntold me he was going a journey, I should say \"With what porpoise?\"'\r\n\r\n'Don't you mean \"purpose\"?' said Alice.\r\n\r\n'I mean what I say,' the Mock Turtle replied in an offended tone. And\r\nthe Gryphon added 'Come, let's hear some of YOUR adventures.'\r\n\r\n'I could tell you my adventures--beginning from this morning,' said\r\nAlice a little timidly: 'but it's no use going back to yesterday,\r\nbecause I was a different person then.'\r\n\r\n'Explain all that,' said the Mock Turtle.\r\n\r\n'No, no! The adventures first,' said the Gryphon in an impatient tone:\r\n'explanations take such a dreadful time.'\r\n\r\nSo Alice began telling them her adventures from the time when she first\r\nsaw the White Rabbit. She was a little nervous about it just at first,\r\nthe two creatures got so close to her, one on each side, and opened\r\ntheir eyes and mouths so VERY wide, but she gained courage as she went\r\non. Her listeners were perfectly quiet till she got to the part about\r\nher repeating 'YOU ARE OLD, FATHER WILLIAM,' to the Caterpillar, and the\r\nwords all coming different, and then the Mock Turtle drew a long breath,\r\nand said 'That's very curious.'\r\n\r\n'It's all about as curious as it can be,' said the Gryphon.\r\n\r\n'It all came different!' the Mock Turtle repeated thoughtfully. 'I\r\nshould like to hear her try and repeat something now. 
Tell her to\r\nbegin.' He looked at the Gryphon as if he thought it had some kind of\r\nauthority over Alice.\r\n\r\n'Stand up and repeat \"'TIS THE VOICE OF THE SLUGGARD,\"' said the\r\nGryphon.\r\n\r\n'How the creatures order one about, and make one repeat lessons!'\r\nthought Alice; 'I might as well be at school at once.' However, she\r\ngot up, and began to repeat it, but her head was so full of the Lobster\r\nQuadrille, that she hardly knew what she was saying, and the words came\r\nvery queer indeed:--\r\n\r\n  ''Tis the voice of the Lobster; I heard him declare,\r\n  \"You have baked me too brown, I must sugar my hair.\"\r\n  As a duck with its eyelids, so he with his nose\r\n  Trims his belt and his buttons, and turns out his toes.'\r\n\r\n       [later editions continued as follows\r\n  When the sands are all dry, he is gay as a lark,\r\n  And will talk in contemptuous tones of the Shark,\r\n  But, when the tide rises and sharks are around,\r\n  His voice has a timid and tremulous sound.]\r\n\r\n'That's different from what I used to say when I was a child,' said the\r\nGryphon.\r\n\r\n'Well, I never heard it before,' said the Mock Turtle; 'but it sounds\r\nuncommon nonsense.'\r\n\r\nAlice said nothing; she had sat down with her face in her hands,\r\nwondering if anything would EVER happen in a natural way again.\r\n\r\n'I should like to have it explained,' said the Mock Turtle.\r\n\r\n'She can't explain it,' said the Gryphon hastily. 'Go on with the next\r\nverse.'\r\n\r\n'But about his toes?' the Mock Turtle persisted. 'How COULD he turn them\r\nout with his nose, you know?'\r\n\r\n'It's the first position in dancing.' 
Alice said; but was dreadfully\r\npuzzled by the whole thing, and longed to change the subject.\r\n\r\n'Go on with the next verse,' the Gryphon repeated impatiently: 'it\r\nbegins \"I passed by his garden.\"'\r\n\r\nAlice did not dare to disobey, though she felt sure it would all come\r\nwrong, and she went on in a trembling voice:--\r\n\r\n  'I passed by his garden, and marked, with one eye,\r\n  How the Owl and the Panther were sharing a pie--'\r\n\r\n    [later editions continued as follows\r\n  The Panther took pie-crust, and gravy, and meat,\r\n  While the Owl had the dish as its share of the treat.\r\n  When the pie was all finished, the Owl, as a boon,\r\n  Was kindly permitted to pocket the spoon:\r\n  While the Panther received knife and fork with a growl,\r\n  And concluded the banquet--]\r\n\r\n'What IS the use of repeating all that stuff,' the Mock Turtle\r\ninterrupted, 'if you don't explain it as you go on? It's by far the most\r\nconfusing thing I ever heard!'\r\n\r\n'Yes, I think you'd better leave off,' said the Gryphon: and Alice was\r\nonly too glad to do so.\r\n\r\n'Shall we try another figure of the Lobster Quadrille?' the Gryphon went\r\non. 'Or would you like the Mock Turtle to sing you a song?'\r\n\r\n'Oh, a song, please, if the Mock Turtle would be so kind,' Alice\r\nreplied, so eagerly that the Gryphon said, in a rather offended tone,\r\n'Hm! No accounting for tastes! Sing her \"Turtle Soup,\" will you, old\r\nfellow?'\r\n\r\nThe Mock Turtle sighed deeply, and began, in a voice sometimes choked\r\nwith sobs, to sing this:--\r\n\r\n   'Beautiful Soup, so rich and green,\r\n   Waiting in a hot tureen!\r\n   Who for such dainties would not stoop?\r\n   Soup of the evening, beautiful Soup!\r\n   Soup of the evening, beautiful Soup!\r\n     Beau--ootiful Soo--oop!\r\n     Beau--ootiful Soo--oop!\r\n   Soo--oop of the e--e--evening,\r\n     Beautiful, beautiful Soup!\r\n\r\n   'Beautiful Soup! 
Who cares for fish,\r\n   Game, or any other dish?\r\n   Who would not give all else for two\r\n   Pennyworth only of beautiful Soup?\r\n   Pennyworth only of beautiful Soup?\r\n     Beau--ootiful Soo--oop!\r\n     Beau--ootiful Soo--oop!\r\n   Soo--oop of the e--e--evening,\r\n     Beautiful, beauti--FUL SOUP!'\r\n\r\n'Chorus again!' cried the Gryphon, and the Mock Turtle had just begun\r\nto repeat it, when a cry of 'The trial's beginning!' was heard in the\r\ndistance.\r\n\r\n'Come on!' cried the Gryphon, and, taking Alice by the hand, it hurried\r\noff, without waiting for the end of the song.\r\n\r\n'What trial is it?' Alice panted as she ran; but the Gryphon only\r\nanswered 'Come on!' and ran the faster, while more and more faintly\r\ncame, carried on the breeze that followed them, the melancholy words:--\r\n\r\n   'Soo--oop of the e--e--evening,\r\n     Beautiful, beautiful Soup!'\r\n\r\n\r\n\r\n\r\nCHAPTER XI. Who Stole the Tarts?\r\n\r\nThe King and Queen of Hearts were seated on their throne when they\r\narrived, with a great crowd assembled about them--all sorts of little\r\nbirds and beasts, as well as the whole pack of cards: the Knave was\r\nstanding before them, in chains, with a soldier on each side to guard\r\nhim; and near the King was the White Rabbit, with a trumpet in one hand,\r\nand a scroll of parchment in the other. In the very middle of the court\r\nwas a table, with a large dish of tarts upon it: they looked so good,\r\nthat it made Alice quite hungry to look at them--'I wish they'd get the\r\ntrial done,' she thought, 'and hand round the refreshments!' But there\r\nseemed to be no chance of this, so she began looking at everything about\r\nher, to pass away the time.\r\n\r\nAlice had never been in a court of justice before, but she had read\r\nabout them in books, and she was quite pleased to find that she knew\r\nthe name of nearly everything there. 
'That's the judge,' she said to\r\nherself, 'because of his great wig.'\r\n\r\nThe judge, by the way, was the King; and as he wore his crown over the\r\nwig, (look at the frontispiece if you want to see how he did it,) he did\r\nnot look at all comfortable, and it was certainly not becoming.\r\n\r\n'And that's the jury-box,' thought Alice, 'and those twelve creatures,'\r\n(she was obliged to say 'creatures,' you see, because some of them were\r\nanimals, and some were birds,) 'I suppose they are the jurors.' She said\r\nthis last word two or three times over to herself, being rather proud of\r\nit: for she thought, and rightly too, that very few little girls of her\r\nage knew the meaning of it at all. However, 'jury-men' would have done\r\njust as well.\r\n\r\nThe twelve jurors were all writing very busily on slates. 'What are they\r\ndoing?' Alice whispered to the Gryphon. 'They can't have anything to put\r\ndown yet, before the trial's begun.'\r\n\r\n'They're putting down their names,' the Gryphon whispered in reply, 'for\r\nfear they should forget them before the end of the trial.'\r\n\r\n'Stupid things!' Alice began in a loud, indignant voice, but she stopped\r\nhastily, for the White Rabbit cried out, 'Silence in the court!' and the\r\nKing put on his spectacles and looked anxiously round, to make out who\r\nwas talking.\r\n\r\nAlice could see, as well as if she were looking over their shoulders,\r\nthat all the jurors were writing down 'stupid things!' on their slates,\r\nand she could even make out that one of them didn't know how to spell\r\n'stupid,' and that he had to ask his neighbour to tell him. 'A nice\r\nmuddle their slates'll be in before the trial's over!' thought Alice.\r\n\r\nOne of the jurors had a pencil that squeaked. This of course, Alice\r\ncould not stand, and she went round the court and got behind him, and\r\nvery soon found an opportunity of taking it away. 
She did it so quickly\r\nthat the poor little juror (it was Bill, the Lizard) could not make out\r\nat all what had become of it; so, after hunting all about for it, he was\r\nobliged to write with one finger for the rest of the day; and this was\r\nof very little use, as it left no mark on the slate.\r\n\r\n'Herald, read the accusation!' said the King.\r\n\r\nOn this the White Rabbit blew three blasts on the trumpet, and then\r\nunrolled the parchment scroll, and read as follows:--\r\n\r\n   'The Queen of Hearts, she made some tarts,\r\n      All on a summer day:\r\n    The Knave of Hearts, he stole those tarts,\r\n      And took them quite away!'\r\n\r\n'Consider your verdict,' the King said to the jury.\r\n\r\n'Not yet, not yet!' the Rabbit hastily interrupted. 'There's a great\r\ndeal to come before that!'\r\n\r\n'Call the first witness,' said the King; and the White Rabbit blew three\r\nblasts on the trumpet, and called out, 'First witness!'\r\n\r\nThe first witness was the Hatter. He came in with a teacup in one\r\nhand and a piece of bread-and-butter in the other. 'I beg pardon, your\r\nMajesty,' he began, 'for bringing these in: but I hadn't quite finished\r\nmy tea when I was sent for.'\r\n\r\n'You ought to have finished,' said the King. 'When did you begin?'\r\n\r\nThe Hatter looked at the March Hare, who had followed him into the\r\ncourt, arm-in-arm with the Dormouse. 'Fourteenth of March, I think it\r\nwas,' he said.\r\n\r\n'Fifteenth,' said the March Hare.\r\n\r\n'Sixteenth,' added the Dormouse.\r\n\r\n'Write that down,' the King said to the jury, and the jury eagerly\r\nwrote down all three dates on their slates, and then added them up, and\r\nreduced the answer to shillings and pence.\r\n\r\n'Take off your hat,' the King said to the Hatter.\r\n\r\n'It isn't mine,' said the Hatter.\r\n\r\n'Stolen!' 
the King exclaimed, turning to the jury, who instantly made a\r\nmemorandum of the fact.\r\n\r\n'I keep them to sell,' the Hatter added as an explanation; 'I've none of\r\nmy own. I'm a hatter.'\r\n\r\nHere the Queen put on her spectacles, and began staring at the Hatter,\r\nwho turned pale and fidgeted.\r\n\r\n'Give your evidence,' said the King; 'and don't be nervous, or I'll have\r\nyou executed on the spot.'\r\n\r\nThis did not seem to encourage the witness at all: he kept shifting\r\nfrom one foot to the other, looking uneasily at the Queen, and in\r\nhis confusion he bit a large piece out of his teacup instead of the\r\nbread-and-butter.\r\n\r\nJust at this moment Alice felt a very curious sensation, which puzzled\r\nher a good deal until she made out what it was: she was beginning to\r\ngrow larger again, and she thought at first she would get up and leave\r\nthe court; but on second thoughts she decided to remain where she was as\r\nlong as there was room for her.\r\n\r\n'I wish you wouldn't squeeze so.' said the Dormouse, who was sitting\r\nnext to her. 'I can hardly breathe.'\r\n\r\n'I can't help it,' said Alice very meekly: 'I'm growing.'\r\n\r\n'You've no right to grow here,' said the Dormouse.\r\n\r\n'Don't talk nonsense,' said Alice more boldly: 'you know you're growing\r\ntoo.'\r\n\r\n'Yes, but I grow at a reasonable pace,' said the Dormouse: 'not in that\r\nridiculous fashion.' And he got up very sulkily and crossed over to the\r\nother side of the court.\r\n\r\nAll this time the Queen had never left off staring at the Hatter, and,\r\njust as the Dormouse crossed the court, she said to one of the officers\r\nof the court, 'Bring me the list of the singers in the last concert!' 
on\r\nwhich the wretched Hatter trembled so, that he shook both his shoes off.\r\n\r\n'Give your evidence,' the King repeated angrily, 'or I'll have you\r\nexecuted, whether you're nervous or not.'\r\n\r\n'I'm a poor man, your Majesty,' the Hatter began, in a trembling voice,\r\n'--and I hadn't begun my tea--not above a week or so--and what with the\r\nbread-and-butter getting so thin--and the twinkling of the tea--'\r\n\r\n'The twinkling of the what?' said the King.\r\n\r\n'It began with the tea,' the Hatter replied.\r\n\r\n'Of course twinkling begins with a T!' said the King sharply. 'Do you\r\ntake me for a dunce? Go on!'\r\n\r\n'I'm a poor man,' the Hatter went on, 'and most things twinkled after\r\nthat--only the March Hare said--'\r\n\r\n'I didn't!' the March Hare interrupted in a great hurry.\r\n\r\n'You did!' said the Hatter.\r\n\r\n'I deny it!' said the March Hare.\r\n\r\n'He denies it,' said the King: 'leave out that part.'\r\n\r\n'Well, at any rate, the Dormouse said--' the Hatter went on, looking\r\nanxiously round to see if he would deny it too: but the Dormouse denied\r\nnothing, being fast asleep.\r\n\r\n'After that,' continued the Hatter, 'I cut some more bread-and-butter--'\r\n\r\n'But what did the Dormouse say?' one of the jury asked.\r\n\r\n'That I can't remember,' said the Hatter.\r\n\r\n'You MUST remember,' remarked the King, 'or I'll have you executed.'\r\n\r\nThe miserable Hatter dropped his teacup and bread-and-butter, and went\r\ndown on one knee. 'I'm a poor man, your Majesty,' he began.\r\n\r\n'You're a very poor speaker,' said the King.\r\n\r\nHere one of the guinea-pigs cheered, and was immediately suppressed by\r\nthe officers of the court. (As that is rather a hard word, I will just\r\nexplain to you how it was done. They had a large canvas bag, which tied\r\nup at the mouth with strings: into this they slipped the guinea-pig,\r\nhead first, and then sat upon it.)\r\n\r\n'I'm glad I've seen that done,' thought Alice. 
'I've so often read\r\nin the newspapers, at the end of trials, \"There was some attempts\r\nat applause, which was immediately suppressed by the officers of the\r\ncourt,\" and I never understood what it meant till now.'\r\n\r\n'If that's all you know about it, you may stand down,' continued the\r\nKing.\r\n\r\n'I can't go no lower,' said the Hatter: 'I'm on the floor, as it is.'\r\n\r\n'Then you may SIT down,' the King replied.\r\n\r\nHere the other guinea-pig cheered, and was suppressed.\r\n\r\n'Come, that finished the guinea-pigs!' thought Alice. 'Now we shall get\r\non better.'\r\n\r\n'I'd rather finish my tea,' said the Hatter, with an anxious look at the\r\nQueen, who was reading the list of singers.\r\n\r\n'You may go,' said the King, and the Hatter hurriedly left the court,\r\nwithout even waiting to put his shoes on.\r\n\r\n'--and just take his head off outside,' the Queen added to one of the\r\nofficers: but the Hatter was out of sight before the officer could get\r\nto the door.\r\n\r\n'Call the next witness!' said the King.\r\n\r\nThe next witness was the Duchess's cook. She carried the pepper-box in\r\nher hand, and Alice guessed who it was, even before she got into the\r\ncourt, by the way the people near the door began sneezing all at once.\r\n\r\n'Give your evidence,' said the King.\r\n\r\n'Shan't,' said the cook.\r\n\r\nThe King looked anxiously at the White Rabbit, who said in a low voice,\r\n'Your Majesty must cross-examine THIS witness.'\r\n\r\n'Well, if I must, I must,' the King said, with a melancholy air, and,\r\nafter folding his arms and frowning at the cook till his eyes were\r\nnearly out of sight, he said in a deep voice, 'What are tarts made of?'\r\n\r\n'Pepper, mostly,' said the cook.\r\n\r\n'Treacle,' said a sleepy voice behind her.\r\n\r\n'Collar that Dormouse,' the Queen shrieked out. 'Behead that Dormouse!\r\nTurn that Dormouse out of court! Suppress him! Pinch him! 
Off with his\r\nwhiskers!'\r\n\r\nFor some minutes the whole court was in confusion, getting the Dormouse\r\nturned out, and, by the time they had settled down again, the cook had\r\ndisappeared.\r\n\r\n'Never mind!' said the King, with an air of great relief. 'Call the next\r\nwitness.' And he added in an undertone to the Queen, 'Really, my dear,\r\nYOU must cross-examine the next witness. It quite makes my forehead\r\nache!'\r\n\r\nAlice watched the White Rabbit as he fumbled over the list, feeling very\r\ncurious to see what the next witness would be like, '--for they haven't\r\ngot much evidence YET,' she said to herself. Imagine her surprise, when\r\nthe White Rabbit read out, at the top of his shrill little voice, the\r\nname 'Alice!'\r\n\r\n\r\n\r\n             CHAPTER XII\r\n\r\n           Alice's Evidence\r\n\r\n\r\n'Here!' cried Alice, quite forgetting in the flurry of the moment how\r\nlarge she had grown in the last few minutes, and she jumped up in such\r\na hurry that she tipped over the jury-box with the edge of her skirt,\r\nupsetting all the jurymen on to the heads of the crowd below, and there\r\nthey lay sprawling about, reminding her very much of a globe of goldfish\r\nshe had accidentally upset the week before.\r\n\r\n'Oh, I BEG your pardon!' she exclaimed in a tone of great dismay, and\r\nbegan picking them up again as quickly as she could, for the accident of\r\nthe goldfish kept running in her head, and she had a vague sort of idea\r\nthat they must be collected at once and put back into the jury-box, or\r\nthey would die.\r\n\r\n'The trial cannot proceed,' said the King in a very grave voice, 'until\r\nall the jurymen are back in their proper places--ALL,' he repeated with\r\ngreat emphasis, looking hard at Alice as he said do.\r\n\r\nAlice looked at the jury-box, and saw that, in her haste, she had put\r\nthe Lizard in head downwards, and the poor little thing was waving its\r\ntail about in a melancholy way, being quite unable to move. 
She soon got\r\nit out again, and put it right; 'not that it signifies much,' she said\r\nto herself; 'I should think it would be QUITE as much use in the trial\r\none way up as the other.'\r\n\r\nAs soon as the jury had a little recovered from the shock of being\r\nupset, and their slates and pencils had been found and handed back to\r\nthem, they set to work very diligently to write out a history of the\r\naccident, all except the Lizard, who seemed too much overcome to do\r\nanything but sit with its mouth open, gazing up into the roof of the\r\ncourt.\r\n\r\n'What do you know about this business?' the King said to Alice.\r\n\r\n'Nothing,' said Alice.\r\n\r\n'Nothing WHATEVER?' persisted the King.\r\n\r\n'Nothing whatever,' said Alice.\r\n\r\n'That's very important,' the King said, turning to the jury. They were\r\njust beginning to write this down on their slates, when the White Rabbit\r\ninterrupted: 'UNimportant, your Majesty means, of course,' he said in a\r\nvery respectful tone, but frowning and making faces at him as he spoke.\r\n\r\n'UNimportant, of course, I meant,' the King hastily said, and went on\r\nto himself in an undertone,\r\n\r\n'important--unimportant--unimportant--important--' as if he were trying\r\nwhich word sounded best.\r\n\r\nSome of the jury wrote it down 'important,' and some 'unimportant.'\r\nAlice could see this, as she was near enough to look over their slates;\r\n'but it doesn't matter a bit,' she thought to herself.\r\n\r\nAt this moment the King, who had been for some time busily writing in\r\nhis note-book, cackled out 'Silence!' and read out from his book, 'Rule\r\nForty-two. 
ALL PERSONS MORE THAN A MILE HIGH TO LEAVE THE COURT.'\r\n\r\nEverybody looked at Alice.\r\n\r\n'I'M not a mile high,' said Alice.\r\n\r\n'You are,' said the King.\r\n\r\n'Nearly two miles high,' added the Queen.\r\n\r\n'Well, I shan't go, at any rate,' said Alice: 'besides, that's not a\r\nregular rule: you invented it just now.'\r\n\r\n'It's the oldest rule in the book,' said the King.\r\n\r\n'Then it ought to be Number One,' said Alice.\r\n\r\nThe King turned pale, and shut his note-book hastily. 'Consider your\r\nverdict,' he said to the jury, in a low, trembling voice.\r\n\r\n'There's more evidence to come yet, please your Majesty,' said the White\r\nRabbit, jumping up in a great hurry; 'this paper has just been picked\r\nup.'\r\n\r\n'What's in it?' said the Queen.\r\n\r\n'I haven't opened it yet,' said the White Rabbit, 'but it seems to be a\r\nletter, written by the prisoner to--to somebody.'\r\n\r\n'It must have been that,' said the King, 'unless it was written to\r\nnobody, which isn't usual, you know.'\r\n\r\n'Who is it directed to?' said one of the jurymen.\r\n\r\n'It isn't directed at all,' said the White Rabbit; 'in fact, there's\r\nnothing written on the OUTSIDE.' He unfolded the paper as he spoke, and\r\nadded 'It isn't a letter, after all: it's a set of verses.'\r\n\r\n'Are they in the prisoner's handwriting?' asked another of the jurymen.\r\n\r\n'No, they're not,' said the White Rabbit, 'and that's the queerest thing\r\nabout it.' (The jury all looked puzzled.)\r\n\r\n'He must have imitated somebody else's hand,' said the King. (The jury\r\nall brightened up again.)\r\n\r\n'Please your Majesty,' said the Knave, 'I didn't write it, and they\r\ncan't prove I did: there's no name signed at the end.'\r\n\r\n'If you didn't sign it,' said the King, 'that only makes the matter\r\nworse. 
You MUST have meant some mischief, or else you'd have signed your\r\nname like an honest man.'\r\n\r\nThere was a general clapping of hands at this: it was the first really\r\nclever thing the King had said that day.\r\n\r\n'That PROVES his guilt,' said the Queen.\r\n\r\n'It proves nothing of the sort!' said Alice. 'Why, you don't even know\r\nwhat they're about!'\r\n\r\n'Read them,' said the King.\r\n\r\nThe White Rabbit put on his spectacles. 'Where shall I begin, please\r\nyour Majesty?' he asked.\r\n\r\n'Begin at the beginning,' the King said gravely, 'and go on till you\r\ncome to the end: then stop.'\r\n\r\nThese were the verses the White Rabbit read:--\r\n\r\n   'They told me you had been to her,\r\n    And mentioned me to him:\r\n   She gave me a good character,\r\n    But said I could not swim.\r\n\r\n   He sent them word I had not gone\r\n    (We know it to be true):\r\n   If she should push the matter on,\r\n    What would become of you?\r\n\r\n   I gave her one, they gave him two,\r\n    You gave us three or more;\r\n   They all returned from him to you,\r\n    Though they were mine before.\r\n\r\n   If I or she should chance to be\r\n    Involved in this affair,\r\n   He trusts to you to set them free,\r\n    Exactly as we were.\r\n\r\n   My notion was that you had been\r\n    (Before she had this fit)\r\n   An obstacle that came between\r\n    Him, and ourselves, and it.\r\n\r\n   Don't let him know she liked them best,\r\n    For this must ever be\r\n   A secret, kept from all the rest,\r\n    Between yourself and me.'\r\n\r\n'That's the most important piece of evidence we've heard yet,' said the\r\nKing, rubbing his hands; 'so now let the jury--'\r\n\r\n'If any one of them can explain it,' said Alice, (she had grown so large\r\nin the last few minutes that she wasn't a bit afraid of interrupting\r\nhim,) 'I'll give him sixpence. 
_I_ don't believe there's an atom of\r\nmeaning in it.'\r\n\r\nThe jury all wrote down on their slates, 'SHE doesn't believe there's an\r\natom of meaning in it,' but none of them attempted to explain the paper.\r\n\r\n'If there's no meaning in it,' said the King, 'that saves a world of\r\ntrouble, you know, as we needn't try to find any. And yet I don't know,'\r\nhe went on, spreading out the verses on his knee, and looking at them\r\nwith one eye; 'I seem to see some meaning in them, after all. \"--SAID\r\nI COULD NOT SWIM--\" you can't swim, can you?' he added, turning to the\r\nKnave.\r\n\r\nThe Knave shook his head sadly. 'Do I look like it?' he said. (Which he\r\ncertainly did NOT, being made entirely of cardboard.)\r\n\r\n'All right, so far,' said the King, and he went on muttering over\r\nthe verses to himself: '\"WE KNOW IT TO BE TRUE--\" that's the jury, of\r\ncourse--\"I GAVE HER ONE, THEY GAVE HIM TWO--\" why, that must be what he\r\ndid with the tarts, you know--'\r\n\r\n'But, it goes on \"THEY ALL RETURNED FROM HIM TO YOU,\"' said Alice.\r\n\r\n'Why, there they are!' said the King triumphantly, pointing to the tarts\r\non the table. 'Nothing can be clearer than THAT. Then again--\"BEFORE SHE\r\nHAD THIS FIT--\" you never had fits, my dear, I think?' he said to the\r\nQueen.\r\n\r\n'Never!' said the Queen furiously, throwing an inkstand at the Lizard\r\nas she spoke. (The unfortunate little Bill had left off writing on his\r\nslate with one finger, as he found it made no mark; but he now hastily\r\nbegan again, using the ink, that was trickling down his face, as long as\r\nit lasted.)\r\n\r\n'Then the words don't FIT you,' said the King, looking round the court\r\nwith a smile. There was a dead silence.\r\n\r\n'It's a pun!' the King added in an offended tone, and everybody laughed,\r\n'Let the jury consider their verdict,' the King said, for about the\r\ntwentieth time that day.\r\n\r\n'No, no!' said the Queen. 
'Sentence first--verdict afterwards.'\r\n\r\n'Stuff and nonsense!' said Alice loudly. 'The idea of having the\r\nsentence first!'\r\n\r\n'Hold your tongue!' said the Queen, turning purple.\r\n\r\n'I won't!' said Alice.\r\n\r\n'Off with her head!' the Queen shouted at the top of her voice. Nobody\r\nmoved.\r\n\r\n'Who cares for you?' said Alice, (she had grown to her full size by this\r\ntime.) 'You're nothing but a pack of cards!'\r\n\r\nAt this the whole pack rose up into the air, and came flying down upon\r\nher: she gave a little scream, half of fright and half of anger, and\r\ntried to beat them off, and found herself lying on the bank, with her\r\nhead in the lap of her sister, who was gently brushing away some dead\r\nleaves that had fluttered down from the trees upon her face.\r\n\r\n'Wake up, Alice dear!' said her sister; 'Why, what a long sleep you've\r\nhad!'\r\n\r\n'Oh, I've had such a curious dream!' said Alice, and she told her\r\nsister, as well as she could remember them, all these strange Adventures\r\nof hers that you have just been reading about; and when she had\r\nfinished, her sister kissed her, and said, 'It WAS a curious dream,\r\ndear, certainly: but now run in to your tea; it's getting late.' 
So\r\nAlice got up and ran off, thinking while she ran, as well she might,\r\nwhat a wonderful dream it had been.\r\n\r\nBut her sister sat still just as she left her, leaning her head on her\r\nhand, watching the setting sun, and thinking of little Alice and all her\r\nwonderful Adventures, till she too began dreaming after a fashion, and\r\nthis was her dream:--\r\n\r\nFirst, she dreamed of little Alice herself, and once again the tiny\r\nhands were clasped upon her knee, and the bright eager eyes were looking\r\nup into hers--she could hear the very tones of her voice, and see that\r\nqueer little toss of her head to keep back the wandering hair that\r\nWOULD always get into her eyes--and still as she listened, or seemed to\r\nlisten, the whole place around her became alive the strange creatures of\r\nher little sister's dream.\r\n\r\nThe long grass rustled at her feet as the White Rabbit hurried by--the\r\nfrightened Mouse splashed his way through the neighbouring pool--she\r\ncould hear the rattle of the teacups as the March Hare and his friends\r\nshared their never-ending meal, and the shrill voice of the Queen\r\nordering off her unfortunate guests to execution--once more the pig-baby\r\nwas sneezing on the Duchess's knee, while plates and dishes crashed\r\naround it--once more the shriek of the Gryphon, the squeaking of the\r\nLizard's slate-pencil, and the choking of the suppressed guinea-pigs,\r\nfilled the air, mixed up with the distant sobs of the miserable Mock\r\nTurtle.\r\n\r\nSo she sat on, with closed eyes, and half believed herself in\r\nWonderland, though she knew she had but to open them again, and all\r\nwould change to dull reality--the grass would be only rustling in the\r\nwind, and the pool rippling to the waving of the reeds--the rattling\r\nteacups would change to tinkling sheep-bells, and the Queen's shrill\r\ncries to the voice of the shepherd boy--and the sneeze of the baby, the\r\nshriek of the Gryphon, and all the other queer 
noises, would change (she\r\nknew) to the confused clamour of the busy farm-yard--while the lowing\r\nof the cattle in the distance would take the place of the Mock Turtle's\r\nheavy sobs.\r\n\r\nLastly, she pictured to herself how this same little sister of hers\r\nwould, in the after-time, be herself a grown woman; and how she would\r\nkeep, through all her riper years, the simple and loving heart of her\r\nchildhood: and how she would gather about her other little children, and\r\nmake THEIR eyes bright and eager with many a strange tale, perhaps even\r\nwith the dream of Wonderland of long ago: and how she would feel with\r\nall their simple sorrows, and find a pleasure in all their simple joys,\r\nremembering her own child-life, and the happy summer days.\r\n\r\n              THE END\r\n\r\n\r\n\r\n\r\n\r\nEnd of Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll\r\n\r\n*** END OF THIS PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN WONDERLAND ***\r\n\r\n***** This file should be named 11.txt or 11.zip *****\r\nThis and all associated files of various formats will be found in:\r\n        http://www.gutenberg.org/1/11/\r\n\r\n\r\n\r\nUpdated editions will replace the previous one--the old editions\r\nwill be renamed.\r\n\r\nCreating the works from public domain print editions means that no\r\none owns a United States copyright in these works, so the Foundation\r\n(and you!) can copy and distribute it in the United States without\r\npermission and without paying copyright royalties.  Special rules,\r\nset forth in the General Terms of Use part of this license, apply to\r\ncopying and distributing Project Gutenberg-tm electronic works to\r\nprotect the PROJECT GUTENBERG-tm concept and trademark.  Project\r\nGutenberg is a registered trademark, and may not be used if you\r\ncharge for the eBooks, unless you receive specific permission.  If you\r\ndo not charge anything for copies of this eBook, complying with the\r\nrules is very easy.  
You may use this eBook for nearly any purpose\r\nsuch as creation of derivative works, reports, performances and\r\nresearch.  They may be modified and printed and given away--you may do\r\npractically ANYTHING with public domain eBooks.  Redistribution is\r\nsubject to the trademark license, especially commercial\r\nredistribution.\r\n\r\n\r\n\r\n*** START: FULL LICENSE ***\r\n\r\nTHE FULL PROJECT GUTENBERG LICENSE\r\nPLEASE READ THIS BEFORE YOU DISTRIBUTE OR USE THIS WORK\r\n\r\nTo protect the Project Gutenberg-tm mission of promoting the free\r\ndistribution of electronic works, by using or distributing this work\r\n(or any other work associated in any way with the phrase \"Project\r\nGutenberg\"), you agree to comply with all the terms of the Full Project\r\nGutenberg-tm License (available with this file or online at\r\nhttp://gutenberg.org/license).\r\n\r\n\r\nSection 1.  General Terms of Use and Redistributing Project Gutenberg-tm\r\nelectronic works\r\n\r\n1.A.  By reading or using any part of this Project Gutenberg-tm\r\nelectronic work, you indicate that you have read, understand, agree to\r\nand accept all the terms of this license and intellectual property\r\n(trademark/copyright) agreement.  If you do not agree to abide by all\r\nthe terms of this agreement, you must cease using and return or destroy\r\nall copies of Project Gutenberg-tm electronic works in your possession.\r\nIf you paid a fee for obtaining a copy of or access to a Project\r\nGutenberg-tm electronic work and you do not agree to be bound by the\r\nterms of this agreement, you may obtain a refund from the person or\r\nentity to whom you paid the fee as set forth in paragraph 1.E.8.\r\n\r\n1.B.  \"Project Gutenberg\" is a registered trademark.  It may only be\r\nused on or associated in any way with an electronic work by people who\r\nagree to be bound by the terms of this agreement.  
There are a few\r\nthings that you can do with most Project Gutenberg-tm electronic works\r\neven without complying with the full terms of this agreement.  See\r\nparagraph 1.C below.  There are a lot of things you can do with Project\r\nGutenberg-tm electronic works if you follow the terms of this agreement\r\nand help preserve free future access to Project Gutenberg-tm electronic\r\nworks.  See paragraph 1.E below.\r\n\r\n1.C.  The Project Gutenberg Literary Archive Foundation (\"the Foundation\"\r\nor PGLAF), owns a compilation copyright in the collection of Project\r\nGutenberg-tm electronic works.  Nearly all the individual works in the\r\ncollection are in the public domain in the United States.  If an\r\nindividual work is in the public domain in the United States and you are\r\nlocated in the United States, we do not claim a right to prevent you from\r\ncopying, distributing, performing, displaying or creating derivative\r\nworks based on the work as long as all references to Project Gutenberg\r\nare removed.  Of course, we hope that you will support the Project\r\nGutenberg-tm mission of promoting free access to electronic works by\r\nfreely sharing Project Gutenberg-tm works in compliance with the terms of\r\nthis agreement for keeping the Project Gutenberg-tm name associated with\r\nthe work.  You can easily comply with the terms of this agreement by\r\nkeeping this work in the same format with its attached full Project\r\nGutenberg-tm License when you share it without charge with others.\r\n\r\n1.D.  The copyright laws of the place where you are located also govern\r\nwhat you can do with this work.  Copyright laws in most countries are in\r\na constant state of change.  If you are outside the United States, check\r\nthe laws of your country in addition to the terms of this agreement\r\nbefore downloading, copying, displaying, performing, distributing or\r\ncreating derivative works based on this work or any other Project\r\nGutenberg-tm work.  
The Foundation makes no representations concerning\r\nthe copyright status of any work in any country outside the United\r\nStates.\r\n\r\n1.E.  Unless you have removed all references to Project Gutenberg:\r\n\r\n1.E.1.  The following sentence, with active links to, or other immediate\r\naccess to, the full Project Gutenberg-tm License must appear prominently\r\nwhenever any copy of a Project Gutenberg-tm work (any work on which the\r\nphrase \"Project Gutenberg\" appears, or with which the phrase \"Project\r\nGutenberg\" is associated) is accessed, displayed, performed, viewed,\r\ncopied or distributed:\r\n\r\nThis eBook is for the use of anyone anywhere at no cost and with\r\nalmost no restrictions whatsoever.  You may copy it, give it away or\r\nre-use it under the terms of the Project Gutenberg License included\r\nwith this eBook or online at www.gutenberg.org\r\n\r\n1.E.2.  If an individual Project Gutenberg-tm electronic work is derived\r\nfrom the public domain (does not contain a notice indicating that it is\r\nposted with permission of the copyright holder), the work can be copied\r\nand distributed to anyone in the United States without paying any fees\r\nor charges.  If you are redistributing or providing access to a work\r\nwith the phrase \"Project Gutenberg\" associated with or appearing on the\r\nwork, you must comply either with the requirements of paragraphs 1.E.1\r\nthrough 1.E.7 or obtain permission for the use of the work and the\r\nProject Gutenberg-tm trademark as set forth in paragraphs 1.E.8 or\r\n1.E.9.\r\n\r\n1.E.3.  If an individual Project Gutenberg-tm electronic work is posted\r\nwith the permission of the copyright holder, your use and distribution\r\nmust comply with both paragraphs 1.E.1 through 1.E.7 and any additional\r\nterms imposed by the copyright holder.  
Additional terms will be linked\r\nto the Project Gutenberg-tm License for all works posted with the\r\npermission of the copyright holder found at the beginning of this work.\r\n\r\n1.E.4.  Do not unlink or detach or remove the full Project Gutenberg-tm\r\nLicense terms from this work, or any files containing a part of this\r\nwork or any other work associated with Project Gutenberg-tm.\r\n\r\n1.E.5.  Do not copy, display, perform, distribute or redistribute this\r\nelectronic work, or any part of this electronic work, without\r\nprominently displaying the sentence set forth in paragraph 1.E.1 with\r\nactive links or immediate access to the full terms of the Project\r\nGutenberg-tm License.\r\n\r\n1.E.6.  You may convert to and distribute this work in any binary,\r\ncompressed, marked up, nonproprietary or proprietary form, including any\r\nword processing or hypertext form.  However, if you provide access to or\r\ndistribute copies of a Project Gutenberg-tm work in a format other than\r\n\"Plain Vanilla ASCII\" or other format used in the official version\r\nposted on the official Project Gutenberg-tm web site (www.gutenberg.org),\r\nyou must, at no additional cost, fee or expense to the user, provide a\r\ncopy, a means of exporting a copy, or a means of obtaining a copy upon\r\nrequest, of the work in its original \"Plain Vanilla ASCII\" or other\r\nform.  Any alternate format must include the full Project Gutenberg-tm\r\nLicense as specified in paragraph 1.E.1.\r\n\r\n1.E.7.  Do not charge a fee for access to, viewing, displaying,\r\nperforming, copying or distributing any Project Gutenberg-tm works\r\nunless you comply with paragraph 1.E.8 or 1.E.9.\r\n\r\n1.E.8.  
You may charge a reasonable fee for copies of or providing\r\naccess to or distributing Project Gutenberg-tm electronic works provided\r\nthat\r\n\r\n- You pay a royalty fee of 20% of the gross profits you derive from\r\n     the use of Project Gutenberg-tm works calculated using the method\r\n     you already use to calculate your applicable taxes.  The fee is\r\n     owed to the owner of the Project Gutenberg-tm trademark, but he\r\n     has agreed to donate royalties under this paragraph to the\r\n     Project Gutenberg Literary Archive Foundation.  Royalty payments\r\n     must be paid within 60 days following each date on which you\r\n     prepare (or are legally required to prepare) your periodic tax\r\n     returns.  Royalty payments should be clearly marked as such and\r\n     sent to the Project Gutenberg Literary Archive Foundation at the\r\n     address specified in Section 4, \"Information about donations to\r\n     the Project Gutenberg Literary Archive Foundation.\"\r\n\r\n- You provide a full refund of any money paid by a user who notifies\r\n     you in writing (or by e-mail) within 30 days of receipt that s/he\r\n     does not agree to the terms of the full Project Gutenberg-tm\r\n     License.  You must require such a user to return or\r\n     destroy all copies of the works possessed in a physical medium\r\n     and discontinue all use of and all access to other copies of\r\n     Project Gutenberg-tm works.\r\n\r\n- You provide, in accordance with paragraph 1.F.3, a full refund of any\r\n     money paid for a work or a replacement copy, if a defect in the\r\n     electronic work is discovered and reported to you within 90 days\r\n     of receipt of the work.\r\n\r\n- You comply with all other terms of this agreement for free\r\n     distribution of Project Gutenberg-tm works.\r\n\r\n1.E.9.  
If you wish to charge a fee or distribute a Project Gutenberg-tm\r\nelectronic work or group of works on different terms than are set\r\nforth in this agreement, you must obtain permission in writing from\r\nboth the Project Gutenberg Literary Archive Foundation and Michael\r\nHart, the owner of the Project Gutenberg-tm trademark.  Contact the\r\nFoundation as set forth in Section 3 below.\r\n\r\n1.F.\r\n\r\n1.F.1.  Project Gutenberg volunteers and employees expend considerable\r\neffort to identify, do copyright research on, transcribe and proofread\r\npublic domain works in creating the Project Gutenberg-tm\r\ncollection.  Despite these efforts, Project Gutenberg-tm electronic\r\nworks, and the medium on which they may be stored, may contain\r\n\"Defects,\" such as, but not limited to, incomplete, inaccurate or\r\ncorrupt data, transcription errors, a copyright or other intellectual\r\nproperty infringement, a defective or damaged disk or other medium, a\r\ncomputer virus, or computer codes that damage or cannot be read by\r\nyour equipment.\r\n\r\n1.F.2.  LIMITED WARRANTY, DISCLAIMER OF DAMAGES - Except for the \"Right\r\nof Replacement or Refund\" described in paragraph 1.F.3, the Project\r\nGutenberg Literary Archive Foundation, the owner of the Project\r\nGutenberg-tm trademark, and any other party distributing a Project\r\nGutenberg-tm electronic work under this agreement, disclaim all\r\nliability to you for damages, costs and expenses, including legal\r\nfees.  YOU AGREE THAT YOU HAVE NO REMEDIES FOR NEGLIGENCE, STRICT\r\nLIABILITY, BREACH OF WARRANTY OR BREACH OF CONTRACT EXCEPT THOSE\r\nPROVIDED IN PARAGRAPH F3.  YOU AGREE THAT THE FOUNDATION, THE\r\nTRADEMARK OWNER, AND ANY DISTRIBUTOR UNDER THIS AGREEMENT WILL NOT BE\r\nLIABLE TO YOU FOR ACTUAL, DIRECT, INDIRECT, CONSEQUENTIAL, PUNITIVE OR\r\nINCIDENTAL DAMAGES EVEN IF YOU GIVE NOTICE OF THE POSSIBILITY OF SUCH\r\nDAMAGE.\r\n\r\n1.F.3.  
LIMITED RIGHT OF REPLACEMENT OR REFUND - If you discover a\r\ndefect in this electronic work within 90 days of receiving it, you can\r\nreceive a refund of the money (if any) you paid for it by sending a\r\nwritten explanation to the person you received the work from.  If you\r\nreceived the work on a physical medium, you must return the medium with\r\nyour written explanation.  The person or entity that provided you with\r\nthe defective work may elect to provide a replacement copy in lieu of a\r\nrefund.  If you received the work electronically, the person or entity\r\nproviding it to you may choose to give you a second opportunity to\r\nreceive the work electronically in lieu of a refund.  If the second copy\r\nis also defective, you may demand a refund in writing without further\r\nopportunities to fix the problem.\r\n\r\n1.F.4.  Except for the limited right of replacement or refund set forth\r\nin paragraph 1.F.3, this work is provided to you 'AS-IS' WITH NO OTHER\r\nWARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO\r\nWARRANTIES OF MERCHANTIBILITY OR FITNESS FOR ANY PURPOSE.\r\n\r\n1.F.5.  Some states do not allow disclaimers of certain implied\r\nwarranties or the exclusion or limitation of certain types of damages.\r\nIf any disclaimer or limitation set forth in this agreement violates the\r\nlaw of the state applicable to this agreement, the agreement shall be\r\ninterpreted to make the maximum disclaimer or limitation permitted by\r\nthe applicable state law.  The invalidity or unenforceability of any\r\nprovision of this agreement shall not void the remaining provisions.\r\n\r\n1.F.6.  
INDEMNITY - You agree to indemnify and hold the Foundation, the\r\ntrademark owner, any agent or employee of the Foundation, anyone\r\nproviding copies of Project Gutenberg-tm electronic works in accordance\r\nwith this agreement, and any volunteers associated with the production,\r\npromotion and distribution of Project Gutenberg-tm electronic works,\r\nharmless from all liability, costs and expenses, including legal fees,\r\nthat arise directly or indirectly from any of the following which you do\r\nor cause to occur: (a) distribution of this or any Project Gutenberg-tm\r\nwork, (b) alteration, modification, or additions or deletions to any\r\nProject Gutenberg-tm work, and (c) any Defect you cause.\r\n\r\n\r\nSection  2.  Information about the Mission of Project Gutenberg-tm\r\n\r\nProject Gutenberg-tm is synonymous with the free distribution of\r\nelectronic works in formats readable by the widest variety of computers\r\nincluding obsolete, old, middle-aged and new computers.  It exists\r\nbecause of the efforts of hundreds of volunteers and donations from\r\npeople in all walks of life.\r\n\r\nVolunteers and financial support to provide volunteers with the\r\nassistance they need, is critical to reaching Project Gutenberg-tm's\r\ngoals and ensuring that the Project Gutenberg-tm collection will\r\nremain freely available for generations to come.  In 2001, the Project\r\nGutenberg Literary Archive Foundation was created to provide a secure\r\nand permanent future for Project Gutenberg-tm and future generations.\r\nTo learn more about the Project Gutenberg Literary Archive Foundation\r\nand how your efforts and donations can help, see Sections 3 and 4\r\nand the Foundation web page at http://www.pglaf.org.\r\n\r\n\r\nSection 3.  
Information about the Project Gutenberg Literary Archive\r\nFoundation\r\n\r\nThe Project Gutenberg Literary Archive Foundation is a non profit\r\n501(c)(3) educational corporation organized under the laws of the\r\nstate of Mississippi and granted tax exempt status by the Internal\r\nRevenue Service.  The Foundation's EIN or federal tax identification\r\nnumber is 64-6221541.  Its 501(c)(3) letter is posted at\r\nhttp://pglaf.org/fundraising.  Contributions to the Project Gutenberg\r\nLiterary Archive Foundation are tax deductible to the full extent\r\npermitted by U.S. federal laws and your state's laws.\r\n\r\nThe Foundation's principal office is located at 4557 Melan Dr. S.\r\nFairbanks, AK, 99712., but its volunteers and employees are scattered\r\nthroughout numerous locations.  Its business office is located at\r\n809 North 1500 West, Salt Lake City, UT 84116, (801) 596-1887, email\r\nbusiness@pglaf.org.  Email contact links and up to date contact\r\ninformation can be found at the Foundation's web site and official\r\npage at http://pglaf.org\r\n\r\nFor additional contact information:\r\n     Dr. Gregory B. Newby\r\n     Chief Executive and Director\r\n     gbnewby@pglaf.org\r\n\r\n\r\nSection 4.  Information about Donations to the Project Gutenberg\r\nLiterary Archive Foundation\r\n\r\nProject Gutenberg-tm depends upon and cannot survive without wide\r\nspread public support and donations to carry out its mission of\r\nincreasing the number of public domain and licensed works that can be\r\nfreely distributed in machine readable form accessible by the widest\r\narray of equipment including outdated equipment.  Many small donations\r\n($1 to $5,000) are particularly important to maintaining tax exempt\r\nstatus with the IRS.\r\n\r\nThe Foundation is committed to complying with the laws regulating\r\ncharities and charitable donations in all 50 states of the United\r\nStates.  
Compliance requirements are not uniform and it takes a\r\nconsiderable effort, much paperwork and many fees to meet and keep up\r\nwith these requirements.  We do not solicit donations in locations\r\nwhere we have not received written confirmation of compliance.  To\r\nSEND DONATIONS or determine the status of compliance for any\r\nparticular state visit http://pglaf.org\r\n\r\nWhile we cannot and do not solicit contributions from states where we\r\nhave not met the solicitation requirements, we know of no prohibition\r\nagainst accepting unsolicited donations from donors in such states who\r\napproach us with offers to donate.\r\n\r\nInternational donations are gratefully accepted, but we cannot make\r\nany statements concerning tax treatment of donations received from\r\noutside the United States.  U.S. laws alone swamp our small staff.\r\n\r\nPlease check the Project Gutenberg Web pages for current donation\r\nmethods and addresses.  Donations are accepted in a number of other\r\nways including checks, online payments and credit card donations.\r\nTo donate, please visit: http://pglaf.org/donate\r\n\r\n\r\nSection 5.  General Information About Project Gutenberg-tm electronic\r\nworks.\r\n\r\nProfessor Michael S. Hart is the originator of the Project Gutenberg-tm\r\nconcept of a library of electronic works that could be freely shared\r\nwith anyone.  For thirty years, he produced and distributed Project\r\nGutenberg-tm eBooks with only a loose network of volunteer support.\r\n\r\n\r\nProject Gutenberg-tm eBooks are often created from several printed\r\neditions, all of which are confirmed as Public Domain in the U.S.\r\nunless a copyright notice is included.  
Thus, we do not necessarily\r\nkeep eBooks in compliance with any particular paper edition.\r\n\r\n\r\nMost people start at our Web site which has the main PG search facility:\r\n\r\n     http://www.gutenberg.org\r\n\r\nThis Web site includes information about Project Gutenberg-tm,\r\nincluding how to make donations to the Project Gutenberg Literary\r\nArchive Foundation, how to help produce our new eBooks, and how to\r\nsubscribe to our email newsletter to hear about new eBooks.\r\n"
  },
  {
    "path": "examples/input_format/check_results.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport logging\n\nlogging.basicConfig(level=logging.INFO)\n\nimport pydoop.hadut as hadut\nimport pydoop.test_support as pts\n\n\ndef get_res(output_dir):\n    all_data = hadut.collect_output(output_dir)\n    return pts.parse_mr_output(all_data, vtype=int)\n\n\ndef check(measured_res, expected_res):\n    res = pts.compare_counts(measured_res, expected_res)\n    if res:\n        return \"ERROR: %s\" % res\n    else:\n        return \"OK.\"\n\n\ndef main(argv):\n    logger = logging.getLogger(\"main\")\n    logger.setLevel(logging.INFO)\n    input_dir = argv[1]\n    output_dir = argv[2]\n    logger.info(\"checking results\")\n    lwc = pts.LocalWordCount(input_dir)\n    measured_res = get_res(output_dir)\n    expected_res = lwc.expected_output\n    logger.info(check(measured_res, expected_res))\n\n\nif __name__ == \"__main__\":\n    main(sys.argv)\n"
  },
  {
    "path": "examples/input_format/it/crs4/pydoop/mapred/TextInputFormat.java",
    "content": "// BEGIN_COPYRIGHT\n// \n// Copyright 2009-2026 CRS4.\n// \n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n// \n//   http://www.apache.org/licenses/LICENSE-2.0\n// \n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n// \n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapred;\n\nimport java.io.*;\n\nimport org.apache.hadoop.fs.*;\nimport org.apache.hadoop.mapred.*;\nimport org.apache.hadoop.io.LongWritable;\nimport org.apache.hadoop.io.Text;\n\n\npublic class TextInputFormat extends FileInputFormat<LongWritable, Text>\n    implements JobConfigurable {\n    \n    private Boolean will_split;\n\n    public void configure(JobConf conf) {\n        will_split = conf.getBoolean(\"pydoop.input.issplitable\", true);\n    }\n\n    protected boolean isSplitable(FileSystem fs, Path file) {\n        return will_split;\n    }\n    \n    public RecordReader<LongWritable, Text> getRecordReader(\n                                          InputSplit genericSplit, JobConf job,\n                                          Reporter reporter)\n\tthrows IOException {\n        reporter.setStatus(genericSplit.toString());\n        return new LineRecordReader(job, (FileSplit) genericSplit);\n    }\n}\n"
  },
  {
    "path": "examples/input_format/it/crs4/pydoop/mapreduce/TextInputFormat.java",
    "content": "// BEGIN_COPYRIGHT\n// \n// Copyright 2009-2026 CRS4.\n// \n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n// \n//   http://www.apache.org/licenses/LICENSE-2.0\n// \n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n// \n// END_COPYRIGHT\n\n// DOCS_INCLUDE_START\npackage it.crs4.pydoop.mapreduce;\n\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.LongWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.JobContext;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\nimport org.apache.hadoop.mapreduce.lib.input.LineRecordReader;\n\n\npublic class TextInputFormat extends FileInputFormat<LongWritable, Text> {\n\n    @Override\n    public RecordReader<LongWritable, Text> createRecordReader(\n        InputSplit split, TaskAttemptContext context) {\n      return new LineRecordReader();\n    }\n\n    @Override\n    protected boolean isSplitable(JobContext context, Path file) {\n      return context.getConfiguration().getBoolean(\n          \"pydoop.input.issplitable\", true);\n    }\n}\n"
  },
  {
    "path": "examples/input_format/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nmodule=\"wordcount_minimal\"\nmodule_path=\"${this_dir}/../pydoop_submit/mr/${module}.py\"\njob_name=\"input_format_test_job\"\njar_name=\"pydoop-input-formats.jar\"\n\nwd=$(mktemp -d)\njavac -cp $(${HADOOP} classpath) -d \"${wd}\" it/crs4/pydoop/mapred*/*.java\njar cvf \"${wd}/${jar_name}\" -C \"${wd}\" it\nopts=(\n    \"--upload-file-to-cache\" \"${module_path}\"\n    \"--entry-point\" \"main\"\n    \"--input-format\" \"it.crs4.pydoop.mapreduce.TextInputFormat\"\n    \"--libjars\" \"${wd}/${jar_name}\"\n    \"-D\" \"pydoop.input.issplitable=true\"\n    \"-D\" \"mapreduce.job.name=${job_name}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n)\n[ -n \"${DEBUG:-}\" ] && opts+=( \"--log-level\" \"DEBUG\" )\n\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    input=\"input\"\n    output=\"output\"\n    ${HDFS} dfs -rm -r -f \"${input}\" \"${output}\"\n    ${HDFS} dfs -put \"${this_dir}/../input\" \"${input}\"\nelse\n    input=\"${this_dir}/../input\"\n    output=\"${wd}/output\"\nfi\n\n${PYDOOP} submit \"${opts[@]}\" ${module} \"${input}\" \"${output}\"\n${PYTHON} \"${this_dir}\"/check_results.py \"${this_dir}/../input\" 
\"${output}\"\n\nrm -rf \"${wd}\"\n"
  },
  {
    "path": "examples/pydoop_script/check.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport os\nimport argparse\nfrom collections import Counter\n\nimport pydoop.hadut as hadut\nimport pydoop.hdfs as hdfs\nimport pydoop.test_support as pts\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nDEFAULT_INPUT_DIR = os.path.join(THIS_DIR, os.pardir, \"input\")\nCHECKS = [\n    \"base_histogram\",\n    \"caseswitch\",\n    \"grep\",\n    \"grep_compiled\",\n    \"lowercase\",\n    \"transpose\",\n    \"wc_combiner\",\n    \"wordcount\",\n    \"wordcount_sw\",\n]\n\n\ndef check_base_histogram(mr_out_dir):\n    output = Counter()\n    for line in hadut.collect_output(mr_out_dir).splitlines():\n        k, v = line.split(\"\\t\")\n        output[k] = int(v)\n    exp_output = Counter()\n    in_dir = os.path.join(THIS_DIR, \"data\", \"base_histogram_input\")\n    for name in os.listdir(in_dir):\n        with open(os.path.join(in_dir, name)) as f:\n            for line in f:\n                for base in line.rstrip().split(\"\\t\", 10)[9]:\n                    exp_output[base] += 1\n    return output == exp_output\n\n\ndef check_caseswitch(mr_out_dir, switch=\"upper\"):\n    output = set(hadut.collect_output(mr_out_dir).splitlines())\n    exp_output = set()\n    for name in os.listdir(DEFAULT_INPUT_DIR):\n        with open(os.path.join(DEFAULT_INPUT_DIR, name)) as f:\n            
exp_output.update(getattr(_.rstrip(), switch)() for _ in f)\n    return output == exp_output\n\n\ndef check_grep(mr_out_dir):\n    output = set(hadut.collect_output(mr_out_dir).splitlines())\n    exp_output = set()\n    for name in os.listdir(DEFAULT_INPUT_DIR):\n        with open(os.path.join(DEFAULT_INPUT_DIR, name)) as f:\n            exp_output.update(_.rstrip() for _ in f if \"March\" in _)\n    return output == exp_output\n\n\ncheck_grep_compiled = check_grep\n\n\ndef check_lowercase(mr_out_dir):\n    return check_caseswitch(mr_out_dir, switch=\"lower\")\n\n\ndef check_transpose(mr_out_dir):\n    output = []\n    for fn in hadut.iter_mr_out_files(mr_out_dir):\n        with hdfs.open(fn, \"rt\") as f:\n            for line in f:\n                row = line.rstrip().split(\"\\t\")\n                index = int(row.pop(0))\n                output.append((index, row))\n    output = [_[1] for _ in sorted(output)]\n    exp_output = []\n    in_fn = os.path.join(THIS_DIR, \"data\", \"transpose_input\", \"matrix.txt\")\n    with open(in_fn) as f:\n        for line in f:\n            for i, item in enumerate(line.split()):\n                try:\n                    exp_output[i].append(item)\n                except IndexError:\n                    exp_output.append([item])\n    return output == exp_output\n\n\ndef check_wordcount(mr_out_dir, stop_words=None):\n    output = hadut.collect_output(mr_out_dir)\n    local_wc = pts.LocalWordCount(DEFAULT_INPUT_DIR, stop_words=stop_words)\n    res = local_wc.check(output)\n    return res.startswith(\"OK\")  # FIXME: change local_wc to raise an exception\n\n\ndef check_wordcount_sw(mr_out_dir):\n    with open(os.path.join(THIS_DIR, \"data\", \"stop_words.txt\"), \"rt\") as f:\n        stop_words = frozenset(_.strip() for _ in f if not _.isspace())\n    return check_wordcount(mr_out_dir, stop_words=stop_words)\n\n\ncheck_wc_combiner = check_wordcount\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser()\n    
parser.add_argument(\"name\", metavar=\"NAME\", choices=CHECKS,\n                        help=\"one of: %s\" % \"; \".join(CHECKS))\n    parser.add_argument(\"mr_out\", metavar=\"DIR\", help=\"MapReduce out dir\")\n    return parser\n\n\ndef main(argv):\n    parser = make_parser()\n    args = parser.parse_args(argv)\n    check = globals()[\"check_%s\" % args.name]\n    if check(args.mr_out):\n        print(\"OK.\")\n    else:\n        sys.exit(\"ERROR: output differs from the expected one\")\n\n\nif __name__ == \"__main__\":\n    main(sys.argv[1:])\n"
  },
  {
    "path": "examples/pydoop_script/data/base_histogram_input/example_1.sam",
    "content": "foo_0/1\t81\tchr6\t3558357\t37\t91M\t*\t0\t0\tAGCTTCTTTGACTCTCGAATTTTAGCACTAGAAGAAATAGTGAGGATTATATATTTCAGAAGTTCTCACCCAGGATATCAGAACACATTCA\t5:CB:CCBCCB>:C@;BBBB??B;?>1@@=C=4ACCAB3A8=CC=C?CBC=CBCCCCCCCCCCCCC@5>?=?CAAB=3=>====5>=AC?C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_1/1\t99\tchr1\t155858007\t60\t91M\t=\t155858380\t464\tTGCTGTGCTTCAAATGGATAAAGCCACATTATGTCAACAAGAGGCTTGTTATCTTGGTAACCAGTTACCGTTTTTATGTCCATTCTGCCCT\tEEEEEBEFDFGGGDGGGGGGGDDDGGFGGGF?DFAFDBF?A,CC?B9.:?27;-:=A##################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:68A17C4\nfoo_1/2\t147\tchr1\t155858380\t60\t91M\t=\t155858007\t-464\tGTTTCATGCCATTCCCCTGCCTCAGCCCCCTGAGTAGCTGGGACTACAGGTGCCCGCCACCATGCCATGCAGAGCTTTTAAAAACACAGAT\t#####################################################??+@@>B@-FAEFE?EDEB5EDECC=8?BDDAC?=DDD\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:1G12T12T63\nfoo_3/1\t99\tchr7\t148337326\t60\t91M\t=\t148337704\t469\tCAACAACAACAAAAAAAACCCGGAAAAGTTCACAGAAAAACAGCTAATTTGCCAGAAGCTGCTGTCAACTGATGTCTATAAGCAGCACTGA\tGGGGGGGGGGGFGGGGFGGGGGGDGDEEECFFFFFGAGDGGFGFGG:EFGDBGEFE?B=?ECCBCBA@?AAAAAADDA:BCCCA?B=CB=9\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:17C73\nfoo_3/2\t147\tchr7\t148337704\t60\t91M\t=\t148337326\t-469\tAAGGACCATGTGGTCTGTCCCAGCTGCTTAACTCTGCTGCTGCAGTGTGAAAGCAGCTGGACAGGGTGTGGACAAAGCTGTGTTCCAATGA\t#########@@C-@?C5-DDD-EDA:FFFFF?=?>4BB-DDEBADDDDDDC-FFAFB=EED?B;GFEEEEE=:EDDGGFDGFGFGGGGFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_5/1\t99\tchr2\t157433580\t60\t91M\t=\t157433946\t457\tCTGGGTAAAAGTAGCTGTAGATGTCATCTCATTTATCTGAAGTACCGTTTTCTCACATTAACTCTTTCAATTTCATTGCCACATCAGATCA\tGGGGGGGGGGGEGGGGGGGGFFGFGBGGFGDGGGGGGGGGGGBGGFGDGGGFGFGGFEGGGEGGGDFDAEGFBEEGEEGGGGFGGGGBGFB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_5/2\t147\tc
hr2\t157433946\t60\t91M\t=\t157433580\t-457\tGCATAAGGAAAAACACACCTAGACACACAGAGAAAGATAGCTAGATACTGAAGACAAAGAGAAAATATTAAAAGCAATCAGAGAAGAAAAG\t?:=DE=E>@EEC?A?A=FE=EGDGGGGGGGGGGGGGGGGGGFGGFDGFDFGGGGGGGEGGGGGFFDGGGGGGEGGGGGAGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_7/1\t81\tchr17\t72146167\t37\t91M\t*\t0\t0\tTGATAAAAGCGATAATCCTCAGCCCCCTGAGTAGCTGGGATTACAGGCAACCGCCACCACGCCTGGCTAACTTTTGTATTTTTATTTATTG\t################################################?5BGAGGDFFFFBFGEF=GGGGGFFGEGGG@GGGEEDGGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:16A7T66\nfoo_8/1\t99\tchr13\t75552481\t60\t91M\t=\t75552867\t477\tGGTGAGGTTGCAGAGAAAAGGAACACCTTTACACTGCTAGTGGAAGTGTAAATTAGTCCAACCACTGCGGAAGACAGTGTGACAATTCCTC\tFFEFFGGEGGGGGGGGGGGGGBGDDGGGGEEGGGGGGGGEEGGGECACABFFEDFC@CECEBEEABCCCDBC:AB7;8;5CBB:=?BBBBG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_8/2\t147\tchr13\t75552867\t60\t91M\t=\t75552481\t-477\tCAGAAAAACAAACACTGCACATTCTCACCTATAAGTGGGAGGTGAATGATGAGAACACATGGAAAGATGGGGTGGGGGAAAACACATACTG\t;GEGGFEBACCBEEEBFDFFFFEFFFEGFEGGGGGFGGGGGGGGBG=GGFGGGEDGGGFGGGEGGGGGGGGGGGGGGGGGGEGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_10/1\t99\tchr16\t82273022\t60\t91M\t=\t82273387\t456\tAGCAGGCACTGGAAGGAGAAACTTAGCTCGAAAAGAACTCTTCCTTCTTCTCCTCCTCCTCCTCCACAAAACAGGAATCAGGTGTCTCCTA\tGGGGGGEEGGGFCGDE:EEEFFFFDDBAFEEFFFBGGEAGDGEDDGGGGFDEGEBE=:EB?A>>CDEFGFFB:DFADAD=4A077C>CCAE\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:15A12G61G0\nfoo_10/2\t147\tchr16\t82273387\t60\t91M\t=\t82273022\t-456\tGTCTTCCATGATTATTCATAAGGAGGTGGGAAGAGGCGTTACTAGGAAGCTGTTCTTGGTGGTCTTCCGGGTGCACATGTGCAGCAGCTGT\tC?:AA?:=661C??C=B:AE;ECECA:EABEACC5CDBBD?AECEEGBEFECCA>B+CBAAC5=C:D;EEEEECA=DEFFFD?EADFEFFE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:56G34\nfoo_12/1\t83\tchr9\t21719826\t60\t91M\t=\t21
719457\t-460\tAAAGGAATGCTGGGCAAAAATTTTGCGAAACTGATGAAAGACACCAGAAATGTAAGTAATTCTTTGTACCCCAAGCAGGATAACTACAACG\tFGBFGGEEFGEEEEECE?EBEGGGGGEEEBBBGEGGGDEGEEBE=BFGGGFGFFGGFGFGGGGGGGGBGDGGGGGGGFGGFGEGGGGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:26A42T21\nfoo_12/2\t163\tchr9\t21719457\t60\t91M\t=\t21719826\t460\tTAGATCAACAAATAATACAGATACTGAAGTTATCAAACACAGACTTTAAAACAGCTATGATTAACATTTTCAAGAAAACAGATTTAAAAGT\tFGGGGGGDGGGGDGGGGGBGGGGGGGEFGBGGFGGGGEGGGGAGGGGFGDGGGGGFGGGFGFEDFFEFFDDGDDBGF>>GGEFEGFFF?=1\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_14/1\t99\tchr8\t77600428\t60\t91M\t=\t77600797\t460\tCACTGTCCACTGTGAAAAATAATAAGAAAAAATACAAATTATAACGAATTTGCCTTCAGGAGGCCTTGTTAGACATAATGGGACAATACCT\tGGGGGGGGGGGGGGGGGGGGGGFGFEGGGGGGEGGGGGGGGGGGGFFGGEGGFGGEGGGGDEEEEEBEEEEEEEGGFFGGEG@EAGEGEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_14/2\t147\tchr8\t77600797\t60\t91M\t=\t77600428\t-460\tAGACTTTTCTTCAACTTAAACTTTGCATTGAAATAAAAAAAGAATTGTACATGACATTATGGAAAATAACAAATCAACCCTTAAATCAGCA\tGG=GCGDGFFGGGGGGFDFEFDFGGGGGAFGGGGGGEEFFDDGGGDGFFFDFGGEGDGGGGGGGGGGGEGGGGBGGGGGGGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_16/1\t99\tchrX\t73888313\t60\t91M\t=\t73888696\t474\tTCCTTCCCTTCCTCTTCTTTCCTTCCCTCCCTCCCTCTTCTTTCCTTCCCTCCCTCCCTCCTCCCTTCCTTCCCTTCCTTCCCTTCCTTCC\tGGGGGGGGG:GGGGGGGGGGEGFGGGDGGGD<FECGGBGB?CE:AA?5?ABC?;0>@6(@###############################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_16/2\t147\tchrX\t73888696\t60\t91M\t=\t73888313\t-474\tATTACATGTACTACAAAAAATTGACATGCAAGAATAAATTCTGATTTAAGTTTTGTGTGATACTGAGATGAGCCTGAGGGAACTCTTTAAT\t?BADD:FFFFF>CDDGG?FFGGGGGFGGAGGGBGGGEFF5FGGDFGGGFGDFGGGGGGEFGGGEGGFDFF:FGGGGFGGGGFFEGGFEGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_18/1\t83\tchr14\t44712230\t60\t91M\t=\t44711860\t-461\tTTCTTAATTTGAAGTAA
TAAGAAATACTTTGTTTTTCTGTATAGTTCCTTGTGATCTCAGTAAAGAATGAGTTTTCTGCCTTAAGAGAAGA\tE:?ECDDC:EEAAAEEBC:CB9CA:B:5?A?;D?5?FEBEDEEEEEAB:DD=DD?CEB:EECACACB:EBEAAC?A=?AFFEEFEFFFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_18/2\t163\tchr14\t44711860\t60\t91M\t=\t44712230\t461\tCTTTTTTTCTTCTGAGATTTTAGTTGGGAGTATCTAAATGGATCCAAATTATGTTAAGCTGATTATATGTATATTGAGTGTTTTTTAAAAT\tD?EEDEEEBEBCE=DA5CAADEEBD@;?>@DBD5=CDD:DFFFEFBEDCFDADEE=BEEEDF=FEF:FDEFFDFFAA?=C=DDA@>5;,@;\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_20/2\t129\tchr1\t207736382\t37\t91M\t*\t0\t0\tTTCAATGTCCACCTCCCAGAATCAAACATCTTCCCACCTCAGCCCCCCCAAGTAGCTGAGACTACGGGCACACATCACCACACCTGGCTAA\t;<5;;@>C>@@,>;?99A9?>>.>@?-BB.5;2;AAAAC?3*37=27>111:3?7:9-<29<;>9=@>46)7/;59-09??:A5A@.;>B@\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_21/1\t83\tchr7\t50438238\t60\t91M\t=\t50437868\t-461\tCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAAT\tBB?GFBGGGFGEGGGGGEGEGGGEEGGGGGGEGEGEGGGFFGFGGEGBFGGGDFGGFGGFFFBFCFDGECGFGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_21/2\t163\tchr7\t50437868\t60\t5M1I85M\t=\t50438238\t461\tGTCTTCAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCT\tGGGGGGDGGGGGGGFGGGGGGGGGDFGGGGGGF=GFGGGFGEGGGGGFFGGGGDFGGGGGFGGGDFGGFGGGGGGGEEFFAEEEBDBGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:90\nfoo_23/1\t83\tchr9\t108075300\t60\t91M\t=\t108074918\t-473\tGGCTGTGTATTTACAGGAAGACTTACTTGGCTGGTATTTACTTCATGTGGACATAAAATGTGGAAAGTAGGTACTGTATATAACTTTATTT\tGGGGGGGGGGGGGGGGD?EECFGFDFFGGEDDGEDFGGFFGGDGGGGGEGGGFGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_23/2\t163\tchr9\t108074918\t60\t91M\t=\t108075300\t473\tTGTAGTTGTACAAGTAGGAGAGAGTGAAGTTTCTATCAAGATTGTAGCAGTTGAG
ATAGAAAAAGGGAAACAGATTTTAGGTATATTTTGG\tGGGGGGGGGEFFFFFFFEFFDEFEBFFDFFGFGGGGGGFGBFEFFGFGGGDEEEEGGEGFGFEAGGGBGFGGG=GFFBABFDFDF?EEEEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_25/1\t65\tchr18\t16773270\t25\t91M\t*\t0\t0\tTCTACAAAAAGAGTGTTTCAAAACTGCTCTGTAAAAAGAAAGGTTCAACTATGTTAGTTGAGTACACATATCACAAACAAGTTTCACAGAA\tGGGGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGGFGFGGGEGFGGGGFGGGGGEGGGEGFGGEGFGGFG=EGGGGBFFF>EEEEFGEF=EG\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:50C17C16T0G4\nfoo_26/1\t83\tchr6\t83345667\t60\t91M\t=\t83345296\t-462\tTTTTATCAGTTGAATAACAACAGAGCTCAGCTTATAACTAACTTATAAAACAGGCTCTATGGCACGATGTGGGTGAAGAGGATTCAGTGCA\tDCECB?EEEACDDDDDBCCB:EEEFE?FFFFEF5FFFGGGFFGDFGGGFFGGGFGGGFGGGF=FBFFFFDFFGGGGGGGGDGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_26/2\t163\tchr6\t83345296\t60\t91M\t=\t83345667\t462\tATTCCTCTGGAGAGCAATGGTGTACATACTCAAGTCAATTCATTAGGACATGGGGCTGATGATGAAAGCATGTAACAAAGGAACTGCTGTG\tFGEEDEGFGGFGGFGEFFFFFEFFEEEBE=DDDDBFFDFB??DDDEE?CEGGFDDGGGDGGGGE:AEDE-AD=DCFF?6B:=BABDDB5A6\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_28/2\t145\tchr17\t16958252\t37\t91M\t*\t0\t0\tGTCTTGGCCATTTGTATATCTTCTTTGTATATTCACATCCTTTGTCTATTTTTAATTGTTTTTTGTTGTTGAGTTTTAGGAGCTGTTTATT\tBECGGGC?AA:DEEBBGFGFGGF?GFGEGGGGGGGGGADGGEEE?EF?GGFGGEGGAGGGFGEGGGGGGFGGGGFGGGGGGGFDGDGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_29/1\t99\tchr22\t45659974\t60\t91M\t=\t45660345\t462\tCTGTGATTAGTGGGATTAAAGCAGCACTGTGTGAAGCACGGAGCGTGGTGAGGACCCAGCCCTTCCTTAGTTCACCTGCGGGTATGGCAAT\tGGGFGFFFFFBFFFDFGGDGGGEGDGGDGBGEGGGFGGECFF?FEAEEAEEEEEBD@EE@EDBBDABBBBBBEBE=FFFFEB=A@>>>=5B\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_29/2\t147\tchr22\t45660345\t60\t91M\t=\t45659974\t-462\tAAGAGGCCTGATTCTGGAGATGCTCAGGTGGGTCATTAAATAATCACCCGGAAGGCACCGCAGAACCACGCCTGGCACTTCTGTGCACACT\t####@35
C@C?D-FFFFF:DFEDCECDE=EDEGDGFEDFFFABAC:EEEAFFFFFF?DBDDEEE:EE5EEBFFFFFBGFGGFGGGDFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_31/1\t65\tchr10\t29460867\t37\t91M\t*\t0\t0\tGTGGTTTGTTTGCTTATCCTGTATCTGGAGTTGAGAAGAAACCAAGTTTGAGATTTAGGATGTGGACAGGAGGTAGGAGTTTTGCCCATAA\tGGGGGGGGGGGFEGGDFFFEDCEEDDDD=DBEBBEEEDEEF<FFFFDFFFBEBCEECCE@EEBBEBEBBE?BC=?B@:<5AAA<>EDAEAG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_32/1\t83\tchr7\t79683300\t60\t91M\t=\t79682929\t-462\tTACAAAAGAGGGGTTTAAACAGTTTTTTTCCAAATGAAATTTGAATGTTTTTCAGTTTTTTTATTGAAGCATTACATAGAGCATTACATTA\tGGGGFGGDGFFFFEFDEFDDGG@EEEFBGGGGGGGFGGGFBGGGFFGGGGFFGGGGDGGGGGGGGGGGGGGGGGGGGGFGGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_32/2\t163\tchr7\t79682929\t60\t91M\t=\t79683300\t462\tTAGGAGATTCAGCACTGAAAAAGAATTTAAGACTCTATAAAGCAATATAACAAAAACTGAAATAGTTATTTCTAGGAGGAGGAAAAAGTTT\tGFGGGGGGGGGGADGGGGGGGDGFGGGGGGGFGGGGGGGFCFFFFGEGGGGGGGDCGGGGFFGGGGDGGGGGGBDEFFEFGGEGF=DDABE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_34/2\t129\tchr13\t56061006\t37\t91M\t*\t0\t0\tAGTTTGGTAGTTTTTACAGCCTTCAAGTTTTACAAAATATTTATTTGGTGAGGGCAACTTGACTAATCACCCTCAGGACCATGTTTCAGTA\tFFBFFFFFFEFFFFFBDEEEGGFGGGGFFFEEEEEFDFFBGGGGFECEFDEBEEDBBFFFDGGEDFF?FFGEGGBDD?DEFGG@DED==CF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_35/1\t99\tchr1\t184430571\t60\t91M\t=\t184430964\t484\tTTCCAAACACACACATACACACACACACACACACACACACACACACACACACAAACTAATCCTGAGAGTGGCTGAATTACGACGCGAGTTA\tFEFFFFDFFFFGEBFFFDEFGGGFGGGFFDGEEGFFGGBFAGDGEEFFDFEE=BE-568.:<;?=8?;4*>>=>9A###############\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:82T8\nfoo_35/2\t147\tchr1\t184430964\t60\t91M\t=\t184430571\t-484\tGGAATCCAATAACAAAAAAGAAACAAAGCATATATTTTATATATCTTTTTCATTTTACTTTTCTAGTTATTTGTATTATATTCTATGAAAG\tCEB5=EC??8>GGF>G@EE@:EBEEED=EA=GEFGGEFFFFEABEDEE??EEEEDECB
?DDFFFFAE?EADGD:DEEGGGGE5GGGGGAGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_37/1\t83\tchr2\t78122797\t60\t91M\t=\t78122426\t-462\tTGTGATATTCTCATTTTCTCAGCCTTAAGAAGTCAAATGTCTCTGTAGGATAAGATATTCAAGGTAATATTTTCCTGTAAAGAAACACAGA\t?B=EGGFEBFFAA?B5BEEGEEEBD?DEEGE?BGGGGDEFBFGEGDGGGGFGGGGDGFGGGGGGGGGGEGGEFDFAGGGGGGGEGGGGGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_37/2\t163\tchr2\t78122426\t60\t91M\t=\t78122797\t462\tAAAGTAGTGTCACTAGGGACTTCAATAATCTATCTGAAGTACATATAGCAGCATCTGGATTATTTGCAATCTATTAGCTTCAGATCCAAAA\tEEEEDEEEEEGGGGGGGFGGFGGGFGFGFFGGGGGFE?EECDCDDFGDGGDGGEGAGGFDGDBGGGGFEGGGFGFAGEFG?;A-:BC=BCF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_39/1\t99\tchr10\t111115054\t60\t91M\t=\t111115440\t477\tATTAAGGTTATGGGCTTGATGTTTCTAGTTGTGGTTGTGTTTCTGCCAGGATTTGGTATCAGAATGATGCTGGCCTCCTAGAATGAGTTAA\tGGGGGGGGGGGGGGGGGEGGGFGGGFFFEFFFEFFDDADBEEEEAEE=DE=BDCCDAA?A=C=AADB=CBC@EABA?AA-<088<>A:A-?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_39/2\t147\tchr10\t111115440\t60\t91M\t=\t111115054\t-477\tGTCTTGGCCTAAAAGCTCCTTCAGTTGATTAAGAACTTCAGCAAAGTTTTAGGATACAAAAATCAATATATAAATATTACTGTCATTTCTA\tFEDEDGEEFGGGGFFBEGGBEGGGFGGEGGGGGGEGDGGGGGGGGFGFGF?GGGGEDGGGGGGGGFEGGDGGGGGGGFFGGGGGGGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_41/1\t65\tchr2\t201662148\t37\t91M\t*\t0\t0\tAGACTCTGTCTCTAAAAACAAAAAAAGTTAAAAAAAATTAGCCAGGCGTGATGGCGTGCACCTGTAGTCCCAGCTACTCAGGAAGCTGAGG\tC:C5A>>C9@<A?AADDBBAAAA<A<8BDBDD?:B;8<C5:>@A@D?BBBBABBB>=>=>@?:?###########################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_42/2\t145\tchr16\t82783352\t37\t91M\t*\t0\t0\tAGTAATCCTCCCGCCTCAGCCTCCCGAGGAGCTGGGACTACAGGTGGACTCCACTACACCTGGCTAATTTTTGTATTTTTTTTGTAGAGAT\t#########ABD@GGFGGEFFFDGGGFGF?GEGGGGGGDGGGGGGGFFFEFECEE?F@FFFGEGFGGGGGGFGGFGGGGFGGGFGGGDGGG\tXT:A:U\tNM:i:2\
tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:3G2A84\nfoo_43/1\t83\tchr2\t4935961\t60\t91M\t=\t4935586\t-466\tACCACGAACATCTGACCTTGGCTGGAGTATAACTGTTATGATTACTTGAAAACCACCACTGCCAGGGCTGAAACCTTTTAAGTTACTCATC\tCFBDEEBBABBECC@ECECE@EDEEE?CEEADDEDFBGEGFEEEGGCEEEC:EECEGGGGFGGFGGGGGGGGFGGGGGGGGGGFGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:5A85\nfoo_43/2\t163\tchr2\t4935586\t60\t91M\t=\t4935961\t466\tAGAAACATCCCCAGTTTTTTCAAATGCATTGAGTGCTGTTAACCATTGTAGAGGATCAGACTGTTAAATTAGGAGGTAGATGCAATGTGTA\tGGGGGGFGGFGFGGFGFGGGGGGFDGBGGGGEGBGFFGGDGEGGGGGFGGGEGGFGGGEFFGGAGEFGFGAGGEGD?CECBCC@BBEAEBE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_45/1\t81\tchr8\t43942323\t5\t91M\t*\t0\t0\tCATTCGCAGAATCACGTTTGTGATGTGTGCACTCAACTGTCAGAATTGAACCTTGGTTTGGACAGAGCACTTTTGAAACACTCTTTTTGTA\tF?F?EGFFFBFDGEGGEFGEGEGFDFEEEAAEGFFEGFGBGGGGGEGEFFEGGFGGGGGFGGGGGGGGEAGGGGGGGGGBGGGGGGGGGGG\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:7\tX1:i:12\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_46/1\t99\tchr1\t160898441\t60\t91M\t=\t160898820\t470\tAGGAGTGAATTAGAGCCCCTTATGTTAAACCTTGTTTCTTGGAAGCCCTCCTGGAATTCCCTAAGATGCAGAGCTGATTAGCTTAGCTGGT\tGGEGGEGGFGGGGGGGGGGGGGGGGGFGFGGFGGGGGGGGEGGGGEEGEGFFD?EBFFFEGEBGE:=A@5CCCBCEFFFEDFBFFEDFAFC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_46/2\t147\tchr1\t160898820\t60\t91M\t=\t160898441\t-470\tATCTGCAGGTTAGGTTGGACATTTACTTCTCTGCTTTTGCAGCAACTTCTACACCGTCTGTTCTCATACCACATTGTGTGGAAATTATCCA\t@:B=BA;AAA5:=D=CAA>?5DDC=DD=:?BB:AFFF?FFFAAAAAD?DCAFDFFFFGDEGEEEDEEBFFFGFGGGGGEBGGGGEGGFGFG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:75A1A13\nfoo_48/1\t83\tchr11\t88357992\t60\t91M\t=\t88357628\t-455\tTCTGGAAAAACTATTTTCATACCCACAATCCTAAAGGTTAGATGCCATAACTCATGCTCCTACATATATAACCAATTTTGTTTATTTATTT\tEEEGEGGFECGFFFFF=???-@D??@FEDFF:GFGGGFGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i
:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_48/2\t163\tchr11\t88357628\t60\t91M\t=\t88357992\t455\tTCCAGGGCATCATATACTAAAGAATCATTCCATCTAATTTCTTCAAATCTCAAGAACACACACCTGGCAAATCATGAAAATAAATTTTATG\tGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGFBGGGGEGGEGGDGGGFGGFGFGGFDDGGGGDGGGGEGDGDGFEDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_50/1\t99\tchr1\t210048059\t53\t91M\t=\t210048451\t483\tCCAGCACTTTGGGAGGCCAAGGCAGGCAGATCACTTGAGGTCGGGAGTTCGAGACCAGCCTGACCAACATGGAGAAACCCCGTCTCTACTA\tGGGGGGGGGGGEGEBGGGGGGGGCGDBBFDEDFEEFFEFECAEEB???:@BAACBEEDFE5=>@>C=B=5?@?:@A@7=C>>>>@EDD?5?\tXT:A:U\tNM:i:0\tSM:i:16\tAM:i:16\tX0:i:1\tX1:i:5\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_50/2\t147\tchr1\t210048451\t53\t91M\t=\t210048059\t-483\tTCATTAGCCACTAGGGAGATGAAAATTAAAAACATGACAAGCTACCTTTATACATTTACTAAAATGAGTACAATTTTAAAATATTGGCAAT\t######F=E@ECCCCC:FFFBFF?BEAAEEE?EEEE?FEEBDCCCDDEEEFBCECEB@EE=EE=EEF?FBDFFFFEFAF:FEEEEBAB:CC\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:16\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:17A73\nfoo_52/1\t99\tchr1\t210048059\t53\t91M\t=\t210048451\t483\tCCAGCACTTTGGGAGGCCAAGGCAGGCAGATCACTTGAGGTCGGGAGTTCGAGACCAGCCTGACCAACATGGAGAAACCCCGTCTCTACTA\tGGGGGGGGGGFFGAGFBGGGGGFEGFFBED?EEEEGGDGGCFFFF>B2?=CCC?CEDEDGFEFBFFFFDDAEAECDD2CCCAACAGGBEEE\tXT:A:U\tNM:i:0\tSM:i:16\tAM:i:16\tX0:i:1\tX1:i:5\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_52/2\t147\tchr1\t210048451\t53\t91M\t=\t210048059\t-483\tTCATTAGCCACTAGGGAGATGAAAATTAAAAACATGACAAGCTACCTTTATACATTTACTAAAATGAGTACAATTTTAAAATATTGGCAAT\t6?-BCBFFFBEDGGGGGGG?FGFGGFFFGGAGGGGGEEE:EEECEEGGGGGFFBBFFGGGGGGGFGGGGGFGFGGGGFGGGGGGGGFGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:16\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:17A73\nfoo_54/1\t83\tchr5\t99043369\t60\t91M\t=\t99043015\t-445\tGAACACAAAAAATTTTCATTATAATCATACAATCAACTGACAATTCACACATTAGAATTAGCAGAAAAGAAAATTAAACACTTATAACTGC\t=E=@;AEEEEDAABAA9;>8=>C>:6=EEEEE-DEEAAA<ACCACC@CACC?BEBEDDDDB;ABCEE?EEEDEEEECBACA>@@=@DDDDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:
Z:91\nfoo_54/2\t163\tchr5\t99043015\t60\t91M\t=\t99043369\t445\tAGATAGCTCATTTCATGTGTTCAACTGAGTATGGATCAGCACGTATTACTGAGGTAAATGCCTAAGGCTGGGGAAAGAAGCACCCAATACG\tD?A5DBD=ABDDDDBFFEDFDB:???>C@@:AA?C=5B=:2676;77277@=C=@CA=:A>?@@;:-9A;72:(9:26A);B;8;AC>B##\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_56/1\t99\tchr5\t61899391\t60\t91M\t=\t61899789\t489\tCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCACCCCGCTACATTTGCTCTTTTGATGCTGTCCCATAGATCTCGT\tEEEAE=DFFFGGEEF?EEBDFGFG?D:BDE?CAA=E?=AEC=DDD,DDCDC?C178:;*=;*7=/?>>64:844=C@-AC@C:?#######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_56/2\t147\tchr5\t61899789\t60\t91M\t=\t61899391\t-489\tCCCCCCCCCTCAGCCTCCCAAAGTGCTAGGATTACAGATGTGAGCCACCACACCCAGGCTGACTGTATTTTCAAATATTCTGTCTTCTACG\t##########?=/?;2=>,C@?DCAB<;97=CC<6@EE5EE?E?EEE=DEEFBFAFEEDEEE?EED?GEGFFGGBGGFDEGEEEED=DDDA\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:2A3A84\nfoo_58/1\t83\tchr3\t121089753\t60\t91M\t=\t121089378\t-466\tAAATTGTTTAAAGCAAAGTAACAACTGTTGCGGGAAGTCAGGGACCCTGAACACAGGGACCAGCTGGAGCCACGGCAGAGGAACATAAATT\tA<0;22B>:C>?<ACA;>9?<CC??>EFDFFAE:EEA=:@CCA-DDEEBECECA?EEEBEEFEE?FGGFFGGGGGGGGGGGGGGGGGGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_58/2\t163\tchr3\t121089378\t60\t91M\t=\t121089753\t466\tTCTTCAAGAATGCCAGAAACACATTATAGACAGGGTATATACAGGGTATCAACAGTTGACTTCTTAACAGAAACAATGAAAGCCAGAAGAA\tGGGGGGGGGGGEBGGGGFGGGGFGGDFDGGFFFEC=AACADEEEAEBEDEEEEEEEGFEAFEGEEFBFEDFAFEFAEEDFGEBFEFBB:=E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_60/1\t99\tchr7\t91156329\t60\t91M\t=\t91156701\t463\tATGTAGACTTACACACACAGTTTAATCAAGTAATTGTGCCTATTTCTCAGTAATTTAAACTTCACATTTTTGCTTTCTTAAAATCTGAAAA\tDDGFGGFGFEGGGGGGGGGFGGGGDEFBFFFEA?FFEFEFGFFGAEDDD==ACC@AEEEEF=D?DBBCAB>;=;=DDDDCBCCE?A:5?:=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_60/2\t147\tchr7\t9115
6701\t60\t91M\t=\t91156329\t-463\tTCCCACACGTTGTGGGAGGGACCAGGTGGGAGGTAACCTCATGGGGGCAGATCTTTCCTATGCTGTTCTCATGCGAGTGAATAAGTCTCAT\t?CCB5=GF?GGGEGFFCFGGAEDECEE:DBF:FFFFFDDFFEEA=EEDB=EFDFDDDF=FFFFF?EDGGGGE:EDEED5FEBGGFDGGDGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_62/1\t65\tchr19\t37869667\t37\t91M\t*\t0\t0\tTGGGGGAAAGCTGTACATATTTATGAGGGGGTTGACCACATGTGCAATGGGTAAACATACAGGTAACATACATCAACCATGTTTATTTTGG\t:=--CCC?CC:.=?=DB:DA?@=@6=:?=-.&7=659;5;==:>=6*7:*;?23<('.54???5>>@4B?CCA@#################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:75C15\nfoo_63/1\t83\tchr4\t91585929\t60\t91M\t=\t91585529\t-491\tAGCATGACATTCCTGTGGTAATATTGGGTTATTACAGTTACATTTGTCATATTTGATTGAATGCACAGAATGTTGCTATATATCTTGACTT\tFGDGGGEFEFFFFGFGEFGFBGGGDGGGFGGAFFGGGGGDGGFGGGGGGGGGGBGGGGGGGFGFGGGGGGFGGFEGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_63/2\t163\tchr4\t91585529\t60\t91M\t=\t91585929\t491\tTTTGGAAATGGTATTTCAACAAATACTTAAGTGGAGGTTTGTGATTGCAGTGATTATTTCAAGATCTTTGTGGACTGGGAGTCTTCCATTC\tGGGGGGGGGGGEGGGFGGGGGGGGGGGGGFFEFFEEECEEFEFDDFEFFFGGFFFGGGFGGGEDDDGFGGEAF?GGFG=EBBE5EEDE?EG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_65/1\t83\tchrX\t80714354\t60\t91M\t=\t80713989\t-456\tCATTTCATAATAAAAACACCAAGCAAATTAGGCATAGGAGAAACATACCTTTACATAATAGAGGCCATATGTGACAAACCCATGACTAATC\t4<@2;/E<BBA>>DEEE?EBEEE:EEDBA:A?=B?CFCFFFDBDC?@DDCAEEBEEFFAFFFFDBFFEFFFEEEEDEB=DEEEDCAEEDEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_65/2\t163\tchrX\t80713989\t60\t91M\t=\t80714354\t456\tAATTTTACTTAAATGAGTTTAAAAATGTGAAGAGGAGGGAATTCTTCCTGACACATTCTACAAGGCCAGCATTACCCTGATGCCGGAGGCA\tE=BDBDEDEBFFDFFEECEEACACCBDBDDEDEEE=BD>DCBDDDEDAE:EEE?:A?AAADFFDA=>6>>D==:CEEACBFD=AEBBDB##\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:89T1\nfoo_67/1\t99\tchr8\t127325209\t60\t91M\t=\t127325583\t465\tCCTTGAAT
TAGTTCAATGAATGAGAGTTACCCAAAACCAGTATGCCAGCACCATTCTGATATCTAATCTCACAGACATCATGATATACATA\tGGGGGGEGGGGGGGGGGGGGGGGGFGEGGGGGGFGGGGGFFGDGGGGGGGGGGDBG?GGGGFGGGFGGGGGGGEGGGDGGGGGGGBGGGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_67/2\t147\tchr8\t127325583\t60\t91M\t=\t127325209\t-465\tCACTCCCTCCCATGCTGTGCTGGGATTATGCTTATTATATTATGCTGCTAATATGGATAAAGTCATACTTTCTTGGTGTTTATCATAATAA\tC/C5@EGBGGGEGG?BGGGGFFFFD?GGGGGFGGGGGGGDFGFGGGGGGGGGGBGGFGGGBGFGDGGGGFFGGGEFGGGGGGGFGGGGDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_69/1\t83\tchr10\t88166322\t60\t91M\t=\t88165954\t-459\tGCTTTTTAGTACATTCAAAGGCTTGTGACTAGGAAAGTAAAAAGCTCAGTCTCACTGGAAAGTGATAACTTCATTTCAGTACATCCCCATT\tDDDEBEDDFEADEBE?FFDGGG=GGGEDEEAGGDGGFGGGFEDDBFFFFFE=FGGGFGEFGGDGDGDDGGGEGGGGGFGGGGGGGGFGGGB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_69/2\t163\tchr10\t88165954\t60\t91M\t=\t88166322\t459\tTTACTTAGAAGGGGTCGGCAGAGGGTGCTGAATATTGAGTCATATCTGTTGCCTACTCTGTTGCCCAGGCTGTTGCCCTCACCCTGGTGCC\tGGFGGGGGBFDEDEEEGGAGE:AEE3D=DDEE5EEDF5FEBBDADCEDEEDEEEEGEGEGEFD?F=5@@??AAB#################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:82T3T4\nfoo_71/1\t83\tchr9\t91795542\t60\t91M\t=\t91795154\t-479\tTACAGAAGGGTGTGTGTATTAGTGTGTATGAGGAATGTGTATATGGGGTATGTGTGTCATTGTGTATACGTGAGTGTGTATGTGAGTGTGA\t#############################################A584)5B5BBDB.?;==>>=B5-=?=DDD5DDDDDDB5DDDDD:=?\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:4T44G41\nfoo_71/2\t163\tchr9\t91795154\t60\t91M\t=\t91795542\t479\tCAGATCCATCCTGCTCCACTCTCTGTTTCCCCTTCCCTGGAAACGAGTGGCACCTCTGGTCCTCACAAGGGCCACACTCAGCTCTAGCGGC\t3777,,3737;5;55=?>5=A==ACAA;?::CCCC:C::=>5-8;<@9;5AA>>>C:???DCAD=C:D?:4?###################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:29T61\nfoo_73/1\t83\tchr9\t80693192\t60\t91M\t=\t80692839\t-444\tCAAATGTATTAAAAATGGTTAAAGTGTGC
TCTTAAAACAGTTGAATTATATGGCGTGGAACAAAAACAGTATGCCTCATATAGCTACTTTT\tEFGGFGBDGGGGGGGFFFFFBGGFFFGGGFFFGGGGGGGGFGGGEGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_73/2\t163\tchr9\t80692839\t60\t91M\t=\t80693192\t444\tACAAAATGTGGTATATCCATACAATGGAATATAGTTGGCAATAAAAGGGATTGAAATATTAGTACATGATATGATATTGTTGAACCTGAAA\tGGGGGGGGGGGGFGGGGFGGGFGGGGGGGFGGGGGGDGGGGFGGGGGGGFGGGFDEGGGGGGGFGGGGFGEGGFEFEGGGGGGGGGGG?FE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_75/1\t99\tchr3\t42286306\t60\t91M\t=\t42286671\t456\tCACCCACTCTTCCCCATCATGGAATGAGACGCACCAGCTGGTCCCTGCCTTTATCTGGAGGACTGAGAGTGGAGGGGGCCATATCCAGATG\tGGGGGGGGGGGGGGGBGGGGFGGGGGFGDGFEGGGGGGGGGEEGGBFFDDGGGGGFGGDDF=EEE>?:C;AA=@AD###############\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:76T14\nfoo_75/2\t147\tchr3\t42286671\t60\t91M\t=\t42286306\t-456\tTTCATAGAATTTAAGCTTGTTACAAGCAAGCCCCACTTACCAGAAGGCAGGAGGCTATTTGCTGTGATCAGAAACGTGATGGTTAGAAGGC\tE@FAF?GG?FGFF?AFFEFFB?DGGDGGGGGFFGFGGGGGFFGGGGGGGGFGGFGGGGGGGGGGDGGGGGGGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_77/2\t145\tchr2\t88437391\t37\t91M\t*\t0\t0\tTTATTGAGTAAAATTAAAATCTAAATAAATACAAAGACATACCTCCTTCATAGATTTGAACACTCAAGTTCATAAAGATGTCAGTTTCCCC\t8CD?:CDEEGGGGEDGGGGD?GFCGEFFFDBAGFGGEEEECEEEDEGDGGFGGGGGFEFEFEGFFGGGGGGGGGGGGGGGFGGGGGFGEGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_78/1\t99\tchr6\t31022292\t60\t91M\t=\t31022683\t482\tAATTTTGCCAATTACTTAACCTCTTTGCGTCTCCTTTCCCCACCTGTAAAACAGTAAGATCACACTCCTCACTTCACAGGTTCATAATTAT\tGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGEFFEGGEGEEGGEFEEGEGEEGGFEGEEEBEECECFFFFEAEEEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_78/2\t147\tchr6\t31022683\t60\t91M\t=\t31022292\t-482\tTCCCCTCCCTTGTAGACCCTGCCTGTCTCATTGTAGAGCTCTAGAAATGTTGCCCATTGCTATTGTTGTGGGAC
TATGTACAGGTCACTTT\t#################EEEEEGGGAGGGGFGGGGGGGGGGGGGFFGGGGFDGFEGGGGFDGGGGGGGFGGGFGGGFGGGGGGGGGGGGFG\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1G5A8A30C43\nfoo_80/1\t99\tchr3\t65874913\t60\t91M\t=\t65875296\t474\tAGCCAGCATAAACCCGGCGGGGACTTCTACCAACAGAGGGCCTCCTGTGAAAAGCCTGGAAAAAAAAGTCAACTCTCTGTTGCATAGTGAG\tFFFFFFFFFFFFFFFFFFFFFF0F=EBE=EACFFFDFFFFFDEFFFFEFEEE?E?FEFEAAEFEFCCCBBDFADBCBBC@FFBFEBEEB:E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_80/2\t147\tchr3\t65875296\t60\t91M\t=\t65874913\t-474\tACAGGTGCAGTCCAAATCCAGCCCACAGGCTACCAGTTTGAGAGCTCTGGGAAGGCCACCATCCTTAAGAGGATTTGAGCCTAATGTTCGA\t#################BA?>DD:5BEE=BEEGGGGEE:BEEFFF=F?GGEF?FFFEAEEEEFFEFDGGGGCGGDGGG=EGGFGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_82/1\t99\tchr3\t59620239\t60\t91M\t=\t59620620\t472\tTTCGTGTCTTTCGAGCCCTGATGAGGGCATGGCTCCCTTAGCCAAGTCAGTACATCCTCTTCCCAGAGGAGAGCAACACTTTCCTGTATGG\tBEDBEEEDEEEE:DB:B??=DDAD=?;?DDA5::A########################################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:66T1A22\nfoo_82/2\t147\tchr3\t59620620\t60\t91M\t=\t59620239\t-472\tCAGGGAAAGTGCTGGGCCTGTCATTCTGGCTGCCCGTCTTCTTCCCAGCCTTGCCTTGCTGGGAGACTTGACCTGTCCCCCTGCTCTCTGG\t#####################A:AA>977:+*4>=>-@6@@?+===5D?E=CA:5AD?:DB?5?<-5><9::55>=5995=@;@,BB?:A=\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:16A3G63A6\nfoo_84/1\t99\tchr16\t18158667\t36\t91M\t=\t18159047\t471\tGAGACCAGCCTGGCCAACATAGTGAAACCCCATCTCTACTGAAAATACAAAAAAAGTTAGCCAGGTGTGGTGGCAGGCACCCGGAAACCCA\tEAEEEEABEDAAC??:C?A=EECDE-;B=;A5A>A?BC5AEEC:BEEBBBC<7880=1=A>ACA###########################\tXT:A:R\tNM:i:3\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:81T1T2T4\tXA:Z:chr16,-16556437,91M,3;\nfoo_84/2\t147\tchr16\t18159047\t37\t91M\t=\t18158667\t-471\tGCCTCCCCCAGCTTTTATCTCCTCAACAGTGAAGTGGGTTACAAGATTATTTTGCA
AGTTAGAAATAATGTTTGTGGAACACACAGGACAG\tA:82C,AAA:B=6?@;D5BFEC:CCC::AAA>==@>EEDEE@B@@8>DB?DC-==CBAA;A>D;EE:=CEACEEDE;;59>B?5:<:A?A5\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_86/1\t99\tchr12\t23830502\t60\t91M\t=\t23830869\t458\tCAAACTATTGTATGTAGCTATTTAAATTATAAAAAGTCCTTGTTTTAAACATACATTTTTCTTTTCAGGAATGATAATTACTGCTTAGATA\tGGGGGGGGGGFGGGGGGGGFGGGGFGGGFGGGGGGGEGGGGEGGFGGGDGGGFGFFGGGGFGGGGEBGGCGGGGGGGGG?GGGFGGGEGEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_86/2\t147\tchr12\t23830869\t60\t91M\t=\t23830502\t-458\tACTATAACATCAAAGTATGACAAATCCACAAATGAGATACATTTGAAAGCCAAGTATTATAACATTTACTGTCTCTGAATTTTTATTAATA\tEDFDGGDGGDGGGGGFGGGGGGFGGFGGGGGGGGGGGGGGGGGGFGGGGFGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_88/1\t99\tchr2\t103054674\t29\t91M\t=\t103055053\t470\tAACCCCATCAAAAAGTGGGCGAAGGATATGAACAGACACTTCTCAAAAGAAGACATTTATGCAGCCAAAAAACACATGAAAAAATGCTCAC\tGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDGE>EFFFGGGGFGEFGGGBE?E9EEAAGGGEGGGFGDCEFEGG\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:34\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_88/2\t147\tchr2\t103055053\t37\t91M\t=\t103054674\t-470\tGCACACGTATGTTTATTGCAGCATTATTCACAATAGCAAAGACTTGGAACCAACCCAAATGTCCGATAATGATAGACTGGATTAAGAAAAT\tEABECEDFD;FFGGGFFDGGGGBEDGGGFGGGGGGGGGFGGGGGEGGGGFFGGGGGGGGDGDGGGGDGGGGGGGGGGEGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_90/1\t83\tchr12\t116461306\t60\t91M\t=\t116460928\t-469\tGCTAGAGGAACCTTCCAGATTATCTTGTCCAACTACATCTTTTCAGAGGTGAGACACAGCAAGAATGCAATAGACCTGGATCTTGACTCCA\tCABEEEBCEBEFEBFFFFGEEF?FBEF=FBEDFABFFEGFFGEFGGGGGFGGGGGGGGGGGGGGGFGFGGFGGGGGGFGGGFGFGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_90/2\t163\tchr12\t116460928\t60\t91M\t=\t116461306\t469\tATTTTAATTTATTTAAATGTAAATAGCCACACATGGCTAGCAGATACCATGTTGAACAAAACAGATGTAGACTCTGGTTCCAACTGGCCAT\t
GFGGGEGFGGGGGGEGFGGGGGFGGGGFFGGGDGGFGGGAFFFAFGGFEGGGE=?DEDED<:D?CEBCEEEDEDCCCAAB###########\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_92/1\t99\tchr19\t9744682\t60\t91M\t=\t9745053\t462\tAAAACCGACCATGCCCCTGCACTCTAGCCTGGGTGATAGAGCCAGACCTTGTCTAAAGAAAAGAAAAGAAAAAAAATCCATTAAATAATGC\tGGGGGGGGGGGGGFGGGGGGEGGGGGFGGGGBEDGGGGGFGGFGFGFGDGGFGGGEGGGEEEGCEC@DBDCBCDCGGGGGGFGFGGEGEGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_92/2\t147\tchr19\t9745053\t60\t91M\t=\t9744682\t-462\tACTTTCTTAAGATGATTGCCTGTACCTATAGCTGAGTGTGTGTGTGTGTCTGTGTGTGTGTGTACTGGAAGCATTAGCTAATACAATTCAG\t:4BBC==7?:4A?=A=AA:5ACCB-EEEDFC=AFFGFFFGGGDGGEGFGEGGGGGGGGEGFGGGFGGGGGFGGGGGGGGGGGFGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_94/1\t83\tchr14\t38814826\t60\t91M\t=\t38814461\t-456\tTTTGTTTGGTACAGGTTATGAAGTATAGATAGTACCTTGGAAGGAGTACATATTTGGATTAGAGTTATAAAAGATTTTGGAGAGGAAAATA\tFAGFGDGGGGGGGGGGGFGEGFGGGGGGGGGFFCEFFGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_94/2\t163\tchr14\t38814461\t60\t91M\t=\t38814826\t456\tAAGAAATATTATTGTACTAAAGGTAATTACGTGTTTGATCTGAGAGATGCTTTTGGAATTGCCTTTTAAAGTAGAGTGTCATAGGTCATAT\tFGGGGFGGGGGGGGFGGGGGFGGFGGGGGGGGGFGEGFDGGGGGGGEGGGGGGDGGGGGGGGGGGGGFBFGEGGEACEDEEFFFDDGDECE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_96/1\t99\tchr5\t29693887\t60\t91M\t=\t29694260\t464\tGGAAAAAAGTTAAAAATAGCATAAAAATAAAATTTAAAAAATTTGTGTTAAACATTAAAGATACAGAAACCTAATGGTTGGAAAATCAAAA\tD=:DDDAD?A;<<;;CA=A?DDDDDDCCCCA6>?D;;9;?=)B=?>C@@>6?>=@CC:?55;0990:?,1>?B2C@C6@@A5C=C:--?=?\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:41A17A5C20G4\nfoo_96/2\t147\tchr5\t29694260\t60\t91M\t=\t29693887\t-464\tTTTGTAAATTTTATGACATTTATGAAAACAGTGTTATTGAATGCAAATGTTTAGCATATGTTCATATTTTAAAGGGATAGAGACATGGAAA\tB5CEBC:?E?DEEBB@CCBC=ECBB:A?
AA?:DA5DAECBC@CACCDCD?5ADC=?EBE=DB?BGAFFFFFD?C?D72/(52;C;C?@DDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_98/1\t99\tchr16\t11003510\t60\t91M\t=\t11003870\t451\tTCTGTGCTTCAGTTTCTTCACCTGTAAAATGAGGATGACACAGTAATATCTCATGCAGTTGCTATAAAGTTCAAGTGAGATCATATAGTCA\tD=DEEB=BBDEEEBEAEE?EEEEEBEEEDEDBEEEEE?EEDEEDBEEE?=?DB?-B=@@>B?D?5@CCC>6>@>>C:C=E:BDADDE??:A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_98/2\t147\tchr16\t11003870\t60\t91M\t=\t11003510\t-451\tTCTGTGTTCATGGGGTAAAATGGCAGTGTTGCCACATTGATGTATTGCATTGTGGTGAAGTCAGGACCTTCATTGCATCCATCACTGGAGT\t#?5??A/-4*.-;7=07?<:4>B?5>?C@-@A:>A?=C=EEAC5CCBCD:CEDEBF@@:?C:8;6:A5DC?@B@AD=5:C5C?A5CBEC?E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_100/1\t99\tchr8\t109999747\t60\t91M\t=\t110000128\t472\tGTTTTCATCTCCTTCACTTTCTTATTTAAGGCAAAAATCAATTCTGGTGAACTCCACATATTTCTTAGGCTATATAGTCACTCTTTCTAAA\tC==@>ACAA5@CACAEEEEEDEEBBEEEEEEEEBEEEEEEBE5E?A<:>:AA???B?EE=B?B@B?B@5B??=??>>3@C>:@@=BE???B\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_100/2\t147\tchr8\t110000128\t60\t91M\t=\t109999747\t-472\tTAAATATTCTTTATGACACTTAATTTTGTCTATCTGGGATTTAAAGAGATGTAGTCAGTAAGCAGTTGCTGTTCACAGGTCTTTTCTAAAT\tFD5:?E==@>;=@CB::CB:DE?EA:C?@?=C5C:C:DBDBBCDDDB?@@.DD:ADCAABABD?DDC:CAAD:DDEDBDDBADCADEF?ED\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_102/1\t99\tchrX\t108127694\t60\t91M\t=\t108128067\t464\tTCATATTCATCATTCTAAAACACAACTCTCATTGTGTCACTTAAGATCTCAGTTCCTTCAGTGGCTTTCTTTTGCCTATGGGATGCAATTC\tFFDFFFBFFBFFFEFFDFFEFEFEA=DDDDFFFDFEAFFDEEEEEFFF=FBD@?BABC:5>;BB:FF?@FC:;;:AB?AAC:CBC??B?EB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_102/2\t147\tchrX\t108128067\t60\t91M\t=\t108127694\t-464\tCAACCATCACCCGAGAAGTATACATTGAACCCAATTTGTAGTCTTTTATCCCTCACGCCCTTCCCACCCTTGACCACTGAGACCCCAAAGT\t##@DBC@A=CC?5?-CC-CCA>@@;>>@9B=D?EEEE?EEBDD6DDC:4CAE
EEEDD?D=C@3>DB=DE?EFFFFFDDDDB>;C@>==BB>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_104/1\t83\tchr8\t53287150\t60\t91M\t=\t53286795\t-446\tGCCCATTAATGTGAAAAAAAAGTAATATAGGGTAAATCTTACCTGAGCTCCTGGATTAGTGAATCCATTGGCACTGTTGACAAAAGAAGAA\tGBDGDEEGFBFGGGGFGEGGGGEGGEEEEGGFEGGFDGDECGDFFFGGEGGEGGFGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_104/2\t163\tchr8\t53286795\t60\t91M\t=\t53287150\t446\tAATGCAAGAAGTGGATAACTTAGGATCTGGGTTATCCATCAGTCAGGTTCATGGATAACAGCTCAGCTCTTCCCTGACCATCAGTGTGAAA\tGGGGGGGGGGGGGGGEGGGGGGFGGFGGEGGDGFGGGGGGCGGGGGGGEGGGGGGGGGGGGAGGGGFEGFG5GGBC-ACBCBCDC??5?CC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_106/1\t83\tchr12\t119228405\t57\t91M\t=\t119228013\t-483\tTAATCCCAGCACTGTGGGAGGCTGAGTTGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATGGAGAAACCCTGTCTC\tCFD@FFC@CC@C,:<CECCECD?FFFE:EEE<B==*C@CBAFFFFE=FFEFEFBFFFFEFFFFFFFFBFFFFFFFFFFFFFCEEEEE5EEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:20\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:13T77\nfoo_106/2\t163\tchr12\t119228013\t57\t91M\t=\t119228405\t483\tATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCTGGGCGCGGTGGCGGACGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGC\tFGEGGGGGGGGEGGGGGGGGGDG?FGGDGGFDF6E:CDDDF?EDEFFF?FGGGGGE4EA:DD:BDDB?DD?DA5:CDA5D5@CC@EFA###\tXT:A:U\tNM:i:0\tSM:i:20\tAM:i:20\tX0:i:1\tX1:i:2\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr12,-32642124,91M,1;chr12,-25028971,91M,1;\nfoo_108/1\t81\tchr10\t133762969\t37\t91M\t*\t0\t0\tAGTATGAAGGTGCAGTCCCGGTCTGAAACTGACAGCGAAGCCCACGGGGGAACCCTGTGCAGCAGAGGAAGCACTCTGTCTACGGGGGTTT\t######C@CA:EB=FDE@?EB==:=@A:A5>BG=BEEFFEF<?<;(EEECEBEEE=FFFDEGEDGDFFFFFFADDFFGGGGGGGGFFFF:F\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:33C11T45\nfoo_109/1\t99\tchr8\t104387376\t60\t91M\t=\t104387743\t458\tGTCATGTGTTCATTCATATTTATTGAATATGTACCTGAATATGTAGCTTTCCCAGAATTACAGTTTTTCAAGTTCTAAGGTTTACATTAAA\tEEEEEEEEEEEDEEEEE?EEE4EEE9
>B>CEEEEEEE5BEEEEEEEEEEEEEE5EDEEEEEEEBEEEEEDEEBEEEEEEEEEEEEEEEEBE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:48C42\nfoo_109/2\t147\tchr8\t104387743\t60\t91M\t=\t104387376\t-458\tCCACTTTCCTATACAGTAGTCCATTGACATTCTCAAAGAATTTACTCAAGACTTTTGTAACCACTTAAACAGTATTTGCCCAGTGAAGTCT\t;E@BEECEDEEDBEEEGFEFGFDEFFFGBGGGFDGGGGGGFEEEEEEFGGGFGGGGGFGGGGGGGGGGGFGGFGGFGGGEGGCFEGFDDEB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_111/1\t83\tchr5\t78062243\t60\t91M\t=\t78061858\t-476\tTATACCTACTGCTGTTGCCATCTTGAAATTCTTAAGAATTTTGAACAATTGGCCCTATATTTTCAGTGCACTTGGCCCTGCAATTATGTAG\tEEEBDEEEEEEEEEDDEEDEDDEEEEEEEEEDEEEEEEEEEEEEEEEEEEEEEEEEDEEEE?EEEEBEEEEEEEEEEEDEEEEEEE?EEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_111/2\t163\tchr5\t78061858\t60\t91M\t=\t78062243\t476\tCTTCCCCCCTTTCCCTCCCCTATCCACTACCCTCCTCGGCCTCTGGTAACCATCATTCTATTCTCTATCTCCATGAATTCAATTATTTTGG\tGGDGGGFGGFGGGGGFGGGGGFGGGGGDGGGGGGGGGFCFGBGBEFFFFFGGGGFGGFE?GGGGGGFGGGFFEGGFEEFFEEDFEGGFGEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_113/2\t145\tchr18\t58963478\t37\t91M\t*\t0\t0\tGGTTGTGTCTGCAACGTGCATGACCAAGAGTGGCTACACACCACGGACAGAGCATAAGCCCTCAAAGAAGAGACCCAGGCGGATACAGGGG\t####################################################################################?>=;6@6\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:23A11C43G11\nfoo_114/1\t83\tchr8\t109625334\t60\t91M\t=\t109624953\t-472\tCTCAGATGATCCTCCCACCTCAGCCTCCTGAGTAGCTGGGACCACAGGCAGGTACTACCACATCTGGCTAATTTTTTATTTGTAGAGATGT\t?479:4:;<'CA7CCA<EEAEEFGG=EGGGGBGEGGGFFG?GGGGGGGGDGFFFFFGGEGGFGGGGGFGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_114/2\t163\tchr8\t109624953\t60\t91M\t=\t109625334\t472\tCCAGTTTAGCTAAACTACCCTAGAAATTCCCTCATTGTCAGAGCTTGAGTTCTTGATGTTCCCAAGCCAGAGGAGAAAAATCCTTAGTACA\tGGGGGGGGGGEGGGGGGGGGGGGGGGGGFGGFGGGGGGGGEFGGGGGG
GGGGGGFEGFGFGGEGDGGGGGGGGDF=FFFCEGGGGDGEGBD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_116/1\t83\tchr17\t46814499\t60\t91M\t=\t46814141\t-449\tACAGGTCATGTACTAACAGGTAGAATTGGAACCCACATCCACCTCTGGCCCAGTGATATCCTTGGTTACTTTAGTTATTCCCAAATACGAA\tC=EGEDCBBB:CEE?EBCEEAB?CEECBB?@EEECEEDGGCGGDGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGEGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_116/2\t163\tchr17\t46814141\t60\t91M\t=\t46814499\t449\tGTATTCTGCTTATTGCAACCACGGTTGCTGGCTGATTATTATAGTTACTTGACAGTGGGGATACCAGCAGGCTTTGGGACAAAGAAGGGGA\tGGGGGGGDGGGEGGGFGGGGFGGFEGFGEDGEAGBGGEGGFEGGGGGGGGAFFAD@BDDD<?C@CAC@5AD5@CDB:D:AAE?BC@7AA@#\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:29A61\nfoo_118/1\t83\tchr3\t68988262\t60\t91M\t=\t68987880\t-473\tCAACATTACTCTCCAAAGATTATATTCCCAAACGCTTTACCTATTCCTTCTTAGAAAAGACTTCCAGAGTCTTTCAGTCTTCATCTTGGCC\tDCBD?DG=FFGFFDFFCEDECFEFECCCDCA:EEEEGGEGGGGCFGE=GGGEFEGGDEEGGFDEFFGGGGGGGGGFGFGGGGFGFGGGGFD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:47C43\nfoo_118/2\t163\tchr3\t68987880\t60\t91M\t=\t68988262\t473\tTGTGGATCCTTAATATTTGTACCAGTTCTGGACTACCCGTCCAGAGCTGGTACAACTACCCTCACCCAGACTCCGTGGTGTAAGGAAAAAA\tGGGGBGFGGGEGGFGEFBBFGGBGGGG=AFGFGFGGDFGEFBFFFBDDDD:AEEEDEEEEACD?DGGEAB=CCECACC5?(77=6>=A??#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_120/1\t83\tchr15\t57813470\t60\t91M\t=\t57813098\t-463\tGGGTAGTTATAGTCTACAAAAGCCATATAGGCCATGCTAAAGATTTTGGATTGTATTGTAAGTATAATTTTTAGGTTGGGGGCTAGTGGTA\tEEDEE=FFGGDG??GGEFGDFGEFFEGGGGGEBGGGGGFDGFGFGGGGGDGGGGFGGGGFGGGGGGGGGGGGFGGGGGGDGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_120/2\t163\tchr15\t57813098\t60\t91M\t=\t57813470\t463\tTTGCTCGCTTGCACTCTCTTGCCTTTCTGCCTTCCACCATGGAATGATGCAGCAGGAAGGCCCTCACCAAATGAAAGCCCCTCAACCTTGG\tGGGGGGGGGGDGFGGGDGEGGGGGGGFDGDGDGGGEGEDGFG?GGGFFDGAFEGGF=BDFDBF?ABEFC=
GF?GG?FDFFFD<FA:C5A5?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_122/1\t99\tchr7\t54900450\t60\t91M\t=\t54900817\t458\tTTACACAGTTGTAATCTTAGTTTACATACTACTTTAAACTCTTCTTTCCAAGCTCAGACGATATTATTAGCAACTATATTGCTTGGTGATT\tGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGFGFGGFEGBGFDFDEGEEFFEEEEEEEGEGFCGAGBA:A+@D\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:38T52\nfoo_122/2\t147\tchr7\t54900817\t60\t91M\t=\t54900450\t-458\tTGGGATTACAGGTGTGAGCCACTGCGCCCTGCTGCTTGGTGATTTAATTTAGAATTATTGTGTGTGTTTGAATGTTACTTGCAGTGTGGCA\tEGFBGGGBGEGGGGEGDEGGFGGGEGBEGGGDGGFGGEGGGEGGGGGGGGGGFGGDFGGGGGGGGGGGGGGEEGGGGGGGGGGGFGFFGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_124/1\t99\tchr3\t166272812\t60\t91M\t=\t166273202\t481\tGGTGGAAAATTAACATTTGAAGTATTTTAAAGAATCTTTAGGGTATTTAATGAAAAGTATTATTAAAGATACATAGCTATAAATAACAACA\tGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGFGGGEGGGGCFGGFGGGFFFF?DEEEEGGGGGGEDGGEGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_124/2\t147\tchr3\t166273202\t60\t91M\t=\t166272812\t-481\tGCCTGTAAACTAGCAAAGTGACACTGGAATATTAATTTAGCCTTGAAAAACAGTCTAAGCTTCCATATAAATCCAAAACTTTGTGAATAAG\tEFEFEFGGDFFGGEEGFGFGFGEGDGFGGGGGDGGGFGGGGGGGGGGGGFGGGAGGGFGGGGGGGGGGGGGEGGGGGFGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_126/1\t99\tchr3\t166950102\t60\t91M\t=\t166950476\t465\tAATAATATATATACATACATATTGGTCTCTGCCCCCAGTCCTTGGCACAGAGCTCCTAAAACTAAAAATTTTCTGAATGATAGACGTGCTA\tGGGGGGGGGGGFGGGGGGGFGGGGGEGFGGGGGGEGEGEGEEDEEEEEEEEEE@EEEEEEEEEEGEEE=EEEEEEEGGEDEEEEGBEGEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_126/2\t147\tchr3\t166950476\t60\t91M\t=\t166950102\t-465\tGAGAGTTTCTAGGTTGACAAAGACATCTGCATGACAGAAGAGTGACTCACCCCAATCACATGGGAACAGAAGCTCCTGTGTTTGGGACCCA\tFGGEEGGGF?GGGGGGGGFGGGDDGEFEGDGGGGGGGGGGGGDGGGGGDGFGEEGFGBAGFGGGGGFGGFGGGEEGFGGGGGGGGGGGGGG
\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:90T0\nfoo_128/1\t99\tchr13\t99220555\t60\t91M\t=\t99220942\t478\tCTGGTCTCGAACTCTTTTTTTGTTTGTTTGTTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGGTAGAGTACAGTGGCGCAATCTAGGCT\tDDD?BEEEEDFFFFFEFFCF<A@C@=6A:=?A8:=C>EE=CBD5D-A839?=;;;>C5?,CCC>C*?=BB8>.6>17?#############\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:86T4\nfoo_128/2\t147\tchr13\t99220942\t60\t91M\t=\t99220555\t-478\tAGCCACTGCGCCCGGCCGGGTTTTAACTTTTCTTAACTGGCAGTTATTTTTTTTGAGACAGAGTCTTGCTCTGTTGCCAGGCTGGAGTGCA\t######?B:>3:2<-;9;;+@E:EED;;@=@:??B5>;9;;?<<:@8>DDD??DDDA:A?C;CC6@A=::CCAED:EEDCDDD??BE:EEA\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:72A18\nfoo_130/1\t83\tchr2\t67583140\t60\t91M\t=\t67582783\t-448\tTTACTGGTAAAGCTGTGAGAAAACTTTCCAGACCTTTCTCTGTATTTGTGTGTTCTGTCAAATTGCAGCCACATTCAGCCTTCCCTTTCTC\t#####??-DC?5;??</<:A>;88>?->250>*,96/7839/>5AB:CA?>:A?AA;8;??=A?5CFFFF?GGGDGGGFGGGGGGDGGFGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0G31A58\nfoo_130/2\t163\tchr2\t67582783\t60\t91M\t=\t67583140\t448\tAATATTTGGTGGATGGCAGAAAGTGTCCCCTGTAACTAGATGTAGATTTTACTTCAAAAGAATCCCACTGAAGCAACCCTTCTGGGGTTAG\tGFGGGFGGGGGGGGGFDGGGEGGEGFFGGGFFEFFGGGGEGGDGGFGFGGDGGGGGGDGGGGDGGG?GFGEGDFFF?EFBFGDEEDFD5A=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_132/1\t99\tchr4\t100066554\t60\t91M\t=\t100066938\t475\tTAAAAGTACAAAATTAGCCGGGCGTGGTGGCACATGCCTGTAATCCCAGCTACTCGGAAGGCTGAGGCAGGAGAATCGATTGAACCCAGGA\tGGGGGGGGGGGGGGFGGGGGGGGGFGGEGGDFEFFGGGGGEGGGFGGFEEGEGCGGGAFEFACECEFEEGC=C5@39:86?;5?<>?:5?#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_132/2\t147\tchr4\t100066938\t60\t91M\t=\t100066554\t-475\tAACAAGACTCACATCTTGTATAACTTTGTATCCTAAAGAACACTTGGTACAACACCCTGGATATATCAGGAGCTATTAATATTTACTAACT\tC=DEEDFFEEEGDCGGGFFGEFDGGGGGGGGGGGGGGGGDEFGGGGEEEEDFF?FFGGGFGDGDGGGGGGGGGGGEGGFGGGFGGEGGGGG\tXT:A:U\tNM:i
:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_134/1\t83\tchr11\t80126912\t36\t91M\t=\t80126556\t-447\tATGAGGGTAGATTTCTCATGAAGAGTTCAGCACCATGCCCTTGGTGCTGTCCTCGTGATAGTGAGGAAGTTCTTCTGAGATCTGGCTGTGT\tEFDFFFFDFCFBFEFFFBBFFC5CB:CEEBE@A=B=E?EEECCC5EFDEFBFDFFEEEE?EAFFFDFDFFDFEFFFDFFBFDFFFFFFFFF\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr11,+37231477,91M,0;\nfoo_134/2\t163\tchr11\t80126556\t37\t91M\t=\t80126912\t447\tAACACAGATAGAAAAGGAGACAGTACCTATTAACTTGTCTTGGGGTTGTTAGTCTGAGTATTGTAAGAGATAAGGGCAGGTAAACGATGAG\tGAGDDGGGFGEEBAGDDEE=EDEEEEEFBFE5DEE=DBDDEEE=EEEEDA=C:CC=5B:9CDA:CEE5C5??CBDC?CAC###########\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:52A38\nfoo_136/1\t99\tchr5\t94647738\t60\t91M\t=\t94648127\t480\tTACTATTATTTATCTGAAATTTAAATTTAACTGGGTGTCATGTTTTTGTTTGCTAAATCTGGCAGCCCCATATGGAGAGGTTTTTGGTTTT\tGGGDFGGFGGGFGFGGDGEGGGGGGGFGFGGGGGDFGGGGGGGGGGFGEGGGGGGEGGGGGGGEGEEEECEECDE?D@BDACEEE5C8?@D\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_136/2\t147\tchr5\t94648127\t60\t91M\t=\t94647738\t-480\tTGAAATTGGGAGACATTAGAGGTCTTTTTTAATCGTGAGGGTAATGTGATGTGATTTACTAATACTAATGCCAGGTATTAATAAATAGCCT\tBE?FD=EFAFBGFFGFEFFFGFCGGGGCFCGGGGGGFGGFGGGGFGGGGGGFFGBGFBBFFDFFEFGGFEGDFEGGGGFGGGGGGGEFDDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_138/1\t99\tchr3\t173697337\t60\t91M\t=\t173697711\t465\tCGGGAGTGGGACCCACTGAGCAAGACCACTGGGCTCCCTGGCTTCAGCCCCCTTTCCAGGGGAGTGACCAGTTCTGTCTCACTGGGGTTCC\t7<772727:8BB=A>@@B3@<>B@@BBB=BAA5BBA=>BA3A<@9;.45@BAA>>BB25@###############################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0T66A23\nfoo_138/2\t147\tchr3\t173697711\t60\t91M\t=\t173697337\t-465\tGCACCCACTGCCTAACCAGTCCCAGTGAGATAAACTGGGTACCTCAATTTATGCAGAAATAACCTGCCTTCTGCATTGGTCTTGCTGGTAG\t################CA>>A:5?A:>=;B4<?ABA=076,4:=>:@;6@::?CCC>AC-A:?=?>;5;*9=>BB5=CAAACA:5C?FFFA\tXT:A
:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:1T89\nfoo_140/1\t83\tchr12\t17517928\t60\t91M\t=\t17517567\t-452\tGACCCTAAGTCTTTCCCCCATTCTTCCTTTCACTCTCTCAAAAAGGCCCTAGAGAGAGCTCCCACACTAGCACTCCCCGACTCATCCCATC\t#########################################AA:A->:<B7:5**9=+>5@AA>AA949A;,=);<@>>?>AD?:DDDDDD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:14T76\nfoo_140/2\t163\tchr12\t17517567\t60\t91M\t=\t17517928\t452\tCATCTGCTTCTCACCTTATTCAATGTTTTGATGACCTTCTACTTTATAGCCCCTCCTATTAATCCTCCCTACAGGACACACTCGTGCGCCT\tDEBEEBFFDFEA:EDDABDDDE?EDCAEEAD5DDDFEE:E6=@@@=>@??C;;=-34<;)5=???AA6A+5*/(7.BB:;==B########\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:59G9A17T3\nfoo_142/1\t99\tchr9\t70554492\t60\t91M\t=\t70554875\t474\tTGCCCTGAGGATGTTCAGCGGCATTCCTATCCACTAGATGCCAGTAGTTCCTTCCCCCTAGTTACAACAACCACAAATGTCTCCAAACATT\tBDDDBDDADDACCCAD=DDDBBDDDDDD:DDBDDDA?-CAACC=CDD?DDDDDD=CDDD@D?B?D?CDBDBDBD=?@@B>??ACAB-?AAC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_142/2\t147\tchr9\t70554875\t60\t91M\t=\t70554492\t-474\tCCATGCTCCTTTGTCACACTGTGTTCCATAGTTTCTGTTATAGAACTAATGGCCATTGAAAACATTTTCCAAACAGTAACTTCTGTTGTAA\tC5=C?C:*2'>;B,46@>,@@:-DDD@A=DDDC5DDC?-AAAB:A5A:DFB:?CCCCDD;DDD?DDB=@;;EAEE=DEE=?FF5EFBGFGD\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:7A14T68\nfoo_144/1\t99\tchr7\t147690704\t60\t91M\t=\t147691082\t469\tTAACAGGGGGACTTACTACTCGTGTGCCAAATTCAATGCTGCTAATAGGAAGCTTGAAAGCAAACAGTCTACGCTTTGGCAAGGAGGATCC\tGGGGGDGGGGGGGGGGGGGGGGFGGGGGEGGGEGGGGGGGGGGGFFGGGGGDFEGGGDGGGGFGGGEFGGGEFF?FFDBBEDEFCBECBCE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:71T19\nfoo_144/2\t147\tchr7\t147691082\t60\t91M\t=\t147690704\t-469\tCGGCACCCCCCCCCTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATCACAGGCATGTGCCACCACACCTGGCTA\t############??6@>>CC.B<>?7BC5BDADFFGFDFFDGDFGGBGGGGFGGDGGDGGGFFFEFEGFGBFEEFFECEEDFFFFFGGGGG\tXT:A:U\tNM
:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1T3A2T2A79\nfoo_146/1\t83\tchr17\t12277024\t60\t91M\t=\t12276657\t-458\tTAGATTTTCACGTGCAATTTTATAACTTTTTCATGCAAACTGTATGTTGGATTTGGCCAAGGGACTTCCAGTTTGTGATTTCTGGTTTCGA\tGEFF=DGEGEGGEGGEDBEEFFDECEEEEDEGEFEGGEEGGGGGGGGGEGGGFGGGFGFGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:43G47\nfoo_146/2\t163\tchr17\t12276657\t60\t91M\t=\t12277024\t458\tTTCCTTGCCAGTCTCCAGGAATACAATATAAAAGGGAATGAGAAAAAGGATAAGCAAAGGGCCGTATCAATTCTGTGCAACAGTGGAAAGT\tGGGGGGGGGGGDGGGGFGFEGGGGGGGGBDGGGFG@7BBCCCCCCFEGGGFG?GGGDEGBGGGG?GBGGGGGGGGD?GFBFEDCFGEFFEC\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0C59A30\nfoo_148/1\t99\tchr7\t104293902\t35\t91M\t=\t104294284\t473\tATAATAATATTAACCTTAAATGTAAATGGGCTAAATGCTCCAATTAAAAGACACAGACTGACAAATTGGATAAAGAGTCAAGACCCATCAG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGFGGGGGGGFGBGGGEGGGFFGGGFGGGEGGGGDGGDGGGFFFGGGGFBG\tXT:A:U\tNM:i:0\tSM:i:12\tAM:i:12\tX0:i:1\tX1:i:12\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_148/2\t147\tchr7\t104294284\t35\t91M\t=\t104293902\t-473\tACTTAGACTCCCACACAATAATAATGGGAAACTTTACCACCCCACTGTCAACATTAGACAGATCAATGAGACAGAAAGTTAACAAGGATAT\t=FEEFFBFEFFGDGDFFFFFDGGGGGGGGFGFFFFCEEAEEGGGGFGGGGGGFDGGGGGGGGGGGGDGGGGDGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:12\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr7,+57380374,91M,1;\nfoo_150/2\t129\tchr12\t29682556\t37\t91M\t*\t0\t0\tTGAGGGTGATCGGCTGGACTCTAGGGAAAGACTACCTTCCCACTCCATCTCCTTTCTGGATCCCCCTCCAACTCACTGAGAGCTACTTTTA\tFDAEDDEDEE?5CACDDA=?;3*9A@4@@;6;>>==?94<A:?A:?5?###########################################\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:52C12A22C2\nfoo_151/1\t83\tchr10\t99514420\t60\t91M\t=\t99514030\t-481\tTCGCCCTTCCCCGCCTTGCTGTCTGGAATGTGGTGGAGAAGTTGGAAGGAGGCATTGTCTATAACCACGAGAGCAAACGTCACCCCATGGC\t###?86C+CEEEDECDBDEDEED:FDFD@DDEEEGEEEEEEBBGDGGGGEGGGGGEGGGGGGGGFFFGGGGGGGGGGGGGGEGG
GGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_151/2\t163\tchr10\t99514030\t60\t91M\t=\t99514420\t481\tTTGTACTTTTCATGCATTTTCCCACTTAACCCTCAGCAACCCTGTGAAATAGACATTTTTACTCCATTTTACAGCTGGGCACTATTCCCGA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGEGGGGGGGGGGEGGGGGGGDGGGFGDGGGGEGGGGGFGGGFGGGGGGGEGGGGFEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_153/1\t99\tchr1\t214969578\t60\t91M\t=\t214969943\t456\tTTTTCTGCGTGGGGATGAGGAAAACAGACTTGGGAAAGTTAACTGATTCTGCTGACTCTCCAGAGAATTCAATTGTACATTGTTTACCAGA\tGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGEFEGFEGFGDGFGEGGEEEEFGGFBGBGGGDGDECDEEC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_153/2\t147\tchr1\t214969943\t60\t91M\t=\t214969578\t-456\tATTATGAGTCCAGCTTGGTTACTTTACAGGTAAAGGAAATCAGGCTCAGAATGGTTAAGAGCCTTAACCCAGGTTACATACCTAGGTAGTA\tGDGGGGGGGGGGGGBFGGGGFGGGGGGGGGGGGGGFGGGFGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_155/1\t99\tchr6\t49608233\t60\t91M\t=\t49608615\t473\tTTTTTTGTTGTTGTTGTTGTTGGTTTGTTTCTTTTTTAAGGCAGAATCTCGCTCTGTCGCCAGGCTGGAGTACAGTGGCGCGATCTTGGCT\tGGFGGGGGFGGDGGGGGGFFGEGFGFFEFEAFFEEEECFEGGGFGGFGE=EDFEDDDF?AEECEACCCCB?=B@@:CDACBB=@A,>8?;5\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_155/2\t147\tchr6\t49608615\t60\t91M\t=\t49608233\t-473\tCTTATGTTTACTGGAATTTATGGGTCTTCGATTTTTAGCAAGATTATTGTCATCATTTTTAATACTCTTTTGCAATGATACAAATTATTTC\tFE=EBFGBEDGGGDGAFFGGGGGGBFFGFGDG?GGGGGGGGGGFGGFFFFEGGGEGFGDFGGGCGGGFFGGGGGGGGGGGGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_157/1\t83\tchr13\t40631296\t60\t91M\t=\t40630926\t-461\tTCTGTTTAGGAACAAAAGGAAGGCCATTTTTGCATGAATCTGTTCCCAAGCTTTATGTTTACCTTTGGCATTGTGATTTGGGGGACTTAAG\tBGFEGGFEGGFGFGDGEDGFGGGDFEGEGGGGGGGGFGGGGGEEGGGGGGFFGGGGGEGGDFGGGGGGGEFFGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tS
M:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_157/2\t163\tchr13\t40630926\t60\t91M\t=\t40631296\t461\tTACTCCCACTTGGTAAGAAAGTCAAGCTTTCAAGGACATAAAACAAAATGAGAAGGAAACCTCATCCAATTTTATTTCAGGGGCCCACAGC\tGGGGGGGGGFGGGGGGGGGGGGGDFGGGGGGGGGGGGGGFGGGGGGFFGGDGGEGFGGGGGDGGGGFEDGGGGGGC@CFFCFDFFGGGFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_159/1\t81\tchr10\t13256457\t37\t91M\t*\t0\t0\tTACTCATTCTGAAGGAGGTCTTGTCCTCCCTTTGATAATCTGCGCCAGTGCTCTGTATCTTGATGAATATTCAGTAAACATCATTGCATAG\tFBGEGEDFGFGFEFFEGCEDEFE?BFEE?GFGDFGGGGGGBGGGGGDGGFGGGGGFGGGGGGFGFGGFFGFGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_160/2\t129\tchr12\t117996522\t37\t91M\t*\t0\t0\tGGACAGAGCAACTGAGACTCTGAGAGGTTAAACGGGGCAGAAGCACCATCTTTGTGAGCCAACAAGGGTCACGGTGTTTAGTATAAAAGAT\tGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGFGGFGGGFGGGGGGGGGGGFGGGGGGGGGGFEGDEEECFEFBFGEGGGGFGBFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_161/1\t83\tchr4\t114789460\t60\t91M\t=\t114789093\t-458\tGGCATTTGCGACTTCTAGTCTTCCTTTTTTGTCCATGTTCCCCCATAATGGAAATGTTCCAATTCTTAAAAAAATAATGTTTTCAAATCAG\tBDGGGGGGGFFGGGGFEGGGBFGGGEFGGGGFGGGGGEDGGGGGGGGGGFGGGGFGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:63C27\nfoo_161/2\t163\tchr4\t114789093\t60\t91M\t=\t114789460\t458\tGCAAGTCTGATGGGATATACACAAATGAAAGTAGTTGAGCCAACAAAAGATACATGTTTGGGAAAGGCACAAGGGACCGGTAAGAAGTGAG\tGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGGDGDDFGFGGFGFEGGGGGEEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_163/1\t99\tchr5\t50568265\t60\t91M\t=\t50568652\t478\tATTATTTAGTTGTTGTATCCCCCAACAACGTTTTTAATATAAGGAAGTTATGTCTTCTAATGATTATTTTGATGCTACCCTAATGCAGGAG\tGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGEGGGGGDGEGGFGFEDGDGEGGEFGGBE?DF?ADEBEEFC05=<=C?5BB@?B@@BBCE?A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO
:i:0\tXG:i:0\tMD:Z:91\nfoo_163/2\t147\tchr5\t50568652\t60\t91M\t=\t50568265\t-478\tAATTTCAGAATTCCTCCATTCTCTTCAGGATCAGAACCTTGCCCTTCACTAGGGTCTTTGAACCTTACCAGGTCCTACAATACTCCCCCAT\tF?CCB:BFFFFBFFAFGGEEGEGFGGGGGFGGGGGGGGGFEGGGGFGGGEGGFGEGGGGGFGGGGGFGGFGGDGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_165/1\t99\tchr12\t72989448\t60\t91M\t=\t72989815\t458\tCCAAGGTCTTGCTGGGCAAATCCTACACTTGGCAGCAACTCTCTCCTGACCTATAGGGCTTAGAGCTGTCATTGTATTCTGTTACCTCCAT\tGGGGGGEGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGEGFGGFDGGGAEGGGBGBEFGGFFDGCFBEFCEDDEFCGGGGGGECBDGGFGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_165/2\t147\tchr12\t72989815\t60\t91M\t=\t72989448\t-458\tATCACATAATAAATGATTTGCTTTATCCTAGAGTTTATGGTTCTATCATGCTTTTTTTTGTAAGTATCAATACCAGTGTTAGGAGCTAAGG\tEFGDGFGGGGGGGGGGGFGGFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFEGGGGGGEGGGGGGGGGGEGGGGGGFFFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_167/1\t83\tchr6\t11798197\t60\t91M\t=\t11797819\t-469\tACATCACACAGCTCCTGTGCTTTGTTGATATTGAGTTCCAGAACTCAAAAACAGCCTTCAGTAGCCGGCATAGGCTCATTAGTCATCATTT\t7>=9>;?;A?5:?B1>BB@,DFADFEB?CBE@=@?C:A?:AC>A?3@CCC?ECCE?FEEEEGGGGGGGFGGFGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_167/2\t163\tchr6\t11797819\t60\t91M\t=\t11798197\t469\tACAAGCTCATGTCCTTTGCAGGGACATGGAAGGAGCTGGAAGCCATTATCCTCAGCAAACTAACTCAGGAACAGAAGATCAAATACCACAT\tGGGFGGGGGGGFGEGGGGGGGGGGGGGFGGGGGFGGGGGFGGEGEGGGGGGGGGFDGGDGFGGGEGGGGEFGDFGGB?BGEEE@EDECECB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_169/2\t129\tchr2\t234871404\t37\t91M\t*\t0\t0\tAGCTCACTTTTGGAGGAATTGAGATTAAGGGGTCAAAAGATTTCTTTTGCCTTAAAAAATATATATACTGCACTCCTAAACTGTTTGAAGT\tGGGGGGGGGGGGGGGGGGGGGGGGEGGFGFBF=DFFFFFF?FFFFGGGGGGGGGGGGEGFFGGGGFGGGBGGGGGGFG5GFDGEGGGGDGB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_170/1\t83\t
chr11\t80258983\t60\t91M\t=\t80258618\t-456\tCTCTGCCTCCCAAAATGCTGGGATTACAGTGTAAGCCACCACACCCAGCCTGGGATTTATTAACTTTCAAATTAATCCTCATTATTACACA\t###################################B>::@ABB:C?BB?BBBACDBEE=EDEBEEEACCCCAAC?<DADADDDDDD@C>@C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_170/2\t163\tchr11\t80258618\t60\t91M\t=\t80258983\t456\tTACAAGGGATGCTGCTCACCTTCTTACTTTAAAAATAAAAATCCTCACTGCAACTTTTAGTTCTAAGTCCCACGGCCCAGTCCTCACTCTA\t@@/4@/*4@@>>>.:6A.9346633D:DDCC=DA470,,/&.A:A4,-<5>>>>CEEB=EFFFF::.7<4803;:?C=5C>??<?*?:?##\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_172/1\t83\tchr12\t13276458\t60\t91M\t=\t13276066\t-483\tGCGTGAAAACAGGCTAATACACATGGGGACTCCTAGGCAGCCCAGTGGAGAGCTCTGGTAACAGAGGCCTCCTGCTTCATTCATCTTGCCA\tFCC:DEBC=EBEEBFE@??B?C:BB?BB?EE5BDDEEFEDGCACC:EGEAEFFGGEFEFDF=GGGFGFGEGGFGGGGDGCGGGDGGGFFGD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:51A39\nfoo_172/2\t163\tchr12\t13276066\t60\t91M\t=\t13276458\t483\tAACACACAGCAGCCTTGCACCCTCATTGATCTGGGGAAAGCCAGCTGCCATGCCATGGTGATATGATTTGTCTGTGTCCTCGCCCAAATCT\tFGBGGGGGGEGDEGGGG:GAFFFFFGDFE?GFGGGGFEGAFFFFFFEGAGEGDGGCDD5DE?FCEEEGD::DBCA<:AAAFEEFDEBGEE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_174/1\t99\tchr16\t49523412\t60\t91M\t=\t49523774\t453\tTACAATGGGAGAATAGTACCCATGAGGCAGAGTTAGATGATTGGAGGTAATGATGTTTGCAAAGCATCTATCACAGTGCCTGGCACTAGCA\tGFGGGGGGGGGGGGGGFGGGGGGGAGGGEGFGGGGGFGBGGGGFGGGEGGEFEFGEGFGEEGFGEGGGGGEEEEBEBCEEF=CFACEEFEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_174/2\t147\tchr16\t49523774\t60\t91M\t=\t49523412\t-453\tCTTGAGAACTTAGCAACTGGTAGGAGAGACAGACCACAGACAGGTGAGGAATCAATTTAATAGTTATATGTAAGGTGCAAATTTTGTAACA\tFFGFGGGDFFEGGFGEGCGFGFGGAGGGGGGGEGGGGGFFGGGGGGGGGGGBEGGGGGGGDGGGGGGGGGGGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_176/1\t99\tchr11\t73437939\t60\t91
M\t=\t73438307\t459\tTATAAGCCGGAGAGTGCCAAACATGAGTAAAGGATGTTCTTATAGGTCAGTACAATTCGTATTGGTTAGGGATGTGAATCTCCATACATAC\tGGGGGGGGGGGGGGCGFGFGGGGGGFGDGGGGFEGGEGGGGGGFGGEGGGBGGEGGGFFEFEGEF=BC@E?5=>:41<35;:A>>5?>>:?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_176/2\t147\tchr11\t73438307\t60\t91M\t=\t73437939\t-459\tAGAGAAGCTGATTCAGGGGATGGAGGAATTTAGTGATCCCAAAGCCATCAGAGAAGACAGATAATGAAGTTGAGAGGGACTGGCAACCAAA\tEGEEFEDEBEDGFFFGFGGDFFGGFFGGGGGGGGGFGFGGGFFGFGGGEGGGEBGGGDGGGGGFFGGGGFGGGEGGGGGGGGDGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_178/1\t99\tchr12\t53797761\t60\t91M\t=\t53798144\t474\tCTTTTATCACAGTAAAGATCATTTTTATACTATTCCAAATGTGGTTCTCTATTTAAAAAATAAATTTTATTCATTTATTACCATTATTTCC\t;;CCB7+67@9=99;262*7CACACC;>@C=C6A?BCC6=AC>A8@@?1==>>=>4-AACC>A0@C?CCA)=ACB;+;;A5+>>=CC@;CC\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:76C14\nfoo_178/2\t147\tchr12\t53798144\t60\t91M\t=\t53797761\t-474\tATGTGAACTCCATGCATTTTCAAACTTTTTTTTTCATTTGATGTAAATTTGAGTTCTGATAGAGAGTATCATGTAGACCAAGTTTTGGGTA\tD=?B=5A-BA?>B:?4:>:?5AAA8=B<BDE:5CA=D?CDAD=?DCDD-D?:EDDE?:DDD:?CC5;=;@6=>>5?776<6::BB8-;59<\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_180/1\t99\tchr13\t73842639\t60\t91M\t=\t73843030\t482\tTCTCGATTTATATCTGTAATTGTATCTCTATATTGTATCTCTATATATCTCCTATTGGTTCTATCTTTCGGGAGAACCCTAATACATAGAC\tGFDGGGGGGGGEEGGGDEGFDBDEBEDEEADFFFFFFGEGFGAGGGFEDDGGGGEGDDEEEFGEEEGGEBBBBBABCBCCEBGGFFEBB=E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_180/2\t147\tchr13\t73843030\t60\t91M\t=\t73842639\t-482\tAATAATGCAGAGATAAGTGAAATCTCCCTTACCCAAATTTTTGTTTGATTTAGGCCTCCAGTGGATTGGCTGAAGCCTGCCATCCAATTTT\tFBCB?AB:CCBACBEBACADB?8AACAAAA-EE:EEAECEFE?EEEDEDECADDD?EEBEEFFBFFEEDBEGFD?GGFGGGEEEEAEEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_182/1\t83\tchr5\t60820050\t60\t91M\t=\t60819675\t-466\tGG
CTGTAAATCCATCTGATCCTGGGCTTTTCTTTGCTGGGAGATTCTTTATTACCGATTCAGTGTTGCCACTCATTACTGGTCTGCTCAGG\tFFGEGEGFEEGFA:FFDCCFEEFEEEE=DEGEEAFFEGGEEEEFEGGGFEFFEGGGGGGGGGGFGGGGGEGFGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_182/2\t163\tchr5\t60819675\t60\t91M\t=\t60820050\t466\tCTGAATTTTACAAATGGTTTTTCTGTCTATTGAGATAATCATATGGTTTTTGTTCTTTATTCTGGTATCACATTTCTCAATTTGCATGTTA\tGGGGGGGGGGGFGGGGGEGGGGGGGGGGFGGGDGGFGGGGGGGGGGGGGGFGGGGGGGEGEGGGGGGGGGGGGGGGFGGGGGGGGGEGFFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_184/1\t99\tchr17\t1496449\t60\t91M\t=\t1496841\t483\tTTGAGGCCACACATCCTTCCACAGCCAGACCCCTAAGTCAGGCCTCTGGGGCGGCTGAGGCCCCCAGACAAAGGAGTTCGTGGAGGGCAGA\tDEE?EDDFFFFFFAFFFAFEBDE:?EDEEABEEEEFFFFFEFFFBFFF=FAAC:1<><::*=7B79;8>=3?4=)A###############\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:74T16\nfoo_184/2\t147\tchr17\t1496841\t60\t91M\t=\t1496449\t-483\tAGCACAGCAGGGTGATGGGCAGAGAAGCGGGAAAGGGGATCCTGAGAAGGACCAGCCAGGGCCCTTCCTCCCTCCCCACACTCTTGCCTTT\t###A>=?BA=D=?ABBDB:?D=D:D:A=AEEED=CCD?ACE>@C=@:CC=?DCCB?C?D?DACBA;>;:::B?ADEACECE4BC@=>AA?A\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:15C75\nfoo_186/1\t99\tchr15\t42674703\t60\t91M\t=\t42675069\t457\tAAGTAATCTCTTAAGCTCTGGAAAGAAGTGAAGCAACTATCACTACCTTAAAGAAAAGCTGGAAACCTCTGATGAGAGTTTTGCTCTTTTG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGDFGGGGGGDGGFGGGGFGGGGGGGGGGGGGGGGGGFGGGGGGGGGEGGGGGGGFGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_186/2\t147\tchr15\t42675069\t60\t91M\t=\t42674703\t-457\tGAAAAGCCTTCAAACATGAAACAAACTAATGAGCAACCAAGGGACTGTTCTAAACTGTCCTGCAGAGTGTTGCAGGTCACAAGACCAAGAA\tEGGEDAFFFEBGGFGDGEGEEGGGEGGGGFFGGGGEGEGGGGGGGGGGGGGGGEGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_188/1\t99\tchr4\t171770955\t60\t91M\t=\t171771342\t478\tTACTAATTTCTGAATTTTGGCTACTGT
TAGAGTTATTTTATGTTATATATATAATCTGATTCTGTTGATTTTATCTAGTCTCATTACTTTA\tGGGGGGGGGGGGGGGGGGGGGGGGEDEE?BBD@C@FFFE?EEFFDFFGEGFGDFFEF?EEGEFF?C?CCDEEAECD5C@CE?CCEB?EEE@\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_188/2\t147\tchr4\t171771342\t60\t91M\t=\t171770955\t-478\tTGGATTGCAAACCTGCTGCTAACCCCTCTCTCCCAAACTAATGTTTGTATGTTTATATTGATGTCGGTATATTGATGTTAATTCCTCCCAC\tFBBBCB=@C@:FFFFGGGGFGDFEFFFGEGEGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_190/1\t83\tchr7\t47698451\t60\t91M\t=\t47698082\t-460\tTTCAAAGAACTAATGAATTAAAATAACTTTCTGTTTCTCATCTTAACTGTTCTTTCCCTGACTGTAGAATTATTATTTGGAAAGTGGAAAG\tG:AEECBEB=DDFFEFBE>>BCBCBA=BEEBFDDFFEABAEDFFDFCBDB=FFFBDFGGGGFFFFEEFFDEGBDGGGEGFGGGFFGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_190/2\t163\tchr7\t47698082\t60\t91M\t=\t47698451\t460\tTTTTCCAAGAGCAACACATAATTTTTAGTCATGGTTTGCTCATATTAAACATATGGTTACTGTATTACTAAAAAATATCAAAGTAATTTAT\tGGGFEEGFFEEEDEEEEEDEDEEDEEC?BEEFEEEGGDGFCCDAEDBDDD=ECEDDEDB=:5:A5==.:4ACAAA################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_192/1\t99\tchr2\t147405447\t60\t91M\t=\t147405814\t458\tGACAGAGCAAAACCCCGTCCCAAAAAATAAAAAAAAAGATTCACCAGGTGAATTAAAGCCTGAATAAGAATATATAAATATAGTTATAAAA\tGGGGGGGGFGFGGGGGGGGGGFGFGGGFGGGGGGGGGFEEGFFGGFDFCFBGGGEGG=BGFFBGEGGGGGEGGGEGGGFGFFGEGGFDGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_192/2\t147\tchr2\t147405814\t60\t91M\t=\t147405447\t-458\tGAAATATACGACTGGCAGAAGGGGTGAAAAATCAAACTATAATTCAACAATTGAAGACCATGTATATTAATATTAAATATTACAAAAGCTT\tFGGGGGGDGGFGGGGGGGGGGGGDEGFFFFAAGFGGGGGGGFGFEGGGGGGGGFFGGGFGGGGGGGGGGGGGGFGGGGGDGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_194/1\t99\tchr8\t68921390\t60\t91M\t=\t68921766\t467\tAGGGAAGAGGCAGAAGCAAGATCAAGCCAAGTCAGGAGTGGAGCTGCAGTGCT
GGCCCCAGTGCCTTAGCTCTTGACATAAAATAACACGG\tGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGEGGFGGGGGGFEGGGGDEGGGGDDGEGBGEGEEGBGBBGDGGGGGGEECEFFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_194/2\t147\tchr8\t68921766\t60\t91M\t=\t68921390\t-467\tGTCCTGCAGGAACTGCCTCTAGAATTTTTGGTTACAAGCCACATTCTTAGCACCCTTTTGCTCTTCTGTGCTTACCCCACAAAGGTCACTG\t###################################BGGGGGGGGGGFFFFFEEGGGFGGGGFEGGGGGGGGFFFGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:2A12A8A66\nfoo_196/1\t65\tchr1\t94930504\t37\t91M\t*\t0\t0\tTATAATAGTGATTCGTCTTAACCAGGGAGACAGTGTCATGTAAGAGTTCCTGTGTTCAAATCCTTGTTCTGCAACTTACTTGCCATGGAAT\tGGGGGFGGGGGGGGGGGGGGGGGGGFGAGFFFFEFFGGFGGGGGGGCGGGFFDFDGGGGEDGGGGGEFEGEEGEEGGFGGGGDGDFBFDFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_197/1\t83\tchr8\t141740825\t60\t91M\t=\t141740453\t-463\tATCAACACAGATTTGGGAGGCATCAAATACTTGAATCTATGAGAATGAGAAGAACTACCAGAGTGAAAAAGTATTTTTTAATAAAACAAGT\tFDGGDFEFGGFFGGGGGBGGGGFGGEGGFGBGGGFGGEDFGGGGGEGGFGGGGFGGEGGGGFGGGGGGGGGGGGFGFGGGGGGGGGGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_197/2\t163\tchr8\t141740453\t60\t91M\t=\t141740825\t463\tAGGACTTGGCAAGAGGGATAAGAGATTTTTAAGAAGGAGAATGACAAAATCTAGAGAGATATTCTAGAAAGATGGCTCTTGGCGGCAATGT\tGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGDGFGGGFGGGGDGGFGGGGFGGGGGGGGGEGFGGGGGB@=A@@1\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_199/1\t83\tchr15\t42000147\t60\t91M\t=\t41999776\t-462\tGAGTTTGATTATCCAGAGGTAGCTTAACATTGCCAGATGTAAGCTCCATGAAGGCCCAGAACTGTCAGTCTCGTTTACCACTGTATCACCA\tGGDEFGBFGFGADEEDEEGBEFDGFGEGFGFEGFEGGGGGGFGGEGGGGGFGGGGGGFGGGGGGFGGGGFFGGGGGGGGGGGGGGGFEFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_199/2\t163\tchr15\t41999776\t60\t91M\t=\t42000147\t462\tGAAGTGGAAATTTCCTAAGTCAGGGATGGCTGGTATATATTCCATCTTTCTTTGAGTAACATAAATTGCCAACCCTTGTGTT
AGAAAACAC\tGGGGEGGGGGGGGGGGGGGGGGGGGFGGGFDEE?CBABBCGAGFGEFFFADDC>BFDEFEGGFFEEFEDFDEEGGGFFEECFBEBEFECAB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_201/1\t99\tchr2\t55571418\t60\t91M\t=\t55571787\t460\tTGGCAGAATTCCAAGGGAAGGTTCATGAGCAGCTGTTGCCAGGGTCCCCAGAGCTTCCTTAGCTTCCGCTCTTCATGGTTGGTTGTTCAGT\tFEADFFFFFFFFFFEEFFFFF:FFFDE?EAFFFFDFDFFDE5EEAAACCC:DFEEBAAFEB@B?C-A><?@7@<<:??<=DE?C57@####\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_201/2\t147\tchr2\t55571787\t60\t91M\t=\t55571418\t-460\tTACAAAAAGAGCATTTATTTGGTCTTGAAACGCATTTGTGTGAATTGGGGTTGGGGATGGGGGCAGAGCAAAAACAAGATACAGCTTTCTA\t<>9@CDEB=E:C:<C;A5DEA?EDA=EE::DGGDGFDDDD:G?EGFDDADAF=FFFFFEFAEEBD=EE@BBDGGGGEEE=BAAAAA5=BB>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_203/2\t129\tchr14\t71031940\t37\t91M\t*\t0\t0\tTGATGCTCCCTCATTTATTCCTGATTTTGGTAATTTTGGGGGGGATAATTTTCTTTTTTTCTTGGTCAATCTAAATAAAGCTTTGTCTATT\tGGGGGGGGGGGGFGGGFGGGGGGFGFFFFFEGGGGGGGGFGGGGDGGGGGGGGGGGGGGGGGGEFGGGGGGGGGGGGFEGBGGDGGFGEFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_204/1\t83\tchr3\t43677026\t60\t91M\t=\t43676644\t-473\tGAAGACAAGATTAGAGAAAAGAGAGTGAAAAGAAACGAACAAAGCTTCCAAGACATATGGGACTATGTGAAAAGACCAAATCTATGTTTGA\t######?A:AA*>996?AA:A@5???08<AA4;:8=?66<?A@AAA=ECEEGGEDFEBEEEEGGGFGFGFGBGGGGGGGFGGFGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_204/2\t163\tchr3\t43676644\t60\t91M\t=\t43677026\t473\tAGCAGAAAGGCTGAAAATTCCCAAAACCAGAATGACTCTTCTGTTCCAAAGGAACACAACTCCTCGCCAGCAAGAGAACAAAACTGGATGG\tGFGGGGGGGEGGGGGFGGGGGGGGGGEFGGFBEFFGGGEGDFFFEGDGGDGBFGGGFGGDGGGGGGFGFGF?FF:GEGGDFFCCEGGEEGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_206/1\t83\tchr10\t85262901\t29\t91M\t=\t85262494\t-498\tGGGGAGGCCAAGGCGGGCAGATCACTTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCTGTCTCTACTAAAAATACAA\t###########C?=CC=AAAACBABCDFD
FDDEEGEBBCDCDBB5CECBEE?GGFGGFFFGGDGFFFFGGGEDDEDBG?GGCEEE?CCCCC\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:3\tX1:i:12\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:0T90\nfoo_206/2\t163\tchr10\t85262494\t37\t91M\t=\t85262901\t498\tTGGTGTAAAAATTTTGTTGAAAGTGTCAAAAAGGCTCATAGATACCCCTGTAGCTACCAGTAATGAAGAAAAAGAAAGAATTTTTGTTTAT\tBDDADEEDBEFGFGG?EAEADED??DDDCDFFFFBFBFBFDEDEEFGGGGG:FGGDGFEGDAAEEEE5EEECC8A80*B4AAAAA66;8>E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_208/2\t129\tchr6\t125609829\t37\t91M\t*\t0\t0\tATTCCATTCTGGTCACAAAAAGAATACCACATTAATAAAGTTATTTCTGTCCTAGGGAAGACCAAGGCACATATTTGAAAATATTCACTAG\t5-5<;,76<;6,:6,66762211.(;;<-<:=?.>5<;9:09,7:227:6>==:5-=9<;DD:ADEEB5?=5=?=EEEBEDFEFAD5A=DA\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:11C79\nfoo_209/1\t99\tchrM\t4732\t46\t91M\t=\t5123\t482\tAATACTACCAATCAATACTCATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGG\tGGGGGGGGGGGGGGFGGGFGGAGGGFGGGDGFGGGGFGGGGBGFFDEEFEFF?ACEFAFDEDADFBEEFBDFECECEBB@CB=5BEBCCAE\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:23\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chrM,+555144,91M,1;\nfoo_209/2\t147\tchrM\t5123\t46\t91M\t=\t4732\t-482\tTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCT\t@5><:6ACBADA5A:CC=CC:CD@DD?CCACBEA:EEE5EEEBEBEFFFFFGFGFGGGGGEEEEEDEEEE5EEEEEGGG?FEEEEE?DDDD\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:23\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chrM,-555535,91M,1;\nfoo_211/1\t83\tchr5\t74871224\t60\t91M\t=\t74870845\t-470\tAGACAGGCGTCTTACTCTCTTGCCCAGGCTGGTCTCAAACCCCTGGGCTCAAGCAATCCTCCTGCCTTGGCTCCCCAAAGGGCTGGGATTA\tCE?EDF?CEBEC@?B=DBEDCBFED@EEEEEDADDEGEEGGGFEGGEGEFGEEEEECEE@EGGDGGGGGGGDGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_211/2\t163\tchr5\t74870845\t60\t91M\t=\t74871224\t470\tTTTATCATGTTTTGGCAAGTCCCGAAGCAGTATTTTTGGGATGTTTAATTTTTTTGTTTCTGGTATTTAGGCATGTATGATATGACTAGGA\tGGGGGGGGGGGGGGGFGGGGGGGGGGGGGG
GGGGGGGFGGDGGGGGGGGGGGGGGGGGGGGGFGGGGGFFGGGGGFCGFEGGGFDGFGGG:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_213/1\t99\tchr9\t20628481\t60\t91M\t=\t20628845\t455\tAATTAGAGACCGCCCTGGGAAACACAGAGAAACCCCGTCTCTACAAGGAATTTTTTTTCTTTTAATTAGCTGGGCATGGTGGTGTGCACGT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGFGFG:GGGEGEGGGGEFFGGGGEEE=GGDGGDGGCFDFFFFF?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_213/2\t147\tchr9\t20628845\t60\t91M\t=\t20628481\t-455\tATCCAGTAAGGATCAGGGAGCATGCATTTTCCAAACTGTCTCCAACAATAGCAAATAATGGTCAATGGCTGGATATTCTAAACCAATGTAA\t5:A=?@?C:CC:DDBGEGGGGGEGGDGGGBGGGGDGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_215/1\t83\tchr5\t9438690\t60\t91M\t=\t9438325\t-456\tGGACAAAGGAGTCAGGTGAGTGAGCTGACCTAGTGAGGTGGCCTCAGAGATGTCTCAGGAAGTCCTGTCCCATGCATGCCACAGCAGGACA\tCE?AEEEDEBCBBFFFBACB=BBBCB@?D@=AEBC?EDDGEGGGCGFGGGGGGGGDGGGGFDGGEGGFGGGGGGGGGEGGGGGGGGGFGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_215/2\t163\tchr5\t9438325\t60\t91M\t=\t9438690\t456\tGTGTTTGTGCAATAAATAACATTTCTCATTCACTCCCACGCTACAACTGAAACTGAATAGGCTTTGGGGACAGAAAATAACTGCTCCTTTT\tFGFGGGEGGGFAEGDGGGFDGGDGGFGGGGGEGGGEF=FDDEDEEFEFFFEED:EEDEE5EEEEBF?F=EGGFDAFFEF>:DDDBEC=GDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_217/2\t145\tchr8\t28899072\t37\t91M\t*\t0\t0\tAGATCACTGCCTTTTTAACTTATCTTTAAAATGTAGGATAAAACCACTGCATATTCTATTTAATTATAGCAATTTCTTTACAAAGGTAAAG\tEGGDGFGGGGGG=FGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGFGGFGEGGGGGGGGGGGGGGFGGGGGGGGGGGDGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_218/1\t99\tchr1\t206890991\t60\t91M\t=\t206891385\t485\tCCTTAGGAAAATAGAGAGGGCTTTCTCTTATAAATATATTTTTTAAACAAGTAACATCCAGAAAAAGCAAACCCATCCTTCGCTTGCTATG\tGGGGGGGGGGGGFGEFFFGFGGFEGDDBEDGFG:DGGFGGGFGFEC:EEEGEEBGEF@F:CABAA3?>C5A@B@@
AEEEEGGFEFFBGEFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_218/2\t147\tchr1\t206891385\t60\t91M\t=\t206890991\t-485\tAATTCTACTGTCAGATGACCCTATTCATTAATTCATTTTCTCCCTGAAGACTATTGAATATTTAAGCCAGGAACCTGAGATGTACCCAGCG\tD?DB:D?FFF?FDEFFC?C55BDGGGGGEGGE?AEDGGAGGEEEEEEFDEAFFFFFF=EBGGGGGGGGGGGDGDGGFFFFAEEDEEFEFGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_220/1\t99\tchr15\t95365966\t60\t15M2I74M\t=\t95366327\t452\tTCTAGAAATCTCTCCCTCTGTCTCTCTCCTTCACCGAACATGCACAGAGGGAAGGCCATGTGAGGACCTAGGGAGAAGGTGCTGTCTGCAA\tGGFGGEEGGDGEGGFGGFFFFFGGFDGDDGGGFFFGGGDGGGGGDAEFDFDGFFDEFDFFCEEFFEAGFGGDGED?ECECEEEEEECE@:B\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:2\tMD:Z:89\nfoo_220/2\t147\tchr15\t95366327\t60\t91M\t=\t95365966\t-452\tAAGGGATTTATGAATAGATATTTCAAAGATCTCTAAAGAGAAGAGAAGAGAAGTCCTTTCTTCTGTTCAAGCAGATGATTGTTGTAACCTT\tEEEC?CFFDFFEEEEDFAFDBF?=EFF=FDFDDEEDEFDFFEBFFFFFDFFDD:DC>CCCCFD?FF=FDGGEGFFGGFBFGAEEEE:DDDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_222/1\t99\tchr5\t2818909\t60\t91M\t=\t2819293\t475\tAGTGTTTTTATTAATATATCTTCTCATAGATTAGAAGCCTTGTTTTTTTCAACATTGGCATGTTTATTTACTGAATTTATCTTAAAATAGA\tBA5ABDEE@BFDFFFFFFEEFFFFFFFFFBFFFFDFDFFDE??EABABB<FDDDFFFFA:AC>5CBCC8/BCB5BDDDCDFEF5AC@=@C?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_222/2\t147\tchr5\t2819293\t60\t91M\t=\t2818909\t-475\tTGTAATCCCAACACTTTGGGAGGCTGAGGCAGGCGGATCACCTAAAGTCGGGAGTTCGAGACCAGCCTGACCAACATGGAGAAATGCCATC\tA:>>??C@@CA?=?C=CEEB?EDDAE?DBD?EEBEECB=B?FFFFFFFDFFDG?GGEE?EBEEEEEEGGFGGGGFGFFF=GGGDDGDGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_224/1\t99\tchr3\t142788081\t60\t91M\t=\t142788459\t469\tACTACTATTCATGTCATTTAAAGTTAAAGGATACTTCTTTGTTTTGGATTAACTTTTAATTTTTATAGCTAAATGTTTACATCTGTTATGT\tDFDFFEFBFFFFFFFFEEFFFFFFFFEFFFFFFFFFFFFFFFBFDEDBDBFFFDFFFBDFFFFDECADFFD?FFFCEDFFFEFFFAABDEE\tXT:A:U\
tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_224/2\t147\tchr3\t142788459\t60\t91M\t=\t142788081\t-469\tTTATGTTTTTCTCACATTAAATTTCTTGAGTTTCTGAAAATGGTGTTCCTTCAGTGTGCTCTGTTTTCTAAAAATCCATAGTAATCCATAC\tGGAGGDGDGBFFFFFFDGEDFGGDGGGEGDBGGEDGFGDGGDGGGGGGGDGGGGGGFFFFAGGGGGFFFEF?GGGGEGGDGGFGGEGGGDE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_226/1\t99\tchr10\t65516550\t60\t91M\t=\t65516941\t482\tAGGCCTCAGGAAGGAGACTCAGAGGGTGCAGGAATCAGTGAACCATCCCCTTTCTTCCCACAGTCGCCCTATCACATTATCCGTTTTGGTG\tGGGGGGGGFGGGGGGGGGGGGGGGGF?FDFFFFFFGGGEGFEGGGGGGEGGFFGGGGFGGGFFEDFDGEFEFEEGFFGGGGFGEGFGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_226/2\t147\tchr10\t65516941\t60\t91M\t=\t65516550\t-482\tCCCACCCTCTTCCTTAGCTGCCACGATTCTCTTCTGCTGTTTCCCATGTAGTCAATAAGATGACTTGAAGATTTACCTTCAGAAGAGCGCT\t###########?184-:FAFFGEFGGFDGGGGDGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGFGGFDGGGGGGEGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0A3T86\nfoo_228/1\t99\tchr4\t108147713\t60\t91M\t=\t108148076\t454\tTTATAGAAAGTTTACTTTTTTAGTTTAGTGATTTTTTTCCTCCAGTGGGAGTTCTTGAATTTATAATGCATAGGTGATTTGTTTGTGATGA\t@6?536(127@15-50668@=B=7<A5<1A.8799>C642*@2>5)@############################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:51A14A24\nfoo_228/2\t147\tchr4\t108148076\t60\t91M\t=\t108147713\t-454\tTTTTAAAATAAATATATTTTGAATAAAAACACTACAAAATAAGAAATGATTTGAAAAGAAAGCAAATATTTTGGGTGTGATGGTGCTACAG\tD:C;8;?AC?CACAC?:AEFCA888=AAAC:D;D>>EEAEBA>AA>A:?AC?><ABBACA>>6AA?D5ADD?CA>A7?7873+;99=B=B=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_230/1\t83\tchr20\t20635948\t60\t91M\t=\t20635572\t-467\tAGCAATCTTACTTCAATAAAATCTAAGGAAATATCGAAAAAGCTGCTTCTCAAGATGTTTACTGCAGTATTACTTCTATCAGGAAAAGGGA\tFGFGGEGGF?GDFGGGEGGGFEGGDGGFGGGGGBFGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:
i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_230/2\t163\tchr20\t20635572\t60\t91M\t=\t20635948\t467\tATGTCATGATGATAATAACCAATAAAACTAAATATATGATAGCATTCTATCATTTCATTAAGGTCCAGATCCCATGTCCCACAACCCCCCA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGFGGFGGFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGAGGGGGF=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_232/1\t99\tchr2\t91680180\t9\t91M\t=\t91680212\t123\tCCCCTGAAAACTAGACAGAAGAATTCTCAGAATCTTATTTGTGATGTGCGCCCTCAACTAACAGTGTTGAAGCTTTCTTTTGATAGAGCAG\tGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGFGFDGDFGEEGGGGGEGGGGEGGGG?FFECEGEGFFGGGGBGEGEEG\tXT:A:R\tNM:i:2\tSM:i:0\tAM:i:0\tX0:i:4\tX1:i:5\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:21C10A58\nfoo_232/2\t147\tchr2\t91680212\t9\t91M\t=\t91680180\t-123\tACTTATTTGTGATGTGCGCCTTCAACTAACAGTGTTGAAGCATTCTTTTGATAGAGCAGTTTTGAAACACTCTTTTTGTGGAATCTGCAAG\t4FGBEGGFGGGGBGGFGEGGDFGGGGGGGGGGGGGGGGFGGGGFGGGGGGGFGGGGGGGGGGGGGGFFCEAGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:3\tSM:i:0\tAM:i:0\tX0:i:1\tX1:i:2\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:20C20T20G28\tXA:Z:chr2,-91689386,91M,3;chr2,-91680212,91M,3;\nfoo_234/1\t99\tchr2\t142360798\t60\t91M\t=\t142361182\t475\tAGAGTACTGTACTTTTGCTTTCATTCCTTCTATACATTCTGCCTTCATCCTTAAATTGTTCAACTCGATAGTGCTAATATTGGTAGATAAT\tFFFEFGGFGGGGGGEGGEGFAGGGFGGGFEFFFAFGGGFGGGFGGGDGGGGGGEGGGGGGFDGGBGFDFEECFGDDFFFFE:EA=E=CBEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_234/2\t147\tchr2\t142361182\t60\t91M\t=\t142360798\t-475\tGTGCCACCATGCCTTGCTAATTTTTGTACTTTTAGTAGAAAGGGGGTTTCACAATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAAAT\tFEBEFEF=FFFFGEDGGGFGGGBGFGEEEE?GGGGFGGGFGGGGFGEGEGGGGGBGEEFGFGGGFEGGD=GDFEGFFGGGGGG?GFGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:28T62\nfoo_236/1\t81\tchr20\t47287266\t37\t55M1D36M\t*\t0\t0\tCTGTCTCTAAAAAATGAAATGAAAAGTACGTATTTATTGTTGGAAATTAGTAAATAAAAAAAATGTAATTCTGTGTCAAAGAGCTCACCAC\tDDC@B?:??DDDDDDDDBDDDAD=BDA=CDBDB:ADDDBBDDDDDDDDDDCCCC>CCDDDDDD5DDDDDDDDDD=BDDCDD<
<>;;3686;\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:55^A36\nfoo_237/1\t99\tchr8\t120141611\t60\t91M\t=\t120141982\t462\tACAATGACTGTAAGAAAATAAGAGGAGCAACCAAAAAGTGTTTTGGAAGCTAGCAAAAGTATTAGACAAATGGTCACTGTCTTAACAGAAC\tGGGGGGGGGDGGGGGGGGGGGGEGGGGGGGFGGGGGFDCGFDFFFDGGFGBGGGGGFGGCEEGFEEEFEGCDCA:CCAC>GDGG?GFGDDA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_237/2\t147\tchr8\t120141982\t60\t91M\t=\t120141611\t-462\tGCTACAGAAGACTAAAAGAAAGTAAGAACAGTGAGTTTCTTAAATCCCTTTCTTAATCAGAATGCTACTGGTAATATACACATCCCCTTAA\tF=<;<9BGFGEGDGFFGFGGGFGGGGGGFGGGGGGGGGGGGGGGGGFFFDFBGGGGGFGGGGGFEGGDFGGFGGGGGGGGGGFGGBGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_239/1\t83\tchr18\t36107132\t60\t91M\t=\t36106763\t-460\tGAGAGGCAAAGATACCTGAATGCCCTAAGTGTTTCTTTTTTGGGGGAATATTTATATGAAATTGCCAATGTAGTCTGTCTCTCTTGATGCT\tA5@@=56;:89:?>AC5ECCBD5AEF;>9@-A5?EEC@5@4@C=5CEE=:EEEE?EB:DDD::DDEDEBDDEBEEDEDFDFFDFFFEA=EE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_239/2\t163\tchr18\t36106763\t60\t91M\t=\t36107132\t460\tGATTAAGCTTCTTGTGTTCAGTTTCCCTCTACTCCACTGGCAAGGACTTGAACTGCTCTATTTGCTTTGCACAGTCCCATTAAATTATTCA\tC==@=?CC:C@=B@BDEDE5DD5?DEADEDGDDDGFFGAGBD?D=AACA>5AA>AD:D5DDDD;??DD?5DD:=BGGGDFA?E5EEECE:C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_241/1\t99\tchr13\t21602081\t60\t91M\t=\t21602469\t479\tGAAGATGGAAACACACACAGCTCTTACTTTGCCCATACCCTGGTGCCCCTGGAGCAGAAGTCCTGATGTGGTGATATCATTTCTATGCCTT\tFFFFFDFBFFFE?FFFDFEFAFEFEFFFFDDFFDBFFFEFFFF=FEEDEEE?AEBECCCBBBBCEEEEBBC=C=>DCCDCFEF=FFEEDFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_241/2\t147\tchr13\t21602469\t60\t91M\t=\t21602081\t-479\tAAATACATAATCAAATGGGTTATATTCTGAGATTACAGTAAGAAGGGGCAGGAGATTAAAGTGAGATTGACAAAATTAAAAATATTCAGAG\t=EEE:EGBGEEFBBFDAGGEEDDGGGDDFGGGEGGEGGEGGEEEEEEBEEDFGFGDEEEEEDE=GGFGGGGGFGGFEEFFEEDGEGGGGGE\tXT:A:U\tNM
:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_243/1\t99\tchr14\t98035327\t60\t91M\t=\t98035718\t482\tCTACAAGATAAGCCTATTTCTTAACACACCTATGTCAAGCAGAGGTTGTAATGCAACGTTCAGCTACATTAAAGGGCTTCCATGAGGAAAA\tGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGGGGGGGEGGGGEDFFFEA?EEGFGE:FDFFFGFFGGDGEFGGEGEGGEGDEGEDECD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_243/2\t147\tchr14\t98035718\t60\t91M\t=\t98035327\t-482\tTAAGCCCTCACTCTGCTGCAACGCTTTACAGTCTAGCTGCAGTGCTGTTCAAATGAACTTTCAGTGATGATGGAAATGTTCTATATCTGTA\tEBB4AECEEAE5FFGGGGG?EFFDEFGGFFGGEGFGFGDGGGFGGGFFFEDGGGGGEGGGGGGGGGBGGGEGGGGGGGGAGGGFEGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_245/1\t83\tchr2\t220279769\t60\t91M\t=\t220279387\t-473\tGCAGTGAAAGCTGAGCCCACCTGGATCCTGCATTCTAGGGTTGGCTTGCAGTCCACAAAGTGGGGGAGGAAGGGAGCACAGAGAAATAGCT\tCBDEEDCB@5CFFDEF@@:BB?EEDEBD?DDDFBBFE?FEEDEFDBAFEBFFFFEFFFEEFFFFFFFFFFFEEE:E?=FFFFFFDFEDFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_245/2\t163\tchr2\t220279387\t60\t91M\t=\t220279769\t473\tTGCCAGCCTGGCTAATTTTTAAATTTTTTGTAGAGATGGGGTCTCACTCTGTTTCCCAGGCTGGTCTTGAACTTCTGGGTTCAAGTGATCC\tGGDGGDEED?FFFEFG=FGGGD?GFFGBGGGAFGGFEDDDGFG?GEGGGGBFFGDFFGF:EEEE=4-<+1B.:B=;5?;(C.CC>73*5?A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_247/1\t83\tchr7\t126844340\t60\t91M\t=\t126843963\t-468\tGAAGAGTATTTTGTACGGCATAATACAAATGTTGGAATGAGAATATAATGCTCCTGATGCTGTTAGTCAAGAGGTTTGCATAATTTGGACA\tGGGGGFGFGFFDGDFGGGFGGGGGEGGGDGGGGGGGGGGGGFGFEGDGGGGGGDGGGGGGGFGGGGGGGGGGGGGGGGGGGFGGGGGFDGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_247/2\t163\tchr7\t126843963\t60\t91M\t=\t126844340\t468\tGGGACTGTCTACAAAGAGGCACACGAGTACTTTTTGAAGCAATGGAAATATTCTATATCTTGAATGTGATAGTGGTTACACAACTGTATAC\tGFGEGGGGGGGGGGGGGGGGGGGGGFFAFFEEEEEEGGGGGGFGFBFFFFGGEGGGGGGGGGGGGGEGGFDFDFFFDEDB###########\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37
\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_249/1\t83\tchr20\t51388009\t60\t91M\t=\t51387633\t-467\tCGGGGACCCCCCATAGTGTGTCCATGAAGTCAAACGTGGTCCTGCAAACTCTTTGCAAGGTGCTGGCCTGTCGCAGGCATTCCCCAGCCAT\t###################################################A?64693?9,539=2=);=7@>=4C:72?8D-DDDDDDDD\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1A5A1A72A8\nfoo_249/2\t163\tchr20\t51387633\t60\t91M\t=\t51388009\t467\tAAGATAATTAAATAACATTGACTCCCAACGTGAAAAACTCATTTCCCTTCCATGCGTGGCAGCTGAAACTGCTTTTTGATGTAGAGTAGGG\tGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGEG=GGGFGGGGGGGGGGFGGGGGGGGGFGGGGBFFFEGDEF?EGGGG?BFFDEFDB5@?CC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_251/1\t83\tchr4\t104178743\t60\t91M\t=\t104178367\t-467\tAGGTGTGAGCCACTGCACCCAGCCAACGTAAGACTTTAGTTGATACTCATTGTAGTAAAGAACAAGAATCAACTCTATTAATTTTAGATTA\t?@157-=?=C?>C:CC?CBCCEFCGDGGDGGGGFFFGDGEGGGEGGF=FEDGGAGGGFGFGGEGGGGGGGGFGGGG?GGGGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_251/2\t163\tchr4\t104178367\t60\t91M\t=\t104178743\t467\tGCTAGTTCTGCCAAAACTACGATCCAGTTAACATCCTTCCACATATCTTTAAATTTTACTTTTTAATTTAGTTTTTTGAGTATATTATAGC\tDEEEDEFFFBEEDEEGGGFG?EEEEDFFFFGGFGGGDGGGDFGGEGGEEGGGGGGGDGGGGGFGGGEGGBFEFFFEDEADCEFDEGGBGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_253/2\t145\tchr6\t74622818\t37\t91M\t*\t0\t0\tGATTATTTAGTTGCCCATTTTTTTTCCTTATTCTGTTTCCCCTTAGATGTTAGTGATTTCTAAATGTTTTGGCCCTATTTCAGCTATACTG\t#####################A:A:B=>6C@?*=*?4A+A-==?5=C@6CC-5*.-@@@;;D?D,DBBBB=9)3/8??DEFCC?5C=BDDD\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:38T33A18\nfoo_254/1\t99\tchr6\t92412781\t60\t91M\t=\t92413141\t451\tACCATTATTCTATATAAGTGGCAAAGGGAGGAAGCAGTGACCAGAATCCAGAGACAATAGCTGGAGCTGAAGGCAGCTGGAGCAAAAATGC\tGGGGGGGGGGGGGFGGGGEGGGGGGGGGCGGGGGGGGBGFGGFGFGFGGGGEGBGFEDEEEDCF@>>-=-?<><;7747=0500;>7@588\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:
0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_254/2\t147\tchr6\t92413141\t60\t91M\t=\t92412781\t-451\tGGGCCTCAGGGACCTGAGGTCAGCCTCTGGCCAGCAGCCTGCAAGGAGCTGAGGCCCAGAGTTCAACAGCCCTGGGGAAACTGATGTGCCA\t####################?.;B01?=BAD?F=GFFFGFGGEEGGFFGGGADGGGG?GEFGGBGFGGGFGGGGGGGGGGGGGGFG?FFFF\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:0A6T0A3A78\nfoo_256/1\t99\tchr12\t98617437\t60\t91M\t=\t98617820\t474\tCAATAAATATTGACAGAATGAAAACAATACCCTTAATAAATGTGAAAGAACAGAGTCTTCTTGTATTGCTGTTTTAAATATCCCGTAACTT\tGGGGGGGGGGFGEGGFFEGGGFGFGGGGDGEGGGGFGGGFGFDGGGCFGEFDFAFDFFFFEFF@EFDEBEACFF?BEEEDGBFGGFEAGFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_256/2\t147\tchr12\t98617820\t60\t91M\t=\t98617437\t-474\tTTTAAATATGGATTACAAATCCAGCTGCCTTAAAATGAAAGTAAAAGGTTTTCCTTTCTCAGTCTATTTTGTAAGTTGGAAATTAATGTAA\tEC3CCCC=AEB@C>>?BA-@C?D:GFGGGGGGGGGGGGGGCGGGGGGFGGGFGGFGFG?GGFGGGGCGEFGFGGFGGGGGGGGFGGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_258/1\t83\tchr20\t14052099\t60\t91M\t=\t14051713\t-477\tTGTAGGGAAGACAACTAAGACCCCAGCTCCATGCTTCACCTGGTCACCTAGGCCCTTTGAAGAAATCTTGCAGTGCGGCATACCATTAGTC\tEGGFDGGGEEGFFFFFEFFBDGEGGGGEGGGFGGGGGEGEGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_258/2\t163\tchr20\t14051713\t60\t91M\t=\t14052099\t477\tATGAACAAATGCAGATAATAGGGTACATAATTTTTAGTACTTGAAGCATAGCCATGAGTGTAATCTGATTGTCGGAAAGTATACAATTCTT\tGGGGGGGGFGFGGGGGGGGGGGGEGGGGGGGGGGGGFGGFGDGGFGGGGDGGGFGGGGDGGGGGGGGGGGGGGGGGGGGEFFGGGEFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_260/1\t99\tchr16\t63662422\t60\t91M\t=\t63662802\t471\tTGTGCTGTCCTCGTCACTGTCCGACTGAACTCTTCATTCATTGAACAAACAGTTCAAAAGGTCCTTTGAGGAACTCCCTGGTGCCCTTTGG\tFFCFFDFGGFFGGGGFGFFGGGGGC=EEEEDGDFGGFGFFDGGGEGGFGGGGEGEGGEGEFBGEGGGFFFE5BBEFFFEFGBGGGF?FF=F\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\t
MD:Z:91\nfoo_260/2\t147\tchr16\t63662802\t60\t91M\t=\t63662422\t-471\tCCCAAAAAGGGAGAAACGACTCTAAAATAGCAGAGTCCACTTTGAATCACAGTGAACAAACGGTTATGATCCATTTATGGTGATGAGTAAG\t#@+DDDD?DD=C@CBA=E:ADD>FFFGDF?GFFEFFDDBAGGGGDGGGCGGFGGGGFFGAFGGGGGGGGGGFGGDGGGGGDEGGGBGFGED\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:0T90\nfoo_262/1\t99\tchr18\t25007594\t60\t91M\t=\t25007971\t468\tCAATATCTCAATCTTTAGAACATACCTCAGCATCTCCACATCTCCTCCTACCTCTGTCCTGTGTTGCTGGGACACAGGGGCAGGAAATGGA\tGGFGGGGGGGGGGGGGGGFGFGGGGGGGFGEGGGGGFGGGGGDFGFFGGGFGGFGEGEGGEEGEDEEBEEFEE?EBCCADCCCC??;:<>6\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_262/2\t147\tchr18\t25007971\t60\t91M\t=\t25007594\t-468\tAATTACTCTGGTTCATTTTTTGTCAAAGAATTTTTACTGCTGAATAGTGTCATAGACCTATTCATTTATTTATTTTCTCACTCTTCCCAGT\tEDEFDGGGEGGFDGFGGGGGGGGGGGFGDGGFFGGGGGGGGGGGGGGGGGEFGGGFGDGGGEGEGGGFGGFDGGGGFEE5EGGEFGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_264/1\t83\tchr10\t120158272\t60\t91M\t=\t120157889\t-474\tAAGCAACTTCGTTTTTACCCCCACATAGGTAGTCAGCAACACAATATCTTGTAGTTGTGAGTGGTTAATTGAAGGATAATCCAGTAACACA\tGGGGEGGGEGDEDEEF8EFFFGEGGFFGGGGGFGGGGGGGEGGGGGEGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGFGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_264/2\t163\tchr10\t120157889\t60\t91M\t=\t120158272\t474\tAGACGAGGGCCTGTTTGGGCTCAATCATTAGAATCCAGGTTGCTGGAGAGGTTCTGTGAATCTCAAGTGGAACTTGGCAAATTTCAGCAAA\tGGGGGGGGGGGGGGGGGGGGEEGGGGGGGFGFGGGGGGFFGGGGGGFGFGGDGGGECD?EECEEEGDCFGDCEGGFGGAEFEGGEEFDFEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_266/1\t99\tchr9\t83424580\t60\t91M\t=\t83424973\t484\tCTACAGAATTTAAACAAGATGCCTCTGGGGTTTCTTCCATTTTGACCATGAACTTAGATCACTAGAATGCTCACTATGGAAGACATTGAGC\tFBDFFGFGGGGGGGGGGGG=GGGGGGGGEFDFFEFBDDCEGFGGDEDFGFDFEGGFGEGEEEFFFEBEEBDFDF?FCDCEGEBG=?DEC5:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_266/2\t1
47\tchr9\t83424973\t60\t91M\t=\t83424580\t-484\tGAAGTCTAAAGCTTAACACTGAGCCCGGCAGAAGCATAATTTACAGCCCCAAAGGGAAATACTATCCTACAATGTAAGAACCTCTCCTCTT\t=CDB:D=FFFBC??::GFGDGGFGCFFGFGEGGFEGBFDFEFADAEGGGGGGEFGBDGGEDGFDDFGFGGGFGGDGFFFDFGGEFGGDGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_268/1\t83\tchr2\t40577370\t60\t91M\t=\t40576992\t-469\tCCCACAGCCTACATATCCTTCAAAATGTGCATGAGTAATGTTAAAAAATCTGTTTAGATGATTTCCAATTTCATCATTCTCAGTAAACTAC\tGGGBFFGEGGFGGFEGFFFEGGGGGEGFGGGGGFGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_268/2\t163\tchr2\t40576992\t60\t91M\t=\t40577370\t469\tATTATTCAAAAGGATTTCAGAACTATCTGAAAAGACAGCATGGCTGTATGGCACGTATTTCAAACAGTTGGATCAAGATCTGAATAATGTT\tGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGFGGGGEFGGGEGGGGGGDGGGEDGGGGGGGGGGEGGFFGGGDEFFGEGFGEGGEFEB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_270/1\t83\tchr2\t40577370\t60\t91M\t=\t40576992\t-469\tCCCACAGCCTACATATCCTTCAAAATGTGCATGAGTAATGTTAAAAAATCTGTTTAGATGATTTCCAATTTCATCATTCTCAGTAAACTAC\tEEE:EEFFAFEFFFF=FFEEEEFDFF?AFAFEEFEFFDFEFFFFFFFFFFFEFFFFDFFFFDFFFFFDFFEFFDBFEEEEBEFFEFED=DE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_270/2\t163\tchr2\t40576992\t60\t91M\t=\t40577370\t469\tATTATTCAAAAGGATTTCAGAACTATCTGAAAAGACAGCATGGCTGTATGGCACGTATTTCAAACAGTTGGATCAAGATCTGAATAATGTT\tGGEE?GFGGGGGGDGGFGEFDGGGEDBEEEFFF=FEEEEE5EDDEEFE?AEF=EEF:FFF=EEEEC;?DDEBBDCFFFFEEGGBFE=?E?A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_272/1\t83\tchr3\t154905363\t60\t91M\t=\t154904993\t-461\tGACCATTCCTGAAGTACAAATACTTTGGAGAATGAGACAAATTAATGCCTGACTCCTGTATAAGAAGTATGGGTCTGGGAATGCAGATGAT\tGEGGGGBDFDFGGGGGFGFGEBEEEEDDGGGGFGGGGEGGGGGGFGDFFDFGGGGGGGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:88C2\nfoo_272/2\t163\tchr3\t154904993\t60\t91
M\t=\t154905363\t461\tCCAGGTACAAAAGAATACTATTACATAGACTCTTCCTTGGTTTGCACAGAGGATGACTAGGGCAATTCTGTCATTCATAAAAACCCTGGTT\tFFEFFGGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGFFBFDGFBGGEFDFFFFDCA=AC:BAAACDEFF?FGGE;AFC=EAC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_274/1\t99\tchr3\t6977819\t60\t91M\t=\t6978195\t467\tCACTCATTCCTTTGGAAATATCCTATAACTTATTATCAGTAAGAGTGAAATTTGCGAAATCTCAATCACTAGCATCTTGTGTTCTCACAGC\tFGGFGGGGGGEGGGGGGGGGGGGGGGGDGGGFFGGFGGBGFFFEFBFEFFGGBBGGGGEBGGFGGFF5FFF:F:DGGGDFFGDGFABCCED\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_274/2\t147\tchr3\t6978195\t60\t91M\t=\t6977819\t-467\tAGTGTCTCCATGGACCTTTACACCAACCAGAAATTCTCCTCCACACCCTCCATCAATCGCACTTTCTCCTCTCCACTTAACCATTCATCCT\t###########################?-=@@B9;C7'7)7A-;)>AA:AA5<?B5>C6B9D;DDD@@B>??DABDDFFFFEEAEE:AEEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:37A53\nfoo_276/1\t99\tchr11\t3666630\t60\t91M\t=\t3667039\t500\tTATATATATATATATATATTTATAAGAAATCACCTTGAAGATTTGAAATGAGATGAGCTATTTGTGTGACTAGAGAAAAAAAAAACAAAAA\tGGDGGGBGGGFG?EGGGDFAFDBDFGF=GDFEDFDGFEGDBBEEBE=FD?D5B?E@5@D=E:DEBEBE5BCBDABE=E?A@>@<>0:?:08\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_276/2\t147\tchr11\t3667039\t60\t91M\t=\t3666630\t-500\tAAAACAAAACAAGGCTAGGTGCAGTGGCTCGCGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGTGGGCAGATCACGAGGTCAGGAGATCG\t#########@;;:4;->3==7B-4==DD=D:EEEE:DFDF:FB=FEFDDEDDDGGGGGEBGEEEAECAAFFEEEEE:EED=GGBDGDGDE=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_278/1\t99\tchr11\t112652415\t60\t91M\t=\t112652768\t444\tAGGCTTTGTTGAATGAAGCAGAGAAGATTGTATAGTTGGGGCTGGTCTTGGTGAACACACATTATTACCCCCCACATCCCCTTTGTGTAGA\tGGGGGGGGGGGGGGGGFGFGGFFGDFAFFFFAFFFDGGGGGAGGE:AAC:90/':>>@>CC:E=A9*6=5?####################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:71A19\nfoo_278/2\t147\tchr11\t112652768\t60\t91M\t=\t112652415\t-444\tGGGG
GGGGAAAAGGGAAAAGCCAGCCCTTTGTATAGAAATTTTGCTTTTTTTTCCCTCATTCTACTTTAGAACTGCAAGCTTGTGCACTGT\t#################@=.>A@?=D;=DFFACCCC-:DBABB-::E;B?FDADGGDC=BDFFFFFDDEDEFDDAGBEAEDEEA=EBFFFF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:9G81\nfoo_280/1\t81\tchr14\t43458764\t37\t91M\t*\t0\t0\tCTCATTGTGGTTTTTATTTGCATTACTCTACTGATCAGTGAGGTTGAGCTTTTTTTCATATGCTTCTTGGTCACATGTATGTCATATTTTG\t>::98;EE=@@;EFFBECDEED@C?DAEEBDDGGEEBGGGGEGGGGGGGEGGGGFFFFFFGGGGGGGGGGGGGGGGGGGGGDGGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_281/1\t99\tchr3\t75449080\t60\t91M\t=\t75449462\t473\tAGGTAACAGTGAAGCCCATCCTCCCACACCCCCAGGAACCTGGAAATAATAGAAATGAAGCTGCCTCTGAAGGTTCAATTAGGTCCATCCA\tFFFEFFFFFEGGGGGGGGGGGGGGGDGGGGEGFEGGBFFGFGDGGFEGDGFDFGGBDDEEDGFDEAC=EC:A?==@@B><EFF=EECEGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_281/2\t147\tchr3\t75449462\t60\t91M\t=\t75449080\t-473\tCCAACGGGATGAGCACTATTCTTGCTGGTCCATCCTATCATGGCCTGCTGATGTTGAACCATGACTTGCTCTGCAGACTTCCCAAGCCTTG\t##############################A5:CC=AD=5EEBDBEBE?CDDE=EED?DD:EE-EEDBDD5BEEEEDAEDAGGFGGGGGFG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:5A20A64\nfoo_283/1\t83\tchr10\t17105090\t60\t91M\t=\t17104726\t-455\tACTTAAAGAGATTTCTTTTTTTTCCTTCCTTCTGGATGGGATTACGCATTGAAGGAGACCCCTGGATCTCAGCTGTTCAGTGGTCTTTAAT\tDGGEAGGFEGGGGGGGCEEEBGEDGGEGGGEGGGGGFGGEGFFFFFEEEDEGGGGGGEGGGGGGGFGGDGGGGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_283/2\t163\tchr10\t17104726\t60\t91M\t=\t17105090\t455\tTCTAAACTAATTAGCACAGAAACAAACATACTGCTCCCAAGTAGTGGAGTAATTAGCAAGCAAATGCTCTACCATCTTTCACTCCCCAAAT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGECEE-CCCCAGGGGGGFGGGGGDGGGGFGGGGBGGGEEGGFGGGGAEEGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_285/1\t83\tchr10\t55892951\t60\t91M\t=\t55892548\t-494\tAGTAAATATTGATATAGGATGTTGCTTTGATGGTAC
CAATTTCATTTTATATATTGTAAGGACTTCCACCTACTATTAGAGTAAATTTCAA\t8:C>0@EE5EEEDEEE??CCB=?-;1=?9@B5@C=DC@AC=>CC:CACC5AEDCEEDCDBB?ECCEB<AAA?DBDAEEEEEEBEEEDEDED\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_285/2\t163\tchr10\t55892548\t60\t91M\t=\t55892951\t494\tGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCTCGCCTGGACTATTTGTTAATCTTGATCTTGTTATATATCAACTGCTGGGTTG\tDEEEEDFFFFEAABBDDBD-DEEA:BDA5DCAC5CFDAFFEEE:DDADBDBD=DDD6DD=C?EC5@2@=@>-><@>@2==@2726::?###\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_287/1\t99\tchr5\t4001037\t60\t91M\t=\t4001407\t461\tCTTTATATTTGCTGGCAGCTGATTAGATGGTGCCCACCTGATTAAGGGTGGATCTGCCTTCCCCAGCCCACTGACTCAAATGTTAATCCCT\tEEEEDEEEEEEEEEDEEE?BAAACAAAAAC:AAC?AACCCEEEEEDBC-B8(4<7ECB:CC:DACAABB@BDE:BCDDD?BE?D=CB?@CA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_287/2\t147\tchr5\t4001407\t60\t91M\t=\t4001037\t-461\tAATAATAGTAATAATAATCTTATGCCACATGATAAACGAGAAGGGAATAAAATGAAGATATTTTCTTAGCATAAGTGTATACATGCACAAG\tE=;-:==:EAE;CFFD5BDEE?@5ABACCCA?:EGGEDEDDFFF>EFGGGBDDD=D=D;>CDC:BD=EE?EAFFDFDEEDEEEE=EFFBDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_289/1\t99\tchr5\t8102459\t60\t91M\t=\t8102844\t476\tACTCAGGGGTCAGGAGCCTTCATAGAGTTGCAGTCATTATTTGTTTACCCTTTGGCTGTGCAGATGACTGTTTGCCCCTCAGAGGCAGAAA\tGGGGGGGGGFGDEGBGGGGGGFGGGBGGGEEGGFGFGGEGDFDGGGGDGGFFDEFEBE:DD@D??>8>>='=520BAAAAD@=BBD?AABD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_289/2\t147\tchr5\t8102844\t60\t91M\t=\t8102459\t-476\tAGGATCACTTTGAGTCCAGGAGTTCAAGGCTGCAGTGCACCATGATTGTGCCACTGCACTCCAGCGTGGGAAACAGAGAGAGATTCTAGAA\t5BAAC?A=CDDEDEE5FDFDFAABA>FAFDEDDDDDEDCEDAEEEEEEE?EDCDD:?BEAEGGFGGDFGDGGEGGGGGGGGGGGDGFGGBG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_291/1\t99\tchr1\t113801412\t60\t91M\t=\t113801792\t471\tATTTGCTTGATATCTGTAAACAGACTTTTCTTTCAGGAAAGGAATTTGGAGGAGGGCAGAGCCATCATTATCTC
TTTTTTACAAATCTACA\tFGFGDGGGGEFGGGEGGFGGFFFGFGGGGFGFGGEGGFGDEGGGGFGGFBDFGDGFGFGDEAG:BGD=GADFDFBFFFEDCGDGEGGDDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_291/2\t147\tchr1\t113801792\t60\t91M\t=\t113801412\t-471\tTCTGTTCTTAAGAGGATTACATTGCAGAGGGGATAGATAGATAATAAGCTGTGTATAAATAAGATAAATTTAATTATAAATAATACTGATA\tEE:?EEAD5=DGAFGGDGGGGGFGEFFGGDFGGGDGGDGGGAGGGGGFGAEFBGGGDGDFGGGDGGBEEGFGFGFGEGGGFGGFGEGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_293/1\t99\tchr13\t103952241\t60\t91M\t=\t103952612\t462\tGTAACGATGAAGCCAAAGCCCTTCTTCCTCCCTGATATATGTGCTAAAAGATAGAGACAGAAAAATGCTGAGAAGAAAGAAAAGGCTGAGC\tGGGGGGGGGGGGGGGGGG?GGGGGGGGGFGGGGGBCBFFFECEE5CECCEFGGFDGEGEGGBGGEEGGGGEFEEGGBGGDFGG=FFGGEGB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_293/2\t147\tchr13\t103952612\t60\t91M\t=\t103952241\t-462\tAAGAATTCAGTTTGTAGTGAAGCAGAATCCTCAGAGAGGAAGGATGATGGTACAGTCTATCTTGCCTTGTATATTAGGTTTTATTAAAGTT\tECCE::GGGABFEGFGGEGGGBCDGGDEEGGGGGGGGGGEGGGGGGDGFGGGGDGGEEEDEEFBFGGGGGGFGFEGGGGGGFGGEGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_295/1\t83\tchr18\t41680767\t60\t91M\t=\t41680397\t-461\tTGCCCTTTTTTTTGGTTATTTCTTGATTATATGCTAAACAAGGGGTGGCAAAGAAAGACCAAGAGAGAAAAGGGCTGTCTTCAGAAGAGAA\tFEACGFFGFDFGGEGGCECCAGDEDFGGGEGGGGGEGGGGGGDGGFGGGGGGGFGFGEGGDGGGGGGGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_295/2\t163\tchr18\t41680397\t60\t91M\t=\t41680767\t461\tTTGCTTGAACCCAGGAGGTGAAGGTTGCAGTGGGCCGAGATCGCGCCACTGCACTCCAGCCTAGGCAACAAGAGCAAAACTCCCTCTCAAA\tGGGGGGGDGGGGGGGGGGEFFFFFBEDDDFEGGGAFFDABBDE?EEEEEDF=FB?DBDD=ECDDEEEEFF?D@DAE?E:CADACC;CABAG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_297/1\t65\tchr9\t29739213\t37\t91M\t*\t0\t0\tGAAAAGGATGGCTTTTCTTTCTCAACGTGGGATCCTACTCTCCAACTGTGAGAGCAACGCTTCTCAAAGACTATCTCCAGTTGGCCATAGA\tFFFFFBGGAFFB
EFFDGGGGEEGGFFFDDFEGGFGGEEGGGFGGGGEGEBEEEEE=EBEBBAEBED@EEDD?5BCEED:BACEEBFFEFE=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_298/1\t99\tchr1\t152336073\t60\t91M\t=\t152336429\t447\tTGGGAGGCTGAGGCACAAGAATCGCTTGAACCTAGGGGGTGGAGGTTGCCGAGATTGCACCACTGCATTCCAGCTGGGCCAACAGAGTGAG\tFFFDFFFFFF=EEEEFEBEFFFFFFFFDFFFFFFFFFFDBCC<EE@EEBECBEEBFFE?EEFFBDEEEFEDEEE?FEFFEEDEFEBE@EB>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_298/2\t147\tchr1\t152336429\t60\t91M\t=\t152336073\t-447\tGGGAAAGATACGTGACTAACATACTAGGTAGAAAAAGAAAATCATCATCACTTCCTTTCCTTTAAGGACGACTTTCATTCAGGGGAGATCT\tCABC=ECC?CEECECB5EEEFEFGFGEGEEEDGEFGEGGFGBFFEFDEECEEDDFDDDEFEBEGGGFEGGEGGGDGFFBFDFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_300/1\t99\tchr14\t97106267\t60\t91M\t=\t97106655\t479\tACTTTATATGGTTTCTGAGATTGCATGTAATGACATTTATCATTTGCCCATGCAAATGTGATTTAATCTTAGTCGGTGATCCTTTATCAGT\tGGFGGGGGGGGGGFFGFGGFGGGFGGGGFFGFGGGGGFGGGGGFGGGGGGGGGFAFGGDEBGGGBGGAGGGGEDGGFG=GGGFFGCD??FE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_300/2\t147\tchr14\t97106655\t60\t91M\t=\t97106267\t-479\tTGGCATCTACGTCACAGATTGAGAAGCATGTTCCATATCTTTTCCAACTCCATAATCCTGTGGGCCAATAGTACAGGGATTTCTAATGTTT\tAECCCCBF@BFDF=GEGEFDGDGEDGFG?FGDDGGEDDGGGFEFGGEEEEE5DGGAGGGFGGEFGGGEEGFGGGFGGGGEGEGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_302/1\t99\tchr11\t46886041\t60\t91M\t=\t46886425\t475\tCAAAAGCTTCATAATAATATAACCATCTTTTTTTTTTCCGAGATGGAATCTTGCTGTGTTGCCCAGGCTGGCGTGCAGTGGCGCGATCTCA\tEEEEEDDEED?EEEEBEDEEFEFB=D?DDDBFFFFFEFDD5DBAD5<:CBAAC?A?5CCCFEE?:EE@EB#####################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:71A19\nfoo_302/2\t147\tchr11\t46886425\t60\t91M\t=\t46886041\t-475\tCACTGAAGGCTCTCAACACCCCTAGGAGGTTGTACTGTTATTATTAGTTTGATGCAAAAGCAATTGCGGTTTTTGCCATTACTTTTAATGA\t?7=?9:-:70=?A)=/4C=CA:9;>;*6>='>54
:B<AAB;@@@>@5ACCC;.=>4<CA?A@@>6@B:?EE@@CC;>:AC?:CCC?C?C=C\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:18A11G60\nfoo_304/1\t99\tchr1\t105905893\t60\t91M\t=\t105906291\t489\tAATAATAAATGTGAGGTGATATCTCATTGTGGTTTTGATTTTGCATTTTCCTGATCATTAGTAATGTTGAACATTTCTTCATATACCTGTT\tGGGGGGGGFGGGGGGGEGFGGGGGGFGGGFGGGGGGGFGGGEGGEGFGGGGGGGEGGGGEGEGGGGGFAEFEEFFGGGGGGGGGGGDGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_304/2\t147\tchr1\t105906291\t60\t91M\t=\t105905893\t-489\tCCCTATGTTTTCTTATAACAGTTTTACAGTTTCTATCTTAAGTTTACATTTTAATCCATTTTAGATTTATTTTTGTATAGGATGTGAGTCA\tDGGGGGFGDGEFFFFBDEGGGGGGGGEGGDGGDGGGGGDGGE=GGGGGFGFFDGGGFGGGGGBGGEGFGGGGEGGGGGGDGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_306/1\t99\tchr4\t87408445\t60\t91M\t=\t87408838\t484\tACTCTACCACAAAGATACACACCATCTTCTCACCATCTTTACTGTGACATCTTGATTCCAACCTTTATCATCTTTCACCTGAATTCTTGCA\tGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGFFFGGGGGGGFEGGGGEGDGGEGFDGEGGGEGGEGEEGEEEEGFGGDGEEGGGEGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_306/2\t147\tchr4\t87408838\t60\t91M\t=\t87408445\t-484\tTTCAAATACTCTTTCCCCAGATTTTCATATGGCTGCCTTCTCACCATTCAAGTCTTAACTGAAATGTGGTTTCCTCCCAGAGGTCTTCCTT\t:BCBD5FDGDFGGCGGFGDGGA:EEEFFF5F?GDGGFFFFDEEEEE?FGGGGFEEGAFFFGDEEGFGGDGGGGGGGGDGGGGGBGGGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_308/1\t83\tchr4\t150596485\t60\t91M\t=\t150596109\t-467\tGAGGATTGCCTGAGCCCAAGAGGTGGAGGCAGCAGTGATCTATATTCACACCACTGCACTTCAGCCTGGGTAACAGAATGAGACCTTGTGT\tGGFEEFGGEFFF:FBFEE?GFGFEGGEEGEFGGGGGGEFGGEFFFFEDFCFGEGGGGFDGGGGFGGGGGGGGGGGGGGFGGGEGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_308/2\t163\tchr4\t150596109\t60\t91M\t=\t150596485\t467\tGACATGTCCCTTTTACAACTCCCAGACACTCAGTGTATAGCAGAGTGCTATTCCTTTGTTGAGCAAACTGAGCACCTACTATGTGCTAGAT\tGGBGGGBGEGGGGGGGDGGGGGGGG?EDE=CCC=ABDDDCEED-D:DDD?BF
FDEFFGGGAGEFDFFEEFBFDBED5DDD?ADA=ADB5?A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_310/1\t83\tchr4\t189625940\t60\t91M\t=\t189625553\t-478\tGACCCAGAAAAGAATTAAGCTGAGATATCTCTTAGTGATACTTTCACATCCAGATTTACGCTTAGATTCATGATGCATTACAACTAATAAA\tE=BAEGEDGGGGGGCGGGEGEFF=FFEDFDFDGGGGGGGFGGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_310/2\t163\tchr4\t189625553\t60\t91M\t=\t189625940\t478\tATAAGATTAGTCCAAATTATCAAGTATATTCCATCATTTTGAATACTCATAAAACATATTTTGATTTTTATTATTTTAAGTGAATATTTGG\tFGGGGGGFGGGGGGDGGGGGGGGGFGGGGGGGFGGGFGEFFGDGFGGGGGGDGGGGGGDGGGGEGGGEGGGGGFGGG>FGFG?FDDGGGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_312/1\t65\tchr4\t187482106\t37\t91M\t*\t0\t0\tAGTGATTACTTCTGCCTCAGTTAGGAAAGAATTAACAGGCGTAATTATAGTTGAGCCAGATAGGAGTTTTCCAGGAGGAGCACAACATCAA\t77,56::A=:?A91-:0557ACC83A:A8A;6B;CCCC:-B8.;?/;8?##########################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:39A7C43\nfoo_313/1\t83\tchr7\t7294158\t60\t91M\t=\t7293783\t-466\tTGGATAAATGGGTAAGGAAAATGTGGTATATATACAATAAATTATTATTCTGCCTTTAAAAAGAAGGAAATCCTGTCATTTGTAAGCACAC\tBF?FFGDGEGEAF?GGEFEEE=EEEFDEGFGGEEGDGEGGFEBGGGGGGGFEGGEEGGGGGGFGFGGGGGGGGFGDGGGFGGGGGGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_313/2\t163\tchr7\t7293783\t60\t91M\t=\t7294158\t466\tTGAAAAACTGGTCAGCATCCACTAATCGTCACAGCCACAAGGAGACATCACCTCACACACGTTAGGGTGGCCATTATCAAAAAGTCAAAAA\tGFGGGGGGGGFGGGGGGGGGFGGGGGGDGGGFDEGGGGFFGGDGGGGEGDFEAEFDFFFFGFGGGGGEGGD?=:EFFFF5AA<?>EECEEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_315/1\t99\tchr3\t51577695\t60\t91M\t=\t51578044\t440\tAGCCCCATGGTTAGAAACTATGCAGACCTACATGGTACTCTTAACTGCTCTAGGATGAATGAGTATCCTTGAGCAGTAGGGTACCAGGTTG\tGGGGGGGGGGGGGDGGGGGFFFGGGGGGDGFFFFF:EEEEFGGGGFGGFFFGBGEGGBGDFEA@EEECCECBDD5C=DCBFDFFF?@D@C
A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_315/2\t147\tchr3\t51578044\t60\t91M\t=\t51577695\t-440\tGTCAGCATTCCAAAGTGGAGGAGTTAAGGTTTCACTTATGTAGGTAGAGACAGAGAAGTTCCAGCAGGATTATAACATTTTCCACAGAAGA\tFBAFDFBEE=EAEDDEGGFFDGGFGEGGEEGFDFFDGDFGGAGGGGGEGFGFGFGGGFFCGGBGGDGGGDFGDGFGEGGEGGGFGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_317/1\t83\tchr7\t9669075\t60\t91M\t=\t9668687\t-479\tTTTAAAAACACTGGAAGTAATATCATTTTTTGTTTTTGCTTGTTTGTTTGGTTGGTTGTTTTTGTTTGTTTTTTACCACAGGAGGGGTTCA\tE5AGGGGEGEDGDGGGE?GGDGEDEEGGGEGGGGGGFGGGGGEGFGGGGGGGGGGGGGGGGGGFFGGGGGGGGGEGGEGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:46G44\nfoo_317/2\t163\tchr7\t9668687\t60\t91M\t=\t9669075\t479\tAATATTATTGGATATTTTTGTATAATATATATGGATGTTTTTCATTTGTTTTTCCAGACTCATCTTCCACTCTCTATCCTGCTCCATATGC\tGEGGGGGFGGGGDGGGGGGGAGGEGGGGGGEGGGGFFDDGFCD5FGGFGGGGGFE5DDDDE5EDEGGFGGFFGFFEB?EE?BBAA=B@@:=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_319/1\t99\tchr19\t44641888\t60\t91M\t=\t44642249\t452\tCCTCTTGCTCCTGGGTAGAAGCGTGGGGAAGTCAGTGAGTGTGAGGGAACCTGGATGTGAAACCAGAGGAAGGGGGTGGCCCTGGGGGTTC\tGGFGGGGGGGGGFGGFGGGGFGGFGFGGFGE?EEEBEBECFDFFFG=EEFFBGGCEGEG=@??@BAACA?EFFBFC'5A6?A5<A@>C-@#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_319/2\t147\tchr19\t44642249\t60\t91M\t=\t44641888\t-452\tCATCCTTCTTTCTCTCTCCTCTCTTGGTCCTACTTTATTTTTCTTGCTGCTGTCTCCTTCCCCCTTTCCTGTGTCCTTTCCTCCATCCTGT\tD=@DBDC?CE:?5>*:AE?=EEGFGEFEFFGB>@@>5CEFFFABFFDFFDBFFEFFEBBEEEEFGGAFEDGGGFGGEEEEEFFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_321/1\t99\tchr10\t22132987\t60\t91M\t=\t22133368\t472\tACCAAGACTACTCTAGTAAATTAAGTATATACTAGGATCTATGAGGCTTTGTTGGTTATATCCTGTGTATTCTTGGCATTTAAAAAATTTT\tGGGGGGFGGFGGGGGGFGFGGGEGGGFFGGGGGGGGGGGFEGGFGGGGGGGFGFGEGGGGGFGFGFFFBFFFAFEFFAGGGEGGGFDFFAG\tXT:A:U\tNM:i:0\tSM:i:37\tAM
:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_321/2\t147\tchr10\t22133368\t60\t91M\t=\t22132987\t-472\tCAGCTATGTAATTAGTAGGTCATAGTACTTAAAACATCAAAAACCATGGAATGTCTTTAATTTGGTCAGATATTTACTGATATGTCATTAT\tGFFBFGGFFGGGGGGGGGGGGFGEGGGGGGGGGGGFGFGGGGGGGGFFFAFFGGFGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_323/1\t99\tchr12\t118588121\t60\t91M\t=\t118588482\t452\tGCTTATTTAATTCAGCAAGCACATTCAACATTTCTGATTATACTGAATACATTCAAATTCTCAGTCATCAGCTTCAGGAAGACCCTCCTGG\tGFGGGGGGGGGGDEFGGDGGFGGGGGGEGGFGGGGGGGFDEDFFFGGGGGGGEGGFEDGGFGFAFGBFD-=DBCBBBD@?:3.BBA<?<<A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_323/2\t147\tchr12\t118588482\t60\t91M\t=\t118588121\t-452\tCTCCTCTCCCCACAGAAGACCACTATTAACAATGTGTTGTATACCCTTCCAGAACTTTCCATTTGCTAGCCTAATAGTGCTAATAAAAGTG\tDAC:7?CD;-CCCCCC067?<6=?@6A?A:A?DEDE@@@C6CCCCC9;77/-A5>B=EE?FEED5DE?EDA?FFFEFGDGGCFDFEEEBEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_325/1\t99\tchr6\t48478084\t60\t91M\t=\t48478455\t462\tTTAAATTGTCTTAGTTTCTCATATGAGGGCAAAATAAACTTCTTGGGTCTGTAGATAGTATGTATTAGCCATATCATTTAATAAATATTGC\tGGGGGGGGGGGGFGGGGFGFDGFGGGGGGFGEFDFGGFGGGGFFGGAEGGGGEGGGGFEFEDBFEGFECGCEBBEFFFFFGFFGGDFGE?F\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_325/2\t147\tchr6\t48478455\t60\t91M\t=\t48478084\t-462\tTTAATCTTTAGAGTGGTCTTGAGAAAGCTCATAAGGTCTTTGATAAGTTAATGGAAAATAATGCTAAGATTCAATACTGATAAAATAAAGT\t?CCA=??=GDGGGDDGEG?GGGGGDGEFBAEGFGGGGGFBGGGEGGGGGGGGGFGGGGGFGGDGGGGGGFGFGGFGGGGGFFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_327/1\t83\tchr2\t78122797\t60\t91M\t=\t78122426\t-462\tTGTGATATTCTCATTTTCTCAGCCTTAAGAAGTCAAATGTCTCTGTAGGATAAGATATTCAAGGTAATATTTTCCTGTAAAGAAACACAGA\t?FG?EFAGEGFFDDFFCEDGGBGGGGGGFGDGFFGGFEGDGGGGGEGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tX
M:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_327/2\t163\tchr2\t78122426\t60\t91M\t=\t78122797\t462\tAAAGTAGTGTCACTAGGGACTTCAATAATCTATCTGAAGTACATATAGCAGCATCTGGATTATTTGCAATCTATTAGCTTCAGATCCAAAA\tGGGGGGGGGGGGGGFGGGGGGGGGGGDGGGGGGGGBEEECEEEEEEFFEFGGEDGGGGBGGGBGGG=FGGGGDBGDEF?GE?D?AEE:EEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_329/1\t99\tchr10\t94708971\t60\t91M\t=\t94709341\t461\tAAAGGTAATAAAATGTAGCAAAGATAGTTTTTCAACAAATAGTGCTGAATCAACTGGACATCCACATGTAAAAAAATGAATCTAGACCAGA\tEEEEEEGGGGFGGGGDEGGFGGGGGGGGGGFGGGFGGGDGE4CEEFFFFDGF=GFGCFGFGGDFGFGG:CC@B?CECEEDFGEFFAEFFEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_329/2\t147\tchr10\t94709341\t60\t91M\t=\t94708971\t-461\tAAGACTTATCTGATAAAGGACTCTTGTCTGAAATTGCAAAGAACTCCTAAAACTCAATGAGAAGAAAATGAACAATTGATTTTAAAATGGG\tEACBBE=EBCEEEBGGEEEC?G:GGEGFGFFDGGDGBFFFDEE?DEEEEEEBEEEEGGFGFFGGFGEFGGGGGGGGGGGAGFEGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_331/1\t99\tchr13\t69209029\t60\t91M\t=\t69209430\t492\tGTAGCCAATATGAGAAATAGAAACAGCTCTGCTCTCGGCTCCCAGTGAGAAGGAAAAAATGGCAAGTGAATTCTGCATCTTCAATTGAGGT\tGDGGGGGFGGGGGFFDEDEEGGGDGFFGGGDFFGBFFFFGD=EAC<A:A>B@?=@AAC9'9>>:>4/<):>>?5;@:=C@E?CB?######\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:12G78\nfoo_331/2\t147\tchr13\t69209430\t60\t91M\t=\t69209029\t-492\tAACAGGAGTATTTGCATACTTATGCTCTGGAAACTCCCATGAGGCAGGAGATCCGTCCACTCCCGTGGGAAGGAGGCTGATGCCAGGGAGC\t######################?C@?B?CDCEBDEFEBE:FCEBBCEDECCAECCBBC2BBAGFGGGEGGGGEEFGFDF:FGGGFGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:0C90\nfoo_333/1\t83\tchr11\t88357992\t60\t91M\t=\t88357628\t-455\tTCTGGAAAAACTATTTTCATACCCACAATCCTAAAGGTTAGATGCCATAACTCATGCTCCTACATATATAACCAATTTTGTTTATTTATTT\t@BBEBDA?A;?<@CC@==???=@>=>@?;C?:FDEFCBBCB=BFAFEGEEGEDDGGGEFGGEFGCGGGGGDGEGGGGGGGGGFGGGGFFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\
tMD:Z:91\nfoo_333/2\t163\tchr11\t88357628\t60\t91M\t=\t88357992\t455\tTCCAGGGCATCATATACTAAAGAATCATTCCATCTAATTTCTTCAAATCTCAAGAACACACACCTGGCAAATCATGAAAATAAATTTTATG\tGGGGGGGGFGGGGGFGGGGGGGFGAGFGGEGGFGBGFFGEFEFFFGGFGGFGGGDEFGGDG?GGDBEEE=DDEBFEE-EE5C>>>DECE4A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_335/1\t99\tchr3\t62264056\t39\t91M\t=\t62264419\t454\tCCCGGCTAATTTTTTGTATTTTTAGTAGAGACAGGGTTTCACCTTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCC\tGGGGGGGGEGGGGGFFEGGGGGGGGGFGGGGGGGGDCGGGGEGGFBDFBBGGGGGDDDGBFBDDD=ECBEB=CEEEABCEBB:EBEEEECG\tXT:A:U\tNM:i:0\tSM:i:2\tAM:i:2\tX0:i:1\tX1:i:139\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_335/2\t147\tchr3\t62264419\t39\t91M\t=\t62264056\t-454\tACACTGAGGTGTTTCCCTGTCACACCCCTCACTTGAAATGCTATCCCCTTCACTCCAATGCTCAGATGATGACCTTCTCTTTGAAACTATT\t;@<=9><C99,@A?(C@<>>>+DADDEFDE5EFADEEEE5EED=EEEEEEEEAEEEFGGGGEGEGGGFGGGEFEEEEEE-EGGFGFGGGFG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:2\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:14A76\nfoo_337/1\t99\tchr1\t74251269\t60\t91M\t=\t74251637\t459\tAGTTTAGGACAGTGCCAGATGTCAGCTAGTGAGAGATAAAAGATGAAAAGTTCCTCAAACAGTGTTGTAATGCAATAAGCTACATTATACA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFDFDFGGGGGEGGGGGGFEGEFEEGGGFFGGGEGBCECCEEGDEEGFGGFFGBGDGDAAEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_337/2\t147\tchr1\t74251637\t60\t91M\t=\t74251269\t-459\tTCCTCTCCCTACTTCCCCCCTTCCCCCTCCAATAGGTTCCAGTGACTGTAGTTCCCTTCCTTGTGTCCATGAGTTCTCATCTTTTAGCTTC\t5==1=0=B>:7>>-C?'AA<:9C(DEGEGGGGGGGGDGGGGGGGGGGGGFGGGGGGGFGGGGGGGDGGGFGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:16A6A67\nfoo_339/2\t129\tchr12\t107536519\t25\t91M\t*\t0\t0\tGACCCCGGGCGTTAACTGACCCACCCCATGGCAGACACTCCCACCTTCTTCCATCAGCACCCCCAGGACACTTTGAACACCCCTGCCTGCG\t,333,+6,.,767,7766+3,77+611(86733;,7,6=(57+937367*6664*77776AAC8:A?-:A@6.>6.CC@?-?B5B>@.@##\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:9T13G56A8T1\nfoo_340/1\t99
\tchr2\t169728182\t60\t91M\t=\t169728563\t472\tAAAAGGCTGTGAGACACGAAGGCCCAATAAAACCTCCTCATTGGATAAAATGACTGCCCATCTATTTTTCTACCTGGATTGGATGTAAGTG\tGGGGGDFGFGGGGGGGGGGFGGFGGGGFDGFFDGGGGFGGGBGGGGBGGDC=B?CCB@D:>=>>@7??#######################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:73A17\nfoo_340/2\t147\tchr2\t169728563\t60\t91M\t=\t169728182\t-472\tATATTTCCCTTTCCCTCTTCATTTGCAGAGGAAAATAGACAAGAAAAAACTCTCAAAGGGTAGAGTCAGAAACAGTGAGAGACATTTTAAC\tEBEEEEEFFFEBBDFEDBEFDF=EDAGFGGGGGGGG?DFFGEEEEFEECEBGEAGGGFGFEGGGGGGGGGGFGGGGGGGEGGEGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_342/1\t81\tchr5\t137913472\t25\t91M\t*\t0\t0\tTAAGATCAACTTTAAAGTTATGAGACTACTGGGTAAAGCAGATTCTTGTTTGCTATAAAGATGTTAAAAGGTGTGAAAATTATATATATAT\t###########################################################################################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1C26T4A6T50\nfoo_343/1\t83\tchr3\t121089753\t60\t91M\t=\t121089378\t-466\tAAATTGTTTAAAGCAAAGTAACAACTGTTGCGGGAAGTCAGGGACCCTGAACACAGGGACCAGCTGGAGCCACGGCAGAGGAACATAAATT\tGEABEEDABDC:=;B>9;668=?5?6?CEE=AEBEBC@A=CC@::DEE=EEEEEEEDC:DDDEF=FFGGFGGGGGGGGGGGGGGGEGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_343/2\t163\tchr3\t121089378\t60\t91M\t=\t121089753\t466\tTCTTCAAGAATGCCAGAAACACATTATAGACAGGGTATATACAGGGTATCAACAGTTGACTTCTTAACAGAAACAATGAAAGCCAGAAGAA\tGGGGGGDFGGFC?DFAEEEEDG=GFEEEEEEEEEB-CA?:BDDDDDCEDBDD?DB=CC=:D?BCDBEAECBCEBBCE:CCFD:EF?5@=5C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_345/1\t99\tchr9\t28435444\t60\t91M\t=\t28435819\t466\tGGAATGAGCTAGAAAAACAGGAAATAAACTGGAGAAATAGCTATGTACACAGTAGCAGGCTTGTGAAGGAGACAAAATAGTGTCACCATCT\tGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGEGGGGFGEFFGGGEB?B#############################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_345/2\t147\tchr9\t28435819\t60\t
91M\t=\t28435444\t-466\tTAATTCCTTTTGTGATTCTCTGTTAACCCAGAAGGGGGAAAAAAACATGATTATGGGTTATGATTTACTACCTCACATTACTAAGGGCATC\tFDFEAFGEEEGBCCC5DFFFAGGFEGGFFGEGFGFGFCEEEGGFGGDEGGFGGEGGGGFGGFDFFFEFEDFGGGDGGFGGGGGGGGFGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_347/1\t99\tchr4\t167906195\t36\t91M\t=\t167906577\t473\tATATACAAAAGTTAACTCAAGATGGATTAAAGACTTAAATGTTAGACCTAAAACCATAAAAACCCTAGAAGAAAACCTAGGCAATACCATT\tGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGEGGGGGGGGGGEGGFGEEEGGGGEEFGGF;GGEGFGGGGEGGEFGE\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:27\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:52G38\nfoo_347/2\t147\tchr4\t167906577\t37\t91M\t=\t167906195\t-473\tTGAAGACATTTATGCCACCAACAGACACAGAAAAAATGCTCATCATCACTGGCCATCAGGGAAATGCATATCAAAACCACAATGAGATACC\t??C=??ABE=CC=BCA@EBBAGGGFGGGGEEFGGEGGDGDGGFGFGGGGDBGGG?GGEGGGGGGGGEGEGGGGGFGGGGGEFFFFF=DBDD\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0C4T85\nfoo_349/1\t83\tchr1\t162223484\t60\t91M\t=\t162223102\t-473\tAAAATTGTGTATTTTCATCCAATAATAGTTTCTATATGTTCCCAGGTAATTTCTTTAGCAGGACTTTCTCTTCCCCTTTCAAATAGTGTCC\tEFFFBFEEEEADDEEEEDFFFEEEE=FFGGEGGBGGGGGEFFFFBFDGGGGGGGGDFG=GGGGGFGGGFGEGGGGGFGGGGGGGGGGGGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_349/2\t163\tchr1\t162223102\t60\t91M\t=\t162223484\t473\tAGAGATTTTTATCTCTCAGTGAAGTGACACAGCCATCTATCACTCAGGCTAATGTAAATGCTTTGGACAACTCTAAAAGGGTTGGGGGGGA\tFFFFFAFGGGFFGGGGGGGGGGGGEFGGGFGDFDGGGEGFGGGGGFDFFFGDGDBGFGGGFGGGGD=FFFGGGGEGEGGFFCFBEABCC>7\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_351/1\t83\tchr7\t90537017\t60\t91M\t=\t90536658\t-450\tCAGATTTCCAATGGAAACATTTTTTAAAATTTTTTGTTTGTTGTTTGTTAGCACAGGGTCTTGCTCTGTCACTCAGGCTGGAGTACGGTGG\tGAGGFGEEEGG?FGGGCFEFDFFFFFCEAA;EGGEGGFGGGGEFDGFFFCDDEGEGGFDGGEBGFGGGFGFFGGGGGDGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_351/2\t163\tchr7\t90536658\t60\t91M\t=\t90537017\t
450\tGGTGAGTGCCTGTAGTCTCAGGTACTTGGGAGGCTGAGGTGGAAGGATCCCTTGAGCCCAAGAATTCAAGGCTACAGTGAGCTATGATTGT\tFFEFFEEFDFGGGFFFGGGGFF=FFEEBEEBGGF:C=CC:CCDDDEEEEEEEECAFFFFEGGFADGFEFEA5CCA:@>@??=???A@AAA#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_353/1\t83\tchr20\t20401005\t60\t91M\t=\t20400634\t-462\tACCCTCCTTGCACTTCTCAGGTGGTCCCTGCCACATTCTTTGATTTATTAGGGTCATTTCATTTGCTTTGGTTTCTGGTAGGCACTCTGAA\t##########################CCCECFEGGGGCFGGGGFGGGEFCGFF?FFFDGGGFGGGGGDEGGGFEEGGGGFGGGEGFGFGGG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:1A3A19A65\nfoo_353/2\t163\tchr20\t20400634\t60\t91M\t=\t20401005\t462\tCACAATATGTTTCAAATTATTATCAATACCAATTAGAAACAAAGAGATTCTAAGTTTTATTAGGAGGTCACTTTTTTTGCATCAATTTTAT\t?4?3;CACCCGEGGGFGGFGGFGGGGGFGFGCGFGGGGEGFFGGGGDGGGGGGGGGGEFGGGGGEFFCFFDFFDFGGGGAAEGGDEGEGFD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_355/1\t99\tchr9\t28773355\t60\t91M\t=\t28773721\t457\tCAATATAAATGCTGTGTAAATAGTTGTTATACTATATTTTTAAATTTTATTATTTCTATTTGTATTTTTTAATACTTAATATTTTTTTACT\tGGGGGGEGGGGGGGGGFGGG5CCCCFDFFFGDGGGGGGGDGGBFGGFEGGG>GGFCGGGEGGBAGEEDCC>CEBEEFFFFDGGGFF>GGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_355/2\t147\tchr9\t28773721\t60\t91M\t=\t28773355\t-457\tAGATGTACAAAATCAATGTCTTAGTCAATTCAGGCTGCTATAACAGAGTGCTATAGACTGGGTGACTTAAATAGTAGAAATTTATTTCTCA\tCECCEA4EEEEC=DDDBCBEDGDGGFFFFF:FGFGFDFGGGFGGGGGGGFGFGGEGGGG?GGGGGDGGFGGFGGBGGGGBFDGGGGGDDGB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_357/1\t83\tchr9\t28969310\t60\t91M\t=\t28968967\t-434\tCATGGATCTCTCTAGTGTACCATATGGTGGCTTTCAAAGTAGCTATGGGTGGTAGAAAAGGTAAAAAGGGATATTTCCAAAAAGAGAGCTG\tFFFFBEGEGGGEDAEEBB:BBEE-FCEEEECEGDEFGFGGGGGGDEGGGGGFBFGGGGGEGGGGGGGGFGGGGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_357/2\t163\tchr9\t28968967\t60\t91M\t=\t28969310\t434\tAGGAGTCTTGAATCCACA
GGCAGGAGTGAGACTGAGCACTAAAAAACAGAAACTTCAATACTATCAGAACTGTCCAATATTTCTCTCTTCT\tGGGFGFGGGGGFGGGGGGEGGGFG?C?:ACAAAA?FGEDFEFEEFGAD@FFDFAF?AEDDD?EEBAB-ECBCB?DBDBCBEE:CA@@CCBC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_359/1\t83\tchr9\t28969310\t60\t91M\t=\t28968967\t-434\tCATGGATCTCTCTAGTGTACCATATGGTGGCTTTCAAAGTAGCTATGGGTGGTAGAAAAGGTAAAAAGGGATATTTCCAAAAAGAGAGCTG\tFGGFGGEGFGGGFGGGFDCFFEAEFGFFFGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_359/2\t163\tchr9\t28968967\t60\t91M\t=\t28969310\t434\tAGGAGTCTTGAATCCACAGGCAGGAGTGAGACTGAGCACTAAAAAACAGAAACTTCAATACTATCAGAACTGTCCAATATTTCTCTCTTCT\tGGGGGGGGGGGGGGGGGGGFGGGG=C=C?CCDCCBGEFGFFGEGGGGGFGGEFBGFGGFGGBGFDA?CEACECEE5@BB@CGEGFFF?FDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_361/1\t65\tchr5\t58194288\t37\t91M\t*\t0\t0\tTTATGAGGGAGAAAGAGAGATTGCACTGAATGGAGAAGTAAATGGTGATGAACAAGATGCGACACTATTCTTTCAGGTACTTTGAAAGAAA\tAC=CAACC==DD:DDEEBAEE?EAED:ECED?C5D@CBBDEE:BA:>7@>AC5??;:8<)CC@?>AA:A,6;,95CC=?@C66@:@*>9;@\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:59A31\nfoo_362/1\t83\tchr20\t39570998\t60\t91M\t=\t39570641\t-448\tATTCACTCACTGAATAAGTTATTTGATAAGTTCAACTAATTAGGTCTTTATACTAGCTACTAACCAACAGAAACATACAGTTCTACCTATG\tAE:?CEEFDFAEFEFFEEGEGEFEEEAFDDD?FFCFEGFEEFGEDEGGEGGEFFGGEGGFGGGGGGGGGGGGGFGGFGGGGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_362/2\t163\tchr20\t39570641\t60\t91M\t=\t39570998\t448\tAAGACAGTGTGGGAATTGGTGGTTGTACCCAGTGAAGAAATGGGAAGACTCAAGAAATTACCCACCCAGGCTGAGCTGACAGAAGCCTTAT\tGGGGGGGGGGGGGFGGGFGDFFFFEFFFFF=EEEEGGGAGFFEEEFFFFFGGFGFFGGGGGGGGGGG>GGEGGGGGGFGFFFEFFCDG?AG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_364/1\t99\tchr14\t26689993\t60\t91M\t=\t26690365\t463\tGTGATCCTTTGGTGATATCACTACATTCAGATTTTTCATGGTGCCAAAATTCTTGC
TCTGTTTCCTTGTCATCTGGAAACACCAGCACTTT\tGGGGGGGGGGGGEGGFFFFBGGGGFGGGGGGGGFGGGFGEDEGGGGGDAFGEGGEFFGGGEEGDGEFFBDEFFFDE=EEE?FBCFGBFGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_364/2\t147\tchr14\t26690365\t60\t91M\t=\t26689993\t-463\tGAGCCAGCTGAGGCCAATGTGACTGGGTATATACTAGATCCTTGATTACTGGAAGAAGCTCTCCGTTGCCTCAGGCAATGAGCTGATTCCT\t>BCDC:EE:DDDDE5DDDEFFEEAGGF?FFDGGGGGEDE5EEFGFFGFFEEGEDGGDGGGGFGGGEGFFGGGFGFGGGDGGGGGGGFEGGD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:49C41\nfoo_366/1\t99\tchr3\t159878292\t60\t91M\t=\t159878696\t495\tAGAGATTTTCAGTGTAGATGAAACATCCTTCTTTTGGAAGAAGATGCCATCTGTGACTTTCATAGCTGGAGAAAAGTCAATGCCTGGCTTC\tGGGGGGGGGGGGGGGEGEGGGGGGGGGGGGFDGGGGGGGGGGGGFG?GGEGGGEEGGGGFFGGEBFEECDFCGEFF?EFFFFDFD?=:@DA\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:72G6G11\nfoo_366/2\t147\tchr3\t159878696\t60\t91M\t=\t159878292\t-495\tTCCCCCCCCCGCCAATACAACATTCATTCTGCAATCCCTGGATCAAAGAGTAATTTTGACTTTCAAGTCTTATTATTTAAGAAATACATTT\t#######BEA?GGGADFFFFADGGDGEGGGGGGDEGFEDFF=FG=GGGGFGGGGGGDFGGGGGFGGGGFGGGDGGFGEGFGGGGDFGGGGG\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1T2A0T0G84\nfoo_368/1\t99\tchr12\t42102681\t60\t91M\t=\t42103053\t463\tTTCTATACAAACCTTTATTGTGATTTTTAAAACTTCAATACATTCAACTTATCAAAAAAATAAAAATACTACATTTCTTCATAGTGCAAAG\tGGFGGGGGGGGGGGGGGGFGGGGGGGGGFGGGFGGGEGDGF:FFFGBGGGGGGGEGG=EGCCCCA=CCCFGGGFFGFFGGGDDGEG=GDFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_368/2\t147\tchr12\t42103053\t60\t91M\t=\t42102681\t-463\tAGGTTGATAAACCTTAAAATGTAAAATATCAAAGGATCAAAAACATCTAAAACTGATCTGGAATGAATTATTTGTAATTTTAATATTCTAT\tGGGGGFGGGGEEE=GGGDGBGEEGGGGGGGGGGGEGGFEGGGGGDEGGGDGFGFGGFGGGGGGFGGGFGGGGGGEGGGGGGGGGGGGGGGF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:56C34\nfoo_370/1\t83\tchr21\t15766367\t60\t91M\t=\t15765965\t-493\tCTGGACTTGACCTTTTAGATATATTTTGGTTCTGGTCTTGGCTTTTGGGTAAGTAGCTAT
ACAGAGTTCATCCCTCACAGCCAGTTCAAGC\tA?C@?CCBB?C@DDCAAEEEEDDEFDDFEF:EEEBBDFEEDAGGGEGGGFGGFEEGFFFDEECEFFEEEEEEEEEEEGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_370/2\t163\tchr21\t15765965\t60\t91M\t=\t15766367\t493\tTTCTTACCCCAGAGAAAGTGGAAAGTCAAACCCTGATGCCCATATTTGCCTGTATCATGTCCAAGAAATTATGGTAAATTGTGAATCAGTC\tGGGGGGGGGGGGFGGGGGEGGGGGGGGGGFGGFGGGGGGGGFFGGGGGFGGFGGGGGGFGGEGGGDGFGGGFGGEGEGGGGEGGDGGEGEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_372/1\t99\tchr4\t146338059\t60\t91M\t=\t146338451\t483\tAAGCCTCATTACCCTCTCAGAGGTGCCAGGGAGCATGCACTTCTCAGATGTCTTATCCCCAGTTCAGGAGGCCCTTCCTCCAAGGAGATGA\tGGFGGGGGFGFGGGEGDFGGBCC6CEEEEAFGFBE?FFFFGFGFEBFDEGGDED=FDEAFDEAEEEDD?A8>5A<CDBBCEEDBE:CAABB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_372/2\t147\tchr4\t146338451\t60\t91M\t=\t146338059\t-483\tAAATGGGTAAGTTTTGCTCCTTGGTTAACTCAACCCAAATCATCTTTCTAGCTAAATTTTTAAGTTCATCTCAGTTTGAACTTATGCACTT\tB=A-CABEE=:BBBD5E:FEFDFFDFDCEADFGDFGBEGEGGGGFGEE?EEAGGGFFDGFGGDGGGGG=GGDFGGGDFGGGGDGGGGAGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_374/1\t99\tchr5\t123282279\t60\t91M\t=\t123282651\t463\tATAAGTGAGATCGTGCGGTATTTGTCTTTCTATGCCAGACTTACTTTACTTAACATAATAACCTCCAGGTTCATCCATTTTATTGCAAATG\tEEEEEEFFFEFGGGGGGFFGFFFFFGGGFGGGGGGGDGGGGGGFDGFFGFGGGGGGGGGDGEBDFDFDGDGFG?EFAFDDGGEGEGBEGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_374/2\t147\tchr5\t123282651\t60\t91M\t=\t123282279\t-463\tACCCATTTCTCATCTTGAATTGGACTCCCATAATTCCCATGTGTTGTAGGAGGGACCCTGTGGGAGATAATTGATTCATGGGGGTGGTTTC\t#E?EECGEDFGDEBGG@@=@=;:6B?DFGGGGFFFGGGGFGGGFFGGGGGGGGGEFFGBGDFGGGGEGGGGGGGGGGGFCEGFGGEGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:22T68\nfoo_376/1\t83\tchr8\t61453044\t60\t91M\t=\t61452663\t-472\tGAAGCTATAAGTGAAATTAATGCATAGAATGCATGCCATATAATCACGTGCACTTGCTTGGGGTGGCCTGAATATTTATC
TCTGAGCTGTC\t###################A?9=7==>@=4C<:A,,??ADDDDD-DCC=5C?5AAA=<A8B;;;5;D?DDBBADDDAA:A5DDDDDDDDB?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_376/2\t163\tchr8\t61452663\t60\t91M\t=\t61453044\t472\tGTTAGAAAGCAACCCCATCCCCAATTACTTCTGTGATGTTTTTTGGGGGGTTGGGGACTCATATCCTGTCTGTGCCTGTCATTTTATTGCT\t?:?>A?A>AA=.=.=2;;1-2:=*>=C:@>?>C:ADADAAA6=::5?>=-/@2677(3?35+)70;<85;,36;3DDDD-?AAACFFC6E7\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:11T79\nfoo_378/1\t65\tchr7\t32304528\t37\t91M\t*\t0\t0\tAGTTCGGGGAGGTGTGATGCTGAAAACCAGCTTCCGCGCGACGCCGGGTCGGAGCCCGCGAGGGGGGCCGGGGGGGACAGCCCAGGGGGCG\tDFDGGGEEGDEE;EEFFDDEGGGGGGAFGFFDFFFGAGGEFFGGGE=D@ECD:DBEEAEEABDBD$4&<?@A=AA>3?CA:??-B######\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:65T21T3\nfoo_379/1\t99\tchr8\t75728528\t60\t91M\t=\t75728901\t464\tTAAATACAATGGCAAGTATAAAATAAAGTATTTTTTTGATTCCCCATTCAAGCAGTAATTAAATTACATGTACCCATAATGTACATTCCCA\tGGGGGGGGFGGGGGGGGGGGGGGGGGGFDGGGGGGGGEDGGGGGGGGGFGGGGFGGGDDGGFGGGGGDFGFFFDFFFFFFGGGGGGGEGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_379/2\t147\tchr8\t75728901\t60\t91M\t=\t75728528\t-464\tGAGAGAATCGTATTTGGGGTCTACAGAAGAGCAGTAGAGATATGCTTTTTTCTTCATTTTCAGTTAGAGGAGTTTCAAGGAAAAGGTTTGG\tGGGGFG=GGEGGGFGGEGGFAGGGGGGGGGGGFGGGGFGFGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_381/1\t83\tchr8\t53727412\t60\t91M\t=\t53727042\t-461\tTACTAAAAATACAACAAATTAGCTGGGCGTGGTGTCACGCACCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCACTTGAACC\t7AB7ACAC=>=AAAAC;;;;:5DADF@CDCF?@@?@?B0??C@@EAEFBFFECEEECGDGGGGFGGGGGGGGGGGGGGGGGF?FFFFFBFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_381/2\t163\tchr8\t53727042\t60\t91M\t=\t53727412\t461\tGCATATTCCACAATAAAAACTAATGTTCAGAAATTACCACTTGCTGAATTTTGATTTAGTAACAAAGAAGAATATCGACAATCATCTGAAA\tEE=EEFFEF5GFGGGGGGGGGE
GFDGGGGGGGGGGDGGGDGFGFGFFFFFGGDAGGGEGGGGFGEGGGGGGDA=GFDFFDAFAFFD?EEEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_383/1\t83\tchr6\t132570259\t60\t91M\t=\t132569891\t-459\tTTTATTCCATTGTGGTCCAAAAGTGTGGTTGGTATGATTTCAATTTTCTTTTAATTTATTGAGAGGTACATTATGGCTGAGCATGTAATCA\tGGFFGEGGFGGEAFEFBDFFDFFBFFGGGEFGGEGGGGGGFGGGAGGGGGFFGGGGFGFGGGGGGGGGGFDGGGGGGEGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_383/2\t163\tchr6\t132569891\t60\t91M\t=\t132570259\t459\tTATGAATTTCTGGGTCTCAAATTCATTTATTTCTGATTTAATTTTAGTTATTTCTTTTCATAAGCTAGCTTTGGAGTTAGTTTGTTCTTGT\tGGGGGGGFGGGGGGEFFFEFGGGGGGGGFGGGFGGEFGGGFFFFFGEFGGGGFGGGF@GGEGGGGGEGGGFGG?DECFEE??DD@GGBGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_385/1\t99\tchr9\t127316361\t60\t91M\t=\t127316726\t456\tGTGCTCCCACACAGCGCCTTGTTCCTACCTTGGTAGGAACACTCATTGGGCTCTTCTATAATTTACCAGCTCAAGCTGCCTCGCTGCCTCC\tFFFFFFFFFFFFFFEFFFFFEEE9E>.@C:>2944?=5<.87782>A=<C==>>A1428-847<))7631::*:6@5>>@4;-71><>>?>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_385/2\t147\tchr9\t127316726\t60\t91M\t=\t127316361\t-456\tAGGAGCTGAGGCTGAGAAGCTTGGCCCAGATTGTTGGTCAAGATTTGGATTTTCTCTCATATGCAACAGGTGCTAACAGAGCAGGAAAGTG\t9EEEEE?GGGGGGFGGDEEE?DFFF=GFGGGDGGGGGGGFGG?GGFAGFGGGGGFEEEGEG?GFDGFDFFBEFGFEGGGEFGGFGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_387/1\t99\tchr13\t100847627\t60\t91M\t=\t100848010\t474\tTCCTGCATGTATCATGGTTAGGGGAAAGACAAGCACTGCAAGAGGCAAAAACATGTCACATTATATAACATTATAAAATATGTTCTAGATA\tGGGGGFGGGGGGGFGGGGFGFFFFDFGDDGEGGDGGGGGGEG=GGGGGGGFBCCFDFFFFGGGFGEEDEEEEEEEGFGGGFGEEFFGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:78C12\nfoo_387/2\t147\tchr13\t100848010\t60\t91M\t=\t100847627\t-474\tTATATACCTAAGACAATAGATCCATAAATATATGTATATTATATAAATATACACACGACCTACAATGTATATAATGTAAATATACACATAA\tFGFGEEEG?FGGGGFFEGFBBG=FGGFGGGGGGGGGE
GFGGFDDFFEEEEEEBDAFGFGGGDGGGGGGFGGFGGGGFGDGDGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_389/1\t99\tchr18\t12919076\t60\t91M\t=\t12919448\t463\tGTTGGTGAAATACAACCAGCATTTTTCACGCCCATCGATTCCGCCCGTGGTCAGGAAATAAGAAGGGGCATAGCAGAGCAGCTCTTGTCTG\tGGFGGFGGGGGGGGGGGGGGFGGGGGGGGGEGGGGGGDGGGFGGGGGEGGDFBFFEEGGGGGGFFEEGAEEFEBEFBDGFG?GEEGEEEEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_389/2\t147\tchr18\t12919448\t60\t91M\t=\t12919076\t-463\tAAATACCACAGATGGGTGGCTTAAACAACAAATATTTATTTTTTTCTCACAGTTTTGGAGGCTGGAAGTCCGAGATCAAGGTGCCAGCAGG\tCCBCAEFDFEGGFGGFGGAFGGFGBGGFGGDGGGFGGGFGGGGGGGDGDEGG?GGGGGBGGGFGEGDGAGFGGGGGGGGGGGEGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_391/1\t99\tchr3\t188423127\t60\t91M\t=\t188423505\t469\tGACTTGAAACTATGGCTCCTTACTTTTAGTTAAGTGTTTTTTTTTTTTTTTCTTGAACTGATAGGAATATGAACTTCTAAGGGTAACTGCC\tDDDDDDDDDDDDDDDDDDD=C=@ACCC5C@C;>>:D@DDDCCCCCCACCA#########################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:50C30T9\nfoo_391/2\t147\tchr3\t188423505\t60\t91M\t=\t188423127\t-469\tACCCCCCACCCAAGCCTCAAAATCCCTAAGGGAACAATACGGAGCAGGAAGGCCCAGTGCAGAGCTGAGGTGTCACTGTCTGTAGGTTATA\t######?;A?-FFFFFGGGEGGGEGGGGGGGGGGGGGGGFGGGFFGGGGGGBFDGGGBGGGGGGGGGGFGGGGGFGGGGGFGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:1A89\nfoo_393/1\t99\tchr22\t29351711\t60\t91M\t=\t29352101\t481\tAAAGTGCTGGGATTCTAGGTATGAGCCACCCTGCTCGGCCTATAATGGCACTTTCCTATCCCATTGATGAGGCTCTACTCTCATGACCTAA\tFFGGGFFGGGGGGGGGGGGBFEDFDDGGFGGGDFFGFGGGGDFFGGEAFBECFFFGGGGGFE5FF@:@@?EEFEGFBDGFEE?DB8;C>?>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_393/2\t147\tchr22\t29352101\t60\t91M\t=\t29351711\t-481\tGTAGTGAGTGAAAGCTGACTCCTGGGAGACTTCTGCGTGGTCCTGGTTCTCTCTCCAGACTGCACTGCGCAAGTTTCTCTTCCTGATGGTC\t?B-EABDBEAEC::AA@AE5CCEFFEBFDDF?AAAA=5EE?FFFFFEEE=EBFC
FFAC@C:EECEE=FDFGGDGGGFGGGGGGGGFGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_395/1\t83\tchr16\t31206541\t60\t91M\t=\t31206168\t-464\tCTGAACAGCTTGTCATAATTCTTTGCCAATAGTCAAATTCTTCACGTGGTCTCTCTAAACACATACTGGCCATTGATGGTGTCAATGCCTG\tFEEF?FBBFGGFF?EGFEGDEG=EEGGGGEGFGGGGB?EEECFEGGGGGFFEGDGGEEEEEFDFBDGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_395/2\t163\tchr16\t31206168\t60\t91M\t=\t31206541\t464\tATTTAGGCTCTTTTTTTGTTCCGTATTAATTTTAGAATAGTTTTTTCTATCTGTGAACAATGGCATTGGTAGTTTGATACAAATAGCATTG\tGFGGGGGGGGGFGGGGGBGGGGGGEGGGGGGGGADGGGGGFDFEEFFCDFGFGDGGGGGGFGGGGFFGDEFDDFFBGGGGGFFGGAGGAGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_397/2\t129\tchr2\t114554339\t25\t91M\t*\t0\t0\tCGATCATGAACAACTAACTTATTTCTCACAGGTCTGGAGGTTGCTATGTCCAAGATGAAGGCACCAGCAGATTTGGCATCTGGAGAGCGCC\t673777<7,,,>67;,77775AA?D@?5AC7+76,;:A;;B6?@>9;;5<3<?6)?A>?A:CA?A?#########################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:2C28T51T3G3\nfoo_398/1\t83\tchr3\t146074136\t60\t91M\t=\t146073777\t-450\tGGACAGATGTGGAAATGGAGAAAACAATTTAAACAACATATTTTCTGGCTGTTGATTTTTTACTTGACCTTATGAGTATTGGTTAATTTGT\tGGGFGGGGGEGGGGFGFFGGGGGEGFGEEFGFFFGFGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_398/2\t163\tchr3\t146073777\t60\t91M\t=\t146074136\t450\tGTAAGTTCTTTATATGTTTATTTTCTGCCAAGGTAAAAACTAAACCAAGTAAAATTGGATTCTTTATTTTAGAAAATATTTGTAATTCACC\tGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGAGGGGEGGGGFGGGGGGGGGDGGFFGGGGGGGFDGGGGGGDGGGGEGGFGEGGGGFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_400/1\t83\tchr2\t177102151\t60\t91M\t=\t177101786\t-456\tCTTCCCCTACCCCCTCCGTCCTCCTCCCATACCCTCCTGGAATCTAGACCCAGTGATGGAGGATCACCCTAACAATGCAGCGTTCCCTCTC\t######??*;<BCBBDBB=CB?C@?@EDEE>FFFDFGGGGEGEGFGGEGFGCGGGGGGGFGGGGG?DGGGGGGGGGGG
GGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:5A85\nfoo_400/2\t163\tchr2\t177101786\t60\t91M\t=\t177102151\t456\tCATGATAATTGTGTAAAACACAGGGTGCGGTGAACTAGTCATTTTGACTGCAAGAAAAAGTTATATTTAACTTGTTAGAGGTTCAAAGTTT\tGGGGGGFGGGGGGGGGGGGEFFGGGCEAEEBFFFFGGGFGGDGDDGGGFGEGEGFGGGGGEGGGFGFGFEGGGGGGGFGGGEGGFDFEEFC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_402/1\t99\tchr22\t25528389\t60\t91M\t=\t25528784\t486\tTACTGGGAACCAGGCACATATATTATCTCGTTAATCCTTATATCCACCCTGAAAAGAAGGGGTTACTGTTAGCTCCATTTTACAGATGAAG\t?(666*..77645?6B6=2BBAAA:BB@@B40:BB5::57B##################################################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_402/2\t147\tchr22\t25528784\t60\t91M\t=\t25528389\t-486\tGAGTTAGAAGCTATTACCACGCCCATTTTACAGATAAGAAAGCTGAAGCCCCATGAAGAGAAAGGATCCTACTGCTAGTAAGAGAAAAGCT\t###########################################A>>>AA:CB=:B=?@@6=737675;>:>=AA>?:;==>?=;?9A?5=:\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:16G24A49\nfoo_404/1\t99\tchr3\t175457907\t60\t91M\t=\t175458291\t475\tTATTAATTGTCCTATTATAAAGGTACATGCACACATATGTTCCTTGCAGCACTATTCGCAAATAGCAAAGACATGAAATCAACCAAAGTGC\tFEEFGGGFGGGGGDGGBGFFGGGGGEGFFEFFFEDFFFFFGFGGBGGGG=GDGGGGGGGDFGFGGGFGGGDGGGE=AEEEGGFGGDFGEDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_404/2\t147\tchr3\t175458291\t60\t91M\t=\t175457907\t-475\tCATGTGTGATGGTTAATACTGAGTGTCAACTTGATTGGATTGAAGGATACAAAGTATTGATCCTGGGATGATCTGCGCAGCAACTTTATAT\t=5?DCFGGFGAFFBECC=BDDCAEEAEECEDDFAFDEEEAEDDDDDDD?C?DEFFF?FDFBFBFFFFDFDDC?ACADDDD?EED?EEFFBG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_406/1\t99\tchr1\t16990046\t15\t91M\t=\t16990439\t484\tCACACAGCTAGCTAGTAAATAAACTTTAAAAAATTGAAGTATTATGTACATAAAGACAGGCATAGTGTACAACTAAGTGGGTTATCATAAA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGFGGDFFFFFGGFFGGGFGGFGGGGGGGEFEGEEGGGGGGFGGFGGGGGGGGGGGF\tXT
:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr1,+16990046,91M,0;\nfoo_406/2\t147\tchr1\t16990439\t23\t91M\t=\t16990046\t-484\tTTTATTTATACATTTTTCATTTTGCTGGTTATTTGAGTAGATTTTAATTTTGCCCCATGACAAATAATGCTACTTGGAATGTTCTTGGATA\tGGGGGGDFFEGGGGEGGGGGFGGGGGGGFEGGGGGGGGGGGGGFGGGGFGGGFGGGGGGGGGGGFGGGGGGEGGGGGFGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:0\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr1,+232993482,91M,1;\nfoo_408/1\t83\tchr5\t104564853\t60\t91M\t=\t104564470\t-474\tGAAGAAGTTGAATCTCTGAATAGACCAATAATAGGTTCTGAAATTGAGGCAATAATTAATGGCCTAACAACCAAAAAAAAGTCCAGGACCA\tEGG:GFGEGGFFFFDFEBEECGEEGEGGFGGFGGGDBFDGGGGGGGGGG?GGGGGDFGGGGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_408/2\t163\tchr5\t104564470\t60\t91M\t=\t104564853\t474\tCCACCCTAACATCACAATTAAAAGAACTAGAGAAGCAAGAGCAAACACATTCTAAAGCTGGAAGGCAAGAAATAACTAAGATCAGAGCAGA\tGGGGGGGGGGGEGGGGFGDGGGEGGGGGGGGGGGGGGGGGGDGGDGGGGGGGGGFGDFGGDGFGAGGFDEFGDGGGGDEGGFDGGGGGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_410/1\t83\tchr2\t135252137\t60\t91M\t=\t135251766\t-462\tGAAGAGGAAAGGGGGCAAAAGATCAAATGCAAATGCCTTGGCTTGTCCTGGGGGGTGGAAACTTCCAGAAGACGTGTAATGAGCCCACTCT\tGBDEDGDBE=GGGGGEEDAEEEFGEDAFGEGGE?EGBEBF?EAGGEEGGGGGGGEFEFFFFGGEGFGGGGGFGGFGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_410/2\t163\tchr2\t135251766\t60\t91M\t=\t135252137\t462\tCACCCGGCCGATATAATTAAGACTTTTAAGTTGAAGGCTCATTGTTCTCTAGTACTGGTACTTGAATCAAGGAATGCTATTTATCATTACC\tGGGGGFFGGGGFGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGFGGDGGGEGGGGGDGFGGGFGFDEFEGBGGGBGGFEEEGGGDGGE=GG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_412/1\t99\tchr2\t85234289\t60\t91M\t=\t85234672\t474\tTTAATCTCTACACTGCACTATTCCTAGGACACTAACTTATGTTTAATTTTGGATTGCTAGTTTTCTAGAATACATATTCTTAAGTATGTTG\tGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGFFGGGGFBDGGGGGEFGFFEDE@EFFE
EEEEEEEEGEEGGGGGEGGEE=ECEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_412/2\t147\tchr2\t85234672\t60\t91M\t=\t85234289\t-474\tTCGTCCCAGAATTGAGGCTGTACATCCTTTAAAAATAAATAAAGCCGAGGTCTGTGCTAGAGTTAGAACATTGAATCTGGCTCGCTGGTTT\tAA??@?DGGGFEGEFGDFEFBEGGEGFFDFGGGGGGGGGDEGGGDGGGFGGGGGGGGGGGGGGGGGGGFGGGGDGGGGGGGEGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_414/1\t99\tchr11\t83587338\t60\t91M\t=\t83587723\t476\tAATAAATGGTATACCTGAATATGAGTAAAATAAAAATTATAAAGTGGAGTGCTAACCCTTTATCACTATTGGGGAGACAAGGACTTAGAGA\tGGGFGGGGGGGGGGGGGGGGGGGFGFFGGGGGGGGEGGGFFEFFCFFEFCGGGGGGGFGGGGFGFFGGDGGGFGEGGGEGGGGGGGFGFGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_414/2\t147\tchr11\t83587723\t60\t91M\t=\t83587338\t-476\tGTGCAGTATTATTTGCACGTCATAAAAAGTCTTCCTACCTTATTCTTTCCCAAGTTTACTGAGAAAAGGGAAGTGAAGATAGGCTTGAAAA\tEEGEGGEEFEEGFEFEEGGGGGGGGGGDGGGGFGGGEGGGGGGEFGGGGGGGGGGGGGGGFGGGGGEGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_416/1\t99\tchrX\t65347845\t60\t91M\t=\t65348206\t452\tATTTGTACAAATGTAAGGAGTATATGAGAAATTTTGTTACATGTATATTATGTATAGTGATCAAGTCAGAATATGTACAGTGTCCATCATC\tFEFDFFFDFF?EECBCCCCCBEEEEEEBEED,DDDECCCEFF=DFE=EEEFFDFEEEBE-@>C=C==@@>EE=EE@;@@>ADBACBD55A7\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_416/2\t147\tchrX\t65348206\t60\t91M\t=\t65347845\t-452\tTTTCCATCCATGTTGCTGCAGATTATATGATTTCATTCTTTTTTATGGCTGAATAGTATTCCATTGTGTATATATACCACATTTTCTTTAT\tED5:BC?B:5?DEBEF?BDAD=?@;>@;@==DB=<>BB?BE@@@<>ADDD?DDBD?DA?BDFEBBFEEEDDC?CCAACAC=8;>=:;;9@>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_418/1\t83\tchr7\t13465890\t60\t91M\t=\t13465514\t-467\tGCTGGAGAATCTCCCTACCTTAAAAGAGCTTGGTGTTGGAGAAGCCAAGCTCTGTGGGGCCTTGCTACTGAACACACCAGAGCCAAAAAGA\t############AA:>;>=-7CCBAB:C@DDFCCEEAFDECEEEEAEFFFDGDBGGGGGGGGGDGGFFFFFFFFFEFGGGGGGGGGGGGGF\tX
T:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_418/2\t163\tchr7\t13465514\t60\t91M\t=\t13465890\t467\tAGGATCGAAATATTGAGAGATTGCCTAGTAAGAAGTAAGGAGAACTCTAAGGAACATAATGAGTGCAGATATAAGAAGACATTTTCCTTAG\tFDGBGFGDGGDFGGGFGGGGGGFGGGGGDGFDFFBDFEDFEEEEEGFDGFGDFGGDEGBEFGGDGFFF5FF?FDFGGGGGFGFAFEFGBFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_420/1\t83\tchr11\t98265841\t60\t91M\t=\t98265465\t-467\tCATGAAAACAGCAAGGGGGAAGTCCATTCCCATGATTCAGTCATCTCCCACCAGGCTCTTCTTCCCACACTGAGAATTACAATTCAACATG\tFGFGEGGEGFGFFFGGEDGFEGAFGD?GGFGEGGGFGGDDGGEFGGGGGFGGGGGGGGFGGGDGGGGGGGGGGGFGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_420/2\t163\tchr11\t98265465\t60\t91M\t=\t98265841\t467\tATTTATTTATGAAGATGAAATATCTCTCCATTTATTTATTTGTTTGATATCTTTTTATCAGGGGTTTATAGTTTTCCTCAAATGTTTTTAC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGFGGGGGEFGEGDGFGGEGGDGGGGGFGEFGFGGGGGGGBFGDFGGGGFGGFG6E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_422/1\t99\tchr14\t70232998\t60\t91M\t=\t70233391\t484\tTCTTAGTGTTATGAGGTGCTGTGGAGAAACAGAGATGCACAGGATAGACCCCAGCCCTGCGCAGTAAGAATGGGAATAAGAATTACCGCTG\tEEEECEBEDEEDE::@9>??C>A?A>>B9>BDBDCBAEEEEAEE=D=:B?>@:@>BA:A-1..60??<:1-9;5):,5?7>@@C#######\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:59A26A4\nfoo_422/2\t147\tchr14\t70233391\t60\t91M\t=\t70232998\t-484\tGATTTGCTTGAACCTGGGAGGCAGAGGTTGCAGTGAGCTGAGGTAGTGCCACTGCACTCCAGCCTGGGCAATAGAGTGAGACTCAGTTAAA\t#################@:69AAA5A?A-A?AA<A?<;*:99A-;964=<?74B13?>?A??A:B<DBDAD4@>;@CCC??EEEAEE?E5F\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:2A88\nfoo_424/1\t83\tchr4\t117406443\t60\t91M\t=\t117406043\t-491\tTAGTATTACCATGCCCTGTTATTAAGGTCAATGGGAAAATACAGCAGCCCAATCCCGGCAGGACTACAAATGGTCCAGATCCTTCAGGAAT\tGGGEEFE@GDEFGFDGGFEEEEEGEFEEEEEDEEEGGGGFEGEGGGGGEGFGEGGGFFF5FGFGGGGGFGGGGGGGGGFFGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i
:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_424/2\t163\tchr4\t117406043\t60\t91M\t=\t117406443\t491\tCAATATATGGTACTCTTTCTCCCATAGCCAGGATTCCCGGGTCCACGAATCAAGGGGTGGAAGTAGAAGTGACACCACTCACCATCACCCC\tGGGGGGGGGGEGGGGGGGGGGGGGGGGDGBGGEGGGGGGGGEGGGGFGDGFBGFBEE>EA>DCCCECECBCBBB5DCBAADFED5EEEDEG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:37T53\nfoo_426/1\t83\tchr2\t108096607\t60\t91M\t=\t108096235\t-463\tAGTCCCCTTCTGAGGGATATGTACAGATGGATTTCCCACCTTGCCCGGGATCTTAAGGCTGGCACATGTAAAACTCCTGGGTTTTGGTAGG\tBGBGAEGGGGGGGGFFEFDEGFEEGGF:FDF=FEFFFCD?FGGGGGGGGEFGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_426/2\t163\tchr2\t108096235\t60\t91M\t=\t108096607\t463\tTCTGTGGGTAGAACTCTTGCTGGAGTGGCTGAAGCCCCCACAGGAAAGTCCCACCCAGTGAGGAAGAATGGATTCAGGGGCCCACTTAAAG\tGGGGGGGGGGFDGFGGFGGGFGGGG?FFFFC@C--DCDD=><DD=CCC?CEE?BE>)@:>8>@9>?5A>>A-:?#################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:79T11\nfoo_428/1\t83\tchr16\t3069959\t60\t91M\t=\t3069588\t-462\tTTCCCCATTCCCACATACGGCACAATCAGGAGCTGTGATACACATTCTCCTGGCTCTGCTTTCCAGGGAACAGAAGTAGATATAACAGTTT\tE=DDCCCB=EEECDEECEEDFCCCC=DDGDEEGGGGGGGEGEGGGBGFGGDGFGGFGGGGGDGDGGGGFFFGFGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_428/2\t163\tchr16\t3069588\t60\t91M\t=\t3069959\t462\tCTTGTGCTCCCCATTGTTGTAATAAATCTCTTCTCCATAAATTTATAGGCACAGAAGTTATAATTGGTTGAATATTCCCAGGTTGTCCATC\tGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGFEFFFFGGGGFFGGGGGGGGGGGGGGGGGGGFGGGEGGGGGEFEFDDFFFEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_430/1\t83\tchr17\t7727714\t60\t91M\t=\t7727353\t-452\tCCCGCTTCTCATTTAAAAGATATGTACACAATTAAATGCGTTGTGACTTCAATGAGGTATGTAGAAACAGAGTTATATATAAACATATATT\t=GGBFGEDGFGEEEDEEGEEEFFCFDBBEGGGBGGFEEGGGFGGGGGGFGGGDFGFGGGFGGGDGGFDGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX
1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_430/2\t163\tchr17\t7727353\t60\t91M\t=\t7727714\t452\tGAGATCATGGAGGGAAGGCAAATATACTGAAACGGAAGAAAAGAGAACATAATTTCACGATCTTCAGATTTGACTCTTTCCTCTTTATCCT\tGGGFGGGGGGFGGGGGEGGGGGGGGGGGGGGEGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGFGGGGGGGGGEGBEFEAGFGDGEEBFEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_432/1\t99\tchr9\t7161074\t60\t91M\t=\t7161455\t472\tTTTTTTCCAGTCAGATGGGATCCTGACAGCAGAGTAACTGCATCAGGTGGGAACTGACTTTCCTCTCAGCTGCATCCTTCAGAGGAAGGGC\tGGGGGGGGGGGGGGGGGGGGDGGGGGGFGEGGGGFGGGGGGGGGGGG@GGFAFFFGEFGGGEGFGGFEFGE=EGEGGEGGGFGFGBEEE##\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:89T1\nfoo_432/2\t147\tchr9\t7161455\t60\t91M\t=\t7161074\t-472\tCTCCCCTCTTCCCTGCCTTATTCCCAGCTCCTCCCGCTCCTTTCTTTTCCTTTTTCAGCACCTAGAGATTTAGAGGATGGGGAGTGGAGTA\tA-7??6.<.6=EB::BFEEEF:FFFEGG:GFEGGFGFDGGGGBFDGGFGGGFGGFGGGG?GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_434/1\t99\tchr14\t53456345\t60\t91M\t=\t53456729\t475\tAGGGGTAAAAAATTATTTTCCATCAAATAATAATCATTCTCCTAAAATCTTAGTCTCGCATTCCTTGAAATTAAATTTTTTCCTAAATTTT\tFFFEDEGGGGGGGGGGGGGGGAGGGGFEGGGGGGGGGEFDGGEGGDFGGGEFEEGEF:DADECECGGADDGGGFFGGGGGGGGGE=FFFDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_434/2\t147\tchr14\t53456729\t60\t91M\t=\t53456345\t-475\tTTGATCTAACACTCACGTAAAATCTGTTTACTAAGCTCATCCGCAATTAAATAAATACAAATAAAAACAACAGAGAGATACTTTTGCCATC\tGBEBBEEEEE@GFGFFGFEEGDGBGGGBDFGEGEFGGDGDGGGGDGGGDGGFFGGDFDGEGCGGGGGGGGFGGGFGGGGAGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_436/1\t99\tchr6\t62612893\t60\t91M\t=\t62613264\t462\tCTACTTGACAAGATAAACTCTAGATATTTTAATTATTCAGTTTTTACTTAAAATGGTAGATTTCATGACATATTTTGTTTCGGAATTATTG\tGDGGGGGFGGGEFCFGGFDDFCCFFGGDGDFDEBAEFFEFEGGE4CBBBCFGGGE@:=@?@BB=AED@ADBBBBBEB??>(:154BBCCBB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:
Z:91\nfoo_436/2\t147\tchr6\t62613264\t60\t91M\t=\t62612893\t-462\tCCCGGTTTCGGGCGATTCCCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGTGCATGCCACTACGCCCAGCTAATTTTTGTATTCTTA\t###################?9E=EEDC>=@;AACA?CAC:CAAAB?EADEEEEE:ECAC5CDDDD?DEEEDFEFFFEBFFDEEE?EDFFFF\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:5G4A7T72\nfoo_438/1\t83\tchr1\t176775580\t60\t91M\t=\t176775191\t-480\tTGTCTTTAAACCTAGGAAATAAGGCCAGGCATGGTGGCTCATGCCTGTAATCCCAGCACTATAGGGGGCTGAGGAGGGTGGATCACTTGAG\tGGEBGGFBGDGEBGGGB:DFEDEEGGEFEGFGGGGGEGGBFGGGGGGGGGGGGGGGGEGGGGGGGGGFGGGGGGFGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_438/2\t163\tchr1\t176775191\t60\t91M\t=\t176775580\t480\tCAGGCTGTGGTGAGGATACCGTGGCCGGGACTGCTGCCACAGGACACGGGATGCTACTCTCACAGATGGTCTCCTGGCCTAGCTGCCATGG\tGGGFGGGGFG:FFFFGGFFGFDFFDEDEE:EED=CBBEAC=BDBAADDDDB?BE?CCCC=ECEEA=CBC@DC?E:BC5E-@??>=AD?AAE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:47A43\nfoo_440/1\t99\tchr2\t4083031\t60\t91M\t=\t4083401\t461\tTAACTCCTGCCTTCAGAAGCAGATACTGAGCCTCAAATCTAAGATTGCCCTGAGTCAGAGTCTTATTTCCTGTAGAGAAAGAGCTGACATT\tGGGGGGGGGFGGGGGGGGGGGGGGGFFG?GDGGGGGFGFGEGGGGEEFEGFFEGEGGFEEEGEEEEGGEGEECDEGGEEGGEEGEFDDAEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_440/2\t147\tchr2\t4083401\t60\t91M\t=\t4083031\t-461\tTTTTGTCTGAGGAGATAAAACCTGTGCTACCTGAGGCAACAGTGATGGCCACCCCGAGGCAATTGCTAGGCAAGATAATGTTGATTCTCCT\tAE:EEACBE5EADBCACCC?CD=D5@DCAD?EEFGGGEGBGFEEEDD@@C:GEGGGFGGGGAGFFGEGEFGGGGGGGGGGGFGGGGGGFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:19C71\nfoo_442/1\t99\tchr3\t116679418\t60\t91M\t=\t116679795\t468\tGGGGCTAGTCTAGTCATCTTCAAGGAAATGATCATGAACAAAGAGTAAAGGACACAGGCATTTGGGAGTTGTTAGGAGAACTTTGACGTTA\tFFFFFFF??FFBFDFFDE?FDFDDFDEEAECEDEDF?DCEDDD=D@CEEED:EEEFBDFDFEFEFEBE@BC@C?BCC=?C=ABC::BA>A=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_442/2\t1
47\tchr3\t116679795\t60\t91M\t=\t116679418\t-468\tGGTGCCTTTATCCAGGTAAAGAACAGTGTATAAAAAAGGGCAACAGATCTCCAACCAGAGTTGAGTGGTTGTGCAGAGACTATATGGCACA\tDBDEFFFF:EEEEAEBBEBEDF=FF?FGDGDEGEGGFDFDEFFDFFFBDEFGGFGFFF=FF=FFEADGGGGDBFFGDGDGDGGGGGGGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_444/1\t81\tchr4\t48956355\t15\t91M\t*\t0\t0\tAAGGTGTGAGCCACTGTGCCTGACCGGTGTAACCACTTTGAAAAACAACGTGGCAGTTTCTCAAAGACTAAATGTATAGTAATCACATAAT\t#######?@@C???@A?@C@A<8><2BABCCEGGCGGDGGGGGGEGGGGGFGGGGGFGGGGGGGGGGGGGGFGGGGEGGGGGGGGGGGGGG\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:1\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:25A65\tXA:Z:chr4,-48956355,91M,1;chr4,+11734865,91M,2;\nfoo_445/1\t99\tchr4\t22642402\t60\t91M\t=\t22642765\t454\tTGCACATCTGTTTTTGTTGATTGCATGGAATCTGCTGAAATATTCTTGAAAGTACAACTAGAAATAGGTGTAAAAACCACCTTTCCATTGA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFEGGGGGGFGEFGGGGGEGFGGGGGGGGGGGGGGGGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_445/2\t147\tchr4\t22642765\t60\t91M\t=\t22642402\t-454\tAAAGAGAGACTTGAGGAAGAGAGACAAGAAAGCTCTTGGAATGATTGAGAAGAAAATTAAAACGCCCAAAGTAAAGTACAATCTTATGTTA\tGFGGGGGGGFFGGGGGGGGGGFGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGDGGGFGGGGGFGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_447/1\t99\tchr3\t9479055\t60\t91M\t=\t9479407\t443\tAGGTCAAGAGATCAAGACCATCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGTGTGGTAGCGAGCAC\tGGGFGGFGGGFGGGGGGGGGGGGGGGGGGGGGFGGEGEGGFGGGGFGGGGGGEGGGGGGGGEEGGE=EDGEA?:?################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_447/2\t147\tchr3\t9479407\t60\t91M\t=\t9479055\t-443\tACCCTGTCTCTACTAAAAATACAAAAATTAAGAATGTAGATATATGGGAGCAAGAATACCTGTTTTGTCTCTCTCCACACATCAAGTTTCT\tAFFF=FFGGEFGGGDGGGGGEGGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGGGEG=GGFGGGFFGFGGGGGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_4
49/1\t83\tchr18\t32443236\t60\t91M\t=\t32442861\t-466\tAGAGGCTGCAGGGTGGAACTGGGAGGGGAGGCCCAGGGACAGGTGGAAGTGTGGTAGAATTTGATGAGCCTATTAGAGAATCAGCCAAAAA\t=FEEEFECFFFEEEDEC@B3=CFCCEEEBEDAEEFFFEFFBFFDFFFFFFFDFFFFFFFFFFFDFFFFFFF=FFFFFFFFBFFDFDFFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_449/2\t163\tchr18\t32442861\t60\t91M\t=\t32443236\t466\tACCTGTTGAAGGAGATTTGTGTGTTTATAGCTTTTGGCTTCCACAGATAGAGCTGCTATGAACATTCATGTACTGGCTTTGCCACAATGGT\tDGGEGGDGFGGFGFDEEBEEEEEEEFFDFFGGGFDGGFGGFGADEDF?FDBD=E:D?DB:DDE=EEABEBBDA:5==.?>8.?5?######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_451/1\t99\tchr4\t75590706\t36\t91M\t=\t75591067\t452\tAGCAATATTTGCTGTTCTGCAGCCTCTGCTGGTGATACCCAGGCAAACAGGGTCTGGAGTGGACCTCCAGCAAACTTCAACAGACCTGCAG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGFAGGDGGDGGGGGGGGGGGGGGGGEGEGGEEAEFCDFEGGEFEGGEEGGFDGG?FGEFEEFGE\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:11\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_451/2\t147\tchr4\t75591067\t37\t91M\t=\t75590706\t-452\tTAGGCTTCAGAAGGTCAGTAATAACAAACTTCTCTGAGCTAAAGAAGCATGTTCTAAACCAACATAAAGAAGATAAAAACCTTGAAAGAAG\tGFGAGAFGGGGFFGGGFGAGGGGFGGGEGGEGGGGGFEDGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_453/2\t145\tchr10\t103682977\t37\t91M\t*\t0\t0\tTATTTTTATCACAAGTAATAAAAAATAGGGCAAACATAGAAAAAGACTCTAAGAAAATTTCAGGATAAGTTTAGGAGGGGGTTTAGATGCT\t####@7>?>AC;:.84:;<A:;B5?A:5:-DACA::6774*AABB:C:A>A:+;40;A:?-?@@@@==7:=.<A1@B:=5;;5BB=:339@\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_454/1\t83\tchr5\t38679842\t60\t91M\t=\t38679464\t-469\tAGAAGCGAGTCTAGAATTTAGTCTCCTAAATGTGAAAGAGACAGCAAGAAGAATCATTCTATCAGGGCAGAGTGTAGGGAAACCCTAAGGC\t;=@;:9BD-DD:DDAD=CA:>5ED=CEBEEEDDAEECCC@A??D5DACB=EEEEDAAC:C:BD=DD?=CCA?FEDFEEDEE=EDDEDFEFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_454/2\t163\tchr5\t38679464\t60\t91M\t=\t3
8679842\t469\tGCACTGTCCATCTGTCCTGTGGCTTCCCAGGAGTTTAAGGAGTTTACTTAAACCCCTTTTCCTGGCTTTGCTCTGTTCTGCTGTTCAGTCT\tFC@CEEFEF?D?EEEDDEEE==:CCAEE:D;-;85A?A?A5AAAA?BCA:CD-DDEDEEEBD=?D6==C-;;;+;=@<-,;@@<@?5:CC=\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:52A38\nfoo_456/1\t99\tchr2\t142269256\t60\t91M\t=\t142269639\t474\tTAGGATATCTTTCTGTAGGTTAAGCAAGGTGAGTACCATTTGTCCTGTTTTTTATGCTGATCATAATCTCCTAGTCTCCATGCTGTCACAT\tC?CCCACC;?C?@@B4/*73@3096CCC;A6@6)C8:*:;4'5'56(5=0@0;22>C@?@9<AA>+:):?B3=?#################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:17A23A1A2T44\nfoo_456/2\t147\tchr2\t142269639\t60\t91M\t=\t142269256\t-474\tACTGGGTGGCACATGTACAAATTGGAAAGAACAGAAAATAAGATGTAAGCCAAGGGGATGTCAATTCCAGTCACGGTTCTGATGTTTATTA\t###########################BFFFC93@C;3*455@42*A:A178.:0<>9C:@.6B:B:>6><>:@C@AA?5ADDD>BD:DDD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:38G52\nfoo_458/1\t99\tchr11\t41351080\t60\t91M\t=\t41351473\t484\tATAGAACTTACTCTATTTCTTATTTAAAAATCTAGAGAAAGAATACTTTTCCATTTTTGTACAGCTATGACTATTCTAACACATCCATTTA\tGGGGFGGGGGGGGGEGGGFGGGGGGGGFGGGGGGGGGGGGGGGFGGGGGGGGFGFGGGGEAFFGGGGGGEEFGGFGGAGEGGGEGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_458/2\t147\tchr11\t41351473\t60\t91M\t=\t41351080\t-484\tTATTTCTTAAGATATACGTATACATACTTACACATCTAAATGTAGTACACTGTAAAACTGTCAGTTAAATTTTTAAACTATTAAGAAACAG\tFGGGEGGGGGGAGGFGGGFGFGGGGFGGGGGGGGGGGGGGGGGGGGGGGFDGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:49T41\nfoo_460/1\t83\tchr9\t117548613\t60\t91M\t=\t117548242\t-462\tGCATTGTGAGATTTCATCCTTGAGAGCATCTAGCAGTGGGAATGATTTAAGATGACAGCACAGCTTACTGTTCTATCCTTCAAATCTTTCC\tGGGGGEEEFEEDDGBEEEFGEGEFGAGGEFGGG=EGGGFEGGGGGGGGGGEGGGDGFFFDAEBFGGEGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_460/2\t163\tchr9\t117548242\t60\t91M\t=\t11754
8613\t462\tAGTTAAGATGTGGAACAAAAGGGAACACATAGATGCTGAAGTTAGGAGACAAAGATGATTTTGAGAACAAAAAAATTCCTAGCAACTAACA\tGFGGGGGFGFGGGDGGGGFEGFDGGGGFGFGGDGGGFDFGDGGEBGGGFGGFBDGGGGFGGGGBGGGFGGGGBGDDEGGFEGGGDGGGGED\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_462/1\t83\tchr1\t101394399\t60\t91M\t=\t101394004\t-486\tCCCTCTTTCACTCTTAACAGTGTCTCGGTTTGGACAATAAATTACATGATCACATTAATTTCTCCCATATTCTATGAGTGAGAATTCAATC\tFEDEFFDDFAFFGGDGCEEEE=EEEGGGFEFGEGEGGGGGGGEEGGFGGFGEGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_462/2\t163\tchr1\t101394004\t60\t91M\t=\t101394399\t486\tGAAAAACTGACATATAAACAGGCATTTAAAAATGTATGAGCATTCAGTTTGCTAGAGTTTAAATTCAAGCTCAAGACTGATGAAAGGAGAG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGEGGGGGGGBGGFGGGGGGGGGGBFFGGGGGGGFEGGGFGFGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_464/1\t83\tchr5\t60820050\t60\t91M\t=\t60819675\t-466\tGGCTGTAAATCCATCTGATCCTGGGCTTTTCTTTGCTGGGAGATTCTTTATTACCGATTCAGTGTTGCCACTCATTACTGGTCTGCTCAGG\tGFGGGGGGEEFGFFGGGGBGFEGFBDCCECGFFGEGGGEGGGEFGBGEGEGGEFGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_464/2\t163\tchr5\t60819675\t60\t91M\t=\t60820050\t466\tCTGAATTTTACAAATGGTTTTTCTGTCTATTGAGATAATCATATGGTTTTTGTTCTTTATTCTGGTATCACATTTCTCAATTTGCATGTTA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGEGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGDGGG?FFFFFGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_466/1\t65\tchr18\t23435820\t37\t91M\t*\t0\t0\tTAACATATATAAGTGCGTGTGTGTGAGAGTGTGTGTGTGTGTGTGTGTGTGTATTTTTCAACCTCAGGAAATTCACAACATGCTGCTTTTT\tGGGGGGGGGGGGGEGGGGGGFFFFFCECEBECECEFFEFFECEEEEFFFFFDEFFGGEEEEDGDEBFDACCDBEBGFFG=AD?DCEGGGEG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:44A46\nfoo_467/1\t83\tchr2\t198599324\t60\t91M\t=\t198598930\t-485\tGTTTTGGGAAAGTTAGATTTAA
TGGTTAAGTTTCTTATAATTGTGAATGACTTAGTGAGTTAATTGGAGGGAGAGAGAGAATTAATTGAAC\tGEEGGGBGGGAGGGEGGEEGEEDEFAEGFEDFEEGFFGGFEFECGEFFF@FGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_467/2\t163\tchr2\t198598930\t60\t91M\t=\t198599324\t485\tAGTTAATCATATTTGCAATGGGGCCATTGTTGTCTCTTTATAGGTTTAGTTTTAAGGCTGGCATTAAAGAGTTTTGCAAAATTATTTATTT\tGGGGGGGGGGGGGGGGFGGGGGDFGFGGGFGBGGGGFGGFFFFBFGGGFGGGGGGGEGFGFDDGFF?EFFGCGFGFFFEEGGGBEBDDFFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_469/1\t83\tchr2\t214053692\t60\t91M\t=\t214053296\t-487\tTTGTACACTTTCCCTTTGAATATTTTTAGGTTTGCAACACTTTTATGTGATTATCATAGTGTTAAACAGAAGTTATAATTTACTGTCCAGG\tFCDC:CAEFFEDADDCBBEC?GGBGGFGEGGFGBGGEGDFGGGG=FGGGGGGFAGFGGGGGFDGGGGGGGGEEEE?GGDFGFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_469/2\t163\tchr2\t214053296\t60\t91M\t=\t214053692\t487\tGTACTATTAGGCATATCCCCTTAGCCAATAATACAAATAAGTTAGCCTTGCTACAGCAAAGTTTTGCCTTGCCTTTGAGGAATGGAAGCAG\tEDEE?EEEEBF:EFFGGGGFGDGGGGGFG?GGGGGGDGGGFBDFFAFFFFFG?GGGGGDGFEFFFGGGGFGGGGGGGDGEGGGGABFFEEF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:32G58\nfoo_471/1\t99\tchr4\t100638765\t60\t91M\t=\t100639152\t478\tAGTACCCACTGTGTCTCTACTCGAATCTCATGATTCTTGAGATGGACAACTCAGCTCCTCAATAGAAATGGGTATGTTTGGTTCCCACACC\tGGFGGGGGGGGDEGGGGGGGGGGGGGGGFGGGGGGGGGFGGAGGFGFGGGGGFDGCGGGGGGGFAGEGEFCE?CBEEEECE?EBEFFFEDC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_471/2\t147\tchr4\t100639152\t60\t91M\t=\t100638765\t-478\tAGTTTATTAAGAGCTAGTTTTCTACCAGCTTTGAAACGGATTTTCTATTTCTTCATTTGGGCACAAGATCATGAAGTTAACGTCTATTTAT\tBFDEBBGGEGEGFEGGF?AFFEEEGGEGGGGGGGGGGGGGGGGGGGGFGGGGGDEGFGGGFFD?GGDGGGDFGGGGGFGFGGFDGGGGGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_473/1\t99\tchr1\t187310324\t60\t91M\t=\t187310672\t439\tGACCTTTTTGGCCTTATTATGCATTTTCTTGCTTAGATTT
AATTTAACTTAGATTTCTACTTTGTGCACAATAGCTACTTAGTTCCAGGCA\tGGGGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGFGGGGEEGGGFGEGDDGGGFFFGDGGGGFGEGEGGGGGA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_473/2\t147\tchr1\t187310672\t60\t91M\t=\t187310324\t-439\tTCCCAAAATTCTAGCTTGTATAGCTGCTAAGGAAGAGGTTCAAAACTTCAGTTTTTATACATGGGTCTGAAATTTAAGAAATAATAACTCA\tAGEFGGGGGEGGFFEFGGGDGGGFGGGAGGGGGGGGGGGGGGGGGGFGGGGGGGBGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_475/1\t99\tchr20\t52340463\t60\t91M\t=\t52340837\t465\tAGTTATCCTTGAATACTAGCGTGTACTCTTAAGTCTAGTTGCCTGAGTTCAAAGTCAATTGTCTGGCTCTGCCACTTACTTGAAGGGGCTT\tGGGFGGGGGGGFDFGGGGGFGEFEGFFGGFFGG?GGGGGGGGGFGFFGGGFD?DFEDD=BFEFFFGBBGADBFE?DED=CDDAEECD,A?D\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:87A3\nfoo_475/2\t147\tchr20\t52340837\t60\t91M\t=\t52340463\t-465\tAGATGGCGATGAAAGTGACCTCTGGTCGTCCTCACTGCTCATTATATGCTAATTATAATGCATTAGGATGCTAAAAGGCCCTCCCACCAGC\tBFGEGEEFGBGGEGDBEBGEEGDGEGEFGGGFGFEGDGEGGGGGGGFEFFEGGFEGFFF:DFGDEGGGGDDGGGGGF>@FFFGFGDFFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_477/1\t83\tchr10\t12108214\t60\t91M\t=\t12107819\t-486\tTTAAAAATTGTATTTCACTAGTTTAAGGTGAAGAAGAAGTGGCTTGTCAGCAGTTCATATGAGAAATAAAGCTTTAGTGCAAATAAGTTTA\tE=FFFFAAAA9DD?AD>A>@CE-CFFGDFDGADEGFBAFDGGGGFGGGF=GGGGGFGAFDGEEGDGFGFDFGGGFGGFGEGGGGGGEGDGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:74C16\nfoo_477/2\t163\tchr10\t12107819\t60\t91M\t=\t12108214\t486\tAGACAGGGTCTCTCTCTGTCGCCCAGGCTAGAGTGCAGTGGTGCGATCATGGCTTACTGCAGTCTCGACTTTCCAGGCTCAACTGATCCTC\tDDADD=CC?CFFFFFFFAAFEEEEEGGGCGGFCDGBFBDFECEEEEFBEEEE?DECE:BD?A<CAD?5B5CBBD:?A:-A;=>:688:<.E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_479/1\t99\tchr8\t21158099\t60\t91M\t=\t21158474\t466\tAAATGAGATGCATGTGTTTAATTTCCCATTGTTACCCGCTAGTTCTTGCTTTCATAATTAA
AAGAAAATTGCTTATAAGGAAATTTGCCTG\tGGGGGEGGGGGGGGGGGGGGGGFGGGGEGGGGGFGGFGFGFDEFFEC?CCEECEEFFFGAEEED+7:;3C?EEECFFFEDFFEEFFEGEGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_479/2\t147\tchr8\t21158474\t60\t91M\t=\t21158099\t-466\tCAGACCTGGGACGCTTTTCGTATGCCATTACCAAAATCCTTCATGAGTTGATATTATCTTTAACACACAGAGGGAAAACTAAGTTCTAAGG\tEEFDBFEGGGEFFDFFDGFGGGGGGGEGGFGGGGGFGGGGGGGGGGG?GGGDGGEGDGGFGFFFFFGGGGGGGGGGGGEBGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_481/1\t99\tchr6\t49209149\t60\t91M\t=\t49209520\t462\tTTTGAATAAAACTTTATTTACAAAAAGAGGCAGTGGGCTGGATTTGGCTAAAATCCAAAAATTTGAGTTTTCTTGGATAAAGAAAATGTGG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGEFGGEGGGGGGBGFEGBFGEGEGEEFGEEGEEGFEEC8GEFFFFEEGGGEAEEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_481/2\t147\tchr6\t49209520\t60\t91M\t=\t49209149\t-462\tTGAAATAATCTGTACACTAAATCCCCAGGACATGCAATTTACCCATATAACAAACATGCACATGTATCCTCTTGAATGTAAAACAAAATTT\t4EGGBDFEDFEGCDFDGDGGDEDGFFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGDGGFGGGGGGGGGGGGGGGGGGGGGGGGFFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_483/1\t99\tchr16\t63481174\t60\t91M\t=\t63481544\t461\tTAATTGTTTTCGTGCCACTGGTCCTTTGCAAGGCTAAAAATAATGCCCTCCAAATTAAGTGGACACCACAAAGCTTCATTTTCACTGTCCT\tEEEE=DBDD>7=C?CC:CC?C>C:AAAA?C:BDDCB5DC?EEE<AC=C@CC<>=CC>CA5<;:>;7<:C<1&746DDCBDEAEABDDAA:?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_483/2\t147\tchr16\t63481544\t60\t91M\t=\t63481174\t-461\tTGCTGAATGAATGAACACCCATCCATTCATTCATTCATGTATCATCTATTTACTCTTTTAATGTGCATTTCTGGAAGATTGTTTTATTGTG\t:9<=BC?A=?A.2:*61C=CE=C=EED:BBE?ACD@C:::DECE5CDB@BAAC9CCGGEGAFFDBEEEE?EDAAD=EA;EAA@CC=>C@@C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_485/1\t83\tchr9\t125074494\t60\t91M\t=\t125074118\t-467\tATCTGATACTTCTTTTTTTTTAGACAGAGTTTTGTTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTCGGCTCACTGAAACCTCT
GTC\t############?@@B>@>>:2825/49(0=:--7-8;<)90BAA@FBB?FGDGFE?EEEEGDEFGFCFEFGDGFGEFFFFGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_485/2\t163\tchr9\t125074118\t60\t91M\t=\t125074494\t467\tGAGGCCAAGGTGGGCAGATCACGAGGTCAGGAGATCGAGACCATCCTGACCACCATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT\tGGGGGGGGGGGGGGGGGEGGGGGGGGEGGGGFGBGGGGGGGGGGGGGFEGGGGGGGGFFGBFFDGGGGEGEEGGBGGFF=EFCEEDFGF=E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_487/1\t83\tchr5\t42213812\t60\t91M\t=\t42213434\t-469\tAAATGCCACCAGTCACCTTGCTAAAGCACAGCAAGAGTCAACTTTGCTCCAGTTCCCGACAAGTTCCTCGTCTTCCTCAGAGACCATCTCA\tC76527D:DDDDDABDCCA=C:=ACC@>>@B-C=CCD:DDDDDB-BCCA=CD==?D@CB??==>C?9:=7=BDD:C=77;:CDA?DB<=B5\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_487/2\t163\tchr5\t42213434\t60\t91M\t=\t42213812\t469\tAGGCACCAAGTCCCTAGGCTGTACACAACAAGGGAGCCCTGGCCCTGGCCCAAGAAATCATTTTTCTCTCCTAGGCCTATGGCCCTGTGAT\tD?E=DD=:D=@6@@@?:ACA:=9>BDDEEEFF?=-;B===?:AACC=?=CC5CCCEBE=EAD?BDDACAEGEFDGBD?A534::80618+A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_489/1\t83\tchr8\t26167640\t60\t91M\t=\t26167273\t-458\tCCAGCACTTTGGGAGGCCAAGGCAGGTGGATCACCTGAGGTCAGGAGTTTGAGAGCAGCCTGGCCAACATGGTGAAAACTTGTCTCCACTA\tE?B5EB?BEAFFDFFEB=BBE=B=FBBCBB?A?BAACC?CBAAA-AAEE?EDAGGGCCCC:GGFGFGDGGGGEGGGFF=FFGGGGDFFEEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_489/2\t163\tchr8\t26167273\t60\t37M4I50M\t=\t26167640\t458\tTCCAGCCTGGGCAACAAGAGCGAAACTAAGTCTCAAAAAATAAATAAATAAATAAATAAATAAATAAATAAATAAATAAATAAAATAGGTC\tGEGDGDD?FFEDGGGFEDGGGFAGFEEDEACD5CDEEEE?A>@@>EECAEA@8EE@CFF@<EAAAFFA;FFCCDF6<BE8GGDFC?<FD@9\tXT:A:U\tNM:i:4\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:4\tMD:Z:87\nfoo_491/1\t65\tchr9\t122157831\t37\t35M1D56M\t*\t0\t0\tCTGGATATTCATCAGGTACCAGGTGCCTAATACTTAAATGTATTTATCTCATTTAAACCTCATCACATCTCTGCCAGACTTGACAGATGAA\tDDDDDDDDDBDDDDBA5CCBCAA<
?>=>=BD=D?DDBC:D38?B=?BDCDD?CDCBDDB?CCBBBBABBBBB:-DBABD=CBBDACD:B##\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:35^A56\nfoo_492/1\t99\tchr13\t96453707\t60\t91M\t=\t96454068\t452\tATTTTGTTATTTTATTACATATTTATTTGAAAAGAAAATATTCTCACTCTGTTCTTTCATGACTTCTTTACATTAACCTGAAATTTTTTCA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGGFGGGGGGGGGGGGGGGFGGGGGGGDGFGGFDFGGFGGGEGGGGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_492/2\t147\tchr13\t96454068\t60\t91M\t=\t96453707\t-452\tACTTAACACAGTTCTTTCCACCATAGTCCTATGCACTTATTTAGACCTTGAGCAGGGCCAGCTATATAATTCACAGGTCTTAGTGTTGAAA\tDGFEGFGEGGGFFGGFGGGGGGGGGGGGGGGGGGGGFFFFEFFFFFGGGGGGGFGGGGGGFGGGGFGGGFGGGGFGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_494/1\t99\tchr1\t187310324\t60\t91M\t=\t187310672\t439\tGACCTTTTTGGCCTTATTATGCATTTTCTTGCTTAGATTTAATTTAACTTAGATTTCTACTTTGTGCACAATAGCTACTTAGTTCCAGGCA\tGGGGGGGGGGGGGGGGGGDGGGEGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGEEGGGFGEDGEFGFFDFFEGGEGFFGFAGDEEE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_494/2\t147\tchr1\t187310672\t60\t91M\t=\t187310324\t-439\tTCCCAAAATTCTAGCTTGTATAGCTGCTAAGGAAGAGGTTCAAAACTTCAGTTTTTATACATGGGTCTGAAATTTAAGAAATAATAACTCA\t?EEBEEGG=FEGGGGGGGGGGGFGGGEGEGGGGGGGGGGFDGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_496/1\t81\tchr11\t73965428\t37\t91M\t*\t0\t0\tGGAGACAGAAAGACCAGTGGGGCGGCTGAAAGATGGCCGGGCCTGTGCTCAGCGAGGGGCAGTAAGGATGGGAGTAGACTTGGCACCTGCT\tB?=9BB698:.=37::4'/6%3)8332-9>)@==@@<)67:C=2CC@ACA@?A1C=B?;B+?AAC@=?:CCCC5CC66680748<1540=?\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:22A68\nfoo_497/1\t81\tchr16\t30480960\t37\t91M\t*\t0\t0\tAATATCGATCCGGTGTTGCTGACCTAGTTTGTTCCTGCTGGAAAAGTCTTCTCTTGCTGTTCCCCCCAGGCGGCCTTGGCAGCGCCTCTTG\t@@>=4C;=-B7=;*?;>?;=:5=BC?AC?BCD?BBE<7=975A?CA=-?1:>>C=.@?@@@=C
4>;C@?-;?DDBDDA=DEEA=EEEEEDD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:63T27\nfoo_498/1\t65\tchr1\t206890991\t37\t91M\t*\t0\t0\tCCTTAGGAAAATAGAGAGGGCTTTCTCTTATAAATATATTTTTTAAACAAGTAACATCTAGTAAAAGCAAACCCATCCTTCGCTTGCTATG\t>0776*65.(04+:0;<9));:989;(=?3;/:.=);B;7BD8>8BBC?B0>AA-6.>):7)962A?AA??0?@=@5C@@>55>?=C@>@D\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:58C2A29\nfoo_499/1\t83\tchr17\t38911344\t60\t91M\t=\t38910967\t-468\tTACTGGGGAGAAATGGAGATCCTGGAGGAGCAGTTTCTTCTGTGTTAGTGGCAAGCCTGTGGCTTAAGACAGAGCTTTGGCCCTCGGCTCA\t######E4CAEC>=A@>?:=@?9BA@BAA:ABBCCC?CBABBAACBAEBE??GGGGGFGGGGGGGGGGGGGGGGGGGGDGGGGGGGGFFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:4T86\n"
  },
  {
    "path": "examples/pydoop_script/data/base_histogram_input/example_2.sam",
    "content": "foo_499/2\t163\tchr17\t38910967\t60\t91M\t=\t38911344\t468\tTATTGAACCAGGCAGGGGAACCTGGGCCCCTGAACTCTGTCTCTTTATACTGCATTTTGAAAGCAGCACTTGGCTCTCTAATTGCCCCATA\tGGGGGGGGGGGGGGGGGGEGGGEGDG?GGGBGFGGGFGGEGGFFGGFGDGGGGGDGDGGFFGGEEGGGGEC5EEEGGGEFEADDEEFGBEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_501/1\t83\tchr14\t44191907\t60\t91M\t=\t44191517\t-481\tCAATCAAGAAATTAGAAATATATGTTTGAATTATTTACCCACTACTTAGTAGATTTTTGAACAAAACCTTTTTCTTTTCAATTTTTAATTG\tEFGEBGGGGEGGFGGGEGGGGGEGEEBEGGFEGFGGEFGFDGDEDGGGGGGGGGFGFGGGGDGGGGGGGFGFGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_501/2\t163\tchr14\t44191517\t60\t91M\t=\t44191907\t481\tCTGACCACTGAGTCAGGAAACACAAATACAGAATAAATACTTTATTGGCAACACATTATAATATACATTAAAATAATAGTATCTGTATTTC\tGGGEGFGGGFGGGGGGEGGFDDEEGGFFGGBFFFFGFGAGGGGGFEGFGGFGEGGGGGGGGGGGGGGGGEFGGGGGGFEFGEG=GEFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_503/1\t83\tchr10\t28470051\t60\t91M\t=\t28469666\t-476\tTTATGCTAACAACTAAAAAAGTTTCTATGATCACTTTGATAAGTACCATAAAGTCACTTGACAAAATTTAACATAATTCTGATTTATTTTT\tGGGGAFGFGGGCEEBEE:FEFGFGFGEFGGEGEGFGGFGGGGGGEGGGGFGGGGGEGGGFGFFGGGGGGGGGGGGGGGGGGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_503/2\t163\tchr10\t28469666\t60\t91M\t=\t28470051\t476\tAAAAATAAATAATGAAGTAATTCTGGAAGAGCTTGAAAGCTGGATTAAATCAATAACTACAGAAGAAATTAAAAGCACTATCAAAAAACTT\tGGGFGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGFGFGFGGGGGEGGGEGGGGGDGGGGGGGGGGGGGGGGFGGGGBGFGGGFFEFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_505/1\t99\tchr6\t52691824\t60\t91M\t=\t52692215\t482\tGTGGAAAATAAAGCTTTGTAAAATAATCGGAGATAGCTAGATAAGCAAACAAGTTAGTTTATTTAAACTGTATTGAAAAAAAATTAAGCAA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGFFGGGGFFEGEGFGEGFFFEEGFEGGEGFFFFGGGE;GGGBG5F\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_505/2\t1
47\tchr6\t52692215\t60\t91M\t=\t52691824\t-482\tGTGAGTTTAAGGGGGATGCTCTTGGAGTATATTTAAATTGTTCTGTTAAAATGTTTCGATTGACATTTACCTTCTCTGTGAGCTGTATCTG\tGGGCGGGGBFGGGGGGGGGGGGGFGGGGGGGGGGGEGGGGGGGGGGGEGFGGGGGGGGGGGGGFGGGGFGGGFGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_507/1\t99\tchr3\t142788081\t60\t91M\t=\t142788459\t469\tACTACTATTCATGTCATTTAAAGTTAAAGGATACTTCTTTGTTTTGGATTAACTTTTAATTTTTATAGCTAAATGTTTACATCTGTTATGT\tGGGGGGGGFGGFGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGDGGFGGFGGGGEGDGGGEFEGGGDDGFGGGGGAAFFDFFGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_507/2\t147\tchr3\t142788459\t60\t91M\t=\t142788081\t-469\tTTATGTTTTTCTCACATTAAATTTCTTGAGTTTCTGAAAATGGTGTTCCTTCAGTGTGCTCTGTTTTCTAAAAATCCATAGTAATCCATAC\tGAGGGFGEGEGGGEGGGGGGGFGGGGGGGGGGGGDGGFGGGGGEGGGGGGGGGGGGGGDDGGGGGGGFGGGGFG=GGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_509/1\t99\tchr6\t3270997\t60\t91M\t=\t3271365\t459\tGAAATCTATATTAGCCACGGTAAATTCAGTGTTGGAAGCTCAGCATGGGCTAGTATCAGATAAGGAAATTCAGCACTTGGCCATCCAGGTA\tGGGGGGGGGGGGGGGGGGGGEGGGGGGGGFGGGGGGFGGGGGGFEFGGGEFFFEFEEBECE@FEEDEEDDECCC?CBC@CEFEEDEAEECC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_509/2\t147\tchr6\t3271365\t60\t91M\t=\t3270997\t-459\tAAAATGCTCCCTTGCTGGACCAGTCTGGCTTCAGTCTCCTCTTGCTACAAGTGAGTTGCTGTGGCTGGCACATCTCTTAGTGTGACTGTCA\tGFEEDEFBGGFEGGFGF?EFGFGEGFGGGGGFGGGFGGGEGGGGGGGGGGGGGGGEGFGGGDGGGGGGGGGGGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_511/1\t99\tchr3\t142226380\t60\t91M\t=\t142226747\t458\tGAAACTTGTTTTGTGGCTCATTATATAGTCTATGTTGCTGAACATACCTTATGGGCTTGGAAAAATAATATGTATTTTTTCTTATTACAGA\tGGGGGGGGGGGGGGGGGGGGGGEGDGGGEFGGGFFGGGGFFGGFEFGGFGEFFGGBBC=C=CCCCA;?<A>?;<>EEE@A;?C?CE:BCEC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_511/2\t147\tchr3\t142226747\t60\t91M\t
=\t142226380\t-458\tGCTTTCTTAATCTTAGTGTTTACCTGCTGTATTTTTAAAAATCTATTTTCTTTCAGTCGCTCTGGATTTTTATTTTTAAAGTGCATCTCTT\tDFGEBGGFGGFGGFGFGFGFFEGGFGGGGGGFGGGFGGGGGFGGGGGGGGGFGGGGFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_513/1\t99\tchr4\t108147713\t60\t91M\t=\t108148076\t454\tTTATAGAAAGTTTACTTTTTTAGTTTAGTGATTTTTTTCCTCCAGTGGGAGATCTTGAATTTATAAAGCATAGGTGATTTGTTTGTGATGA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGEGGGEGEBGGGGDFFGGGGEECEFEECEE>EBEFEGEGGEEBFFFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_513/2\t147\tchr4\t108148076\t60\t91M\t=\t108147713\t-454\tTTTTAAAATAAATATATTTTGAATAAAAACACTACAAAATAAGAAATGATTTGAAAAGAAAGCAAATATTTTGGGTGTGATGGTGCTACAG\tGAGGGGGGGGGGGGGGFGGFGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_515/1\t83\tchr8\t1835390\t60\t91M\t=\t1835014\t-467\tGATGAATGAGCTTCCTCGTGTGAGGGAGTCTGGCGCTTGGGAGGAAGGGGGTCAGGCCGCCTGTTAGACATTAGTTATTCAGCCACAGCTG\tDABADC5A<??@>C<6AA-><EAFEEBB:?=CAA-BEBEEBEEBEDFFFBF?BEEE?BBDDBDDAD?CADDEE=FFDDDD=EFFFDFFEFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_515/2\t163\tchr8\t1835014\t60\t91M\t=\t1835390\t467\tATCGCCAGAAATGAACTGGCGCCTGGGAAATAGTGTGAAGTCCCCTTCCATTGAGGTTATTTTTATTTTTGACCAACCAAATCCATGGCAC\tEEEEGF=BEFFFFBF:DDD:DDDDDGGDFFEEA:EFFFEEGGBGDEACDDFFF?DDCEBEE>CCCGFGAGAFBEFE:EEAEDFADE-DAEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_517/1\t99\tchr1\t121186790\t54\t91M\t=\t121186832\t133\tCATAAACTCCTTTGTGATGTGTGCGTTCATCTCACAGAGTTTAACCTTTCTTTTCATAGAGCAGTTAGGAAACACTTTGTTTGTAAAGTCT\tFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGBGGGGEFFFFCGEEDDGGFGGGGGGGGEFEGD\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:17\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:2G26A46C14\nfoo_517/2\t147\tchr1\t121186832\t54\t91M\t=\t121186790\t-133\t
AACTTTTCTTTTCATAGAGCAGTTAGGAAACACTCTGTTTGTAAAATCTGCAAGTGGATATTCAGACCTCTTTGAGGCCTTCGTTGGAAAC\tCE>4>@EDDFGEGFFGGDGFGGGFEGFFFEFCEB=EGGGGGGGGGBGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGFGGGGGGGGGGG\tXT:A:U\tNM:i:3\tSM:i:17\tAM:i:17\tX0:i:1\tX1:i:4\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:3C41G24C20\nfoo_519/1\t99\tchr9\t84058926\t60\t91M\t=\t84059308\t473\tAAAAATTATTTTCATGTGTTAATCATATCTTTCAGGCAAGATAACTTAGAAATTATTCATCCTTTATAACTACTAGCAAACTTAGTGTGGT\tGGGGGGGGGGGGGGGGFGGGGGFGGGGGGGGGGGGGGGGGDDGFFGGGGGGGGGDGFGGGEEGGGGGGDGGGGGGEFGGGGFEGGDGGGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_519/2\t147\tchr9\t84059308\t60\t91M\t=\t84058926\t-473\tATTACTACAGTTGTTATCTTTATAATTAAAGTAGACTGACTACAGCATGAATGAATTAGGTTAGTTATAATGAAGATGAAAATATTTTTAT\tGGGEFGGGGGGFGGGDGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_521/1\t99\tchr1\t206206389\t60\t91M\t=\t206206755\t457\tGGACAGGCAAGAAGACTATACTATTCCTGCCTCCTATTTGCTCCCTCTTGCCAGTGGGGAAATCAGTGGCAACCTTAGAGTAATCTTGGAG\tGGGGGGGGGGGGGGFGGGGGGFGEGGGGGFGGGGGFGGEGGGGGGFGGFFFF?FCEEEECEE:EEEEEEECCBCBDBB?CA=CACBCBB5E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_521/2\t147\tchr1\t206206755\t60\t91M\t=\t206206389\t-457\tTAAATATATAATTGGAGGAAGAAATGTGTCAGGAAATAACTTCTTTACTTGTGGAAAGTCATTGAAAACCCTTGGTTTTGCGTATGAGCAC\tDGFGFFGGFGFGGGGGGGGGGGGGGGGGFGGGGGGGGFGGFGGGFGFGGGGGGGGGGGGBGGGGGGGEGGGGGGGGGGGGGGGGGGGGGFG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:32A58\nfoo_523/1\t99\tchr9\t91731195\t60\t91M\t=\t91731593\t489\tTAAGGGGCTGATTCTGGCACCCAGCCCAGCTGTCCTCATAAGCCAGGTGGTCTGCATGCTTGTCCTAGATAGTTCTAGACTGACAAAGAGC\tFFFFDDGGG?GEDFGDDFDGFGEFEGGFGD?DGEDEFGGDDGFGFBE:=EBEEEDFFDEBGGDBGGBFEGFB:FF?CCDDFF=AFFFEEF#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_523/2\t147\tchr9\t91731593\t60\t91M\t=\t91731195\t-489\tCGCCCCAGCCTGTGGAA
TGCTGTCCAACCTCTTTCACTTAGGAGAGACAGGACAAGCTGTAAGCGTTCATATGCCAGCCTGGAGTATGGGG\t5>5A???-DCC??A>A44;;;0*547(==1;FEEFDEEEEDBFDFDF?BFFD?DGBEDFGGDGGGGADFFF=?AEGGFEGGGFGDGGGGFG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:2G88\nfoo_525/1\t83\tchr2\t105643826\t60\t91M\t=\t105643428\t-489\tTTTAAACTTACATTTCCCCTTTCCATGGACTTCCTGTCCATTCATGCATCTTCTTTGGTGAAGTATCTGTTCAAATCTTTTGCTCATTTTT\tEBA?>-AAA-?:>;7?';?::@BB@BE?A=EB=EBBE:FEFCEA?DCBEA>FFGFGEEAEEF=FDFAFFDF?FEDGEGGGGEFGDDGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:16A74\nfoo_525/2\t163\tchr2\t105643428\t60\t91M\t=\t105643826\t489\tGTCATCGTTGTTATTGCATGAATCACTAATTCATTCCTCAATATTGCTGAGTGTGAGTGCACAACAGCTTGCTTGTCCATTCGTCATCACT\tEEE:EFFFFFGAGGGAFDEFDFFFFFGGEGGBGGGFEDGGFBDFFGDDGDD5D?DDD@DADDAEEDBGGEGCGG?EDD:CEBEBC:C:CDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_527/1\t99\tchr1\t23303761\t60\t91M\t=\t23304138\t468\tTACAGTGACAGGGGCAAGGTAGGAACCTTATAAATGTTTTGTAAATAAATTAATGTAATAGTTTTATTGTATTCTGTACTGGCCAAATTTA\tGGGGGEGGGGDGGGFFFFFAFFFDFGGFGGGGGGGGEGGGGEGGFGGGGGGGFGGEGGGGGEGGEFGEGEGGEGEGFGGGEBGGFGDEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_527/2\t147\tchr1\t23304138\t60\t91M\t=\t23303761\t-468\tCAACCTGTATTGGACAAAGGGATTAAGTAAGAGAGTAACACAATCAGAACTGCAGTTTTTGAAAGGCCATTCTAGCAGGATAAACTAGGGT\tGFEFBDGFGGGEFFFDGGGFGDFGFGGFFGGGGGGGGGGGDGGGGFGGGGFGGGGFGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_529/1\t99\tchr11\t130722616\t60\t91M\t=\t130722997\t472\tTGTAAGTGGTAAGTACCATGATGGGAAGAAGCCGGGGATCTGTAGAGATGCAAGGGCAGTATGTCTGGCTCAGCCTAAGGAAAGCAGGGCA\tGFDGGFFFFBFFFFFGGGGGDDGGFGGGGGGGFGGGGGGGGFAGDFAFFFEGGFGGGFGBFGGEFFFF-?B?==@:;B?A6>@>>?5BC##\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_529/2\t147\tchr11\t130722997\t60\t91M\t=\t130722616\t-472\tTGTTAGAAGTGCACTGGGATCCTAGTAAGTCACT
GAGAAGTGAGATGCTCCTTTTGAAGACTTCATCTTTAACCAGTGCTCAATATTTTGG\tGFEBDDG?FGG?EGFFAFAEGGGGGDGEGGGEGDGGGFFGGGGGGGGGGFGFGGEFGEGDGGGGGGGGGGGEGGGGGGGDGGGGGGEGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_531/2\t145\tchr7\t145234\t37\t91M\t*\t0\t0\tCCATGACGACACCGGGAGTTCCCGCTCCTTTCCATGGCAATGACTCAATGTCCCAAAAGTTACTATGCCTTCCTTAGAAATTTCTGCATAA\t################B::;(D=CDD?C:CC::C=AB.?B=5>50+;;A<(:BB7:968872)355:73?:97:62?=<B=4>665/4:;:\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:20A29A40\nfoo_532/1\t99\tchr5\t27195861\t60\t91M\t=\t27196237\t467\tGCCAACGAAATCCTGAACAAAAAGAACAAAGCTGGAAGTTTCACATTACCTGACTTCAAATTATGCTACAAAATCACAGTTAAAAAAAATA\tGGGGGGGGGGGGGGGGGGGGGGGGEGFGGGGGGGGGGGEGGFGGGGGGGGGGGGGGGGDGEGGFGFFEGGGGGGGGGGGGGGGGGGGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_532/2\t147\tchr5\t27196237\t60\t91M\t=\t27195861\t-467\tCACTCAAAGACATTGTTCATGGCAAAGATTTCTTGAGTAGAAGCTCAAAAGCACAGGCAACAAAAGCAAAAATGAACAAATGCAATCTCAA\t?=CAC=GEGFGGGGFEGGG:GGFGEGBG=GGGEFGFGFGGGGGGEGFFFFFGBFFGG@GGDGGGGFGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_534/1\t83\tchr11\t66280042\t60\t91M\t=\t66279654\t-479\tTTAAAACTATCTTTTGGGGGACATAGTTCAACCCACAACAGACTAGCAAATTTGATATCTGAGATGATTCATCTATCAGAAGCCAAGAGAT\tC>CB?5AB??CDECBEEEEBECBBB?CAADABECBDGDFFFGGDGGAGGGGFGGGGGGGGGGFFDFFGEGEDFEDFEGGGGGGGGFGGEGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_534/2\t163\tchr11\t66279654\t60\t91M\t=\t66280042\t479\tAAATCTCTGCTCTCTAGGGCTGCTGTAACAAAGTACCACAAACTGGATGGCTTGAAACAAAATATATTTATTTTCTCACAGTTCTGGAGGC\tGGGFEGGGGGGGGGGGGGGGGGFGDFGGGGGGGGGGG=GGGGGGGGGFGGEDGDFFFFDDGGEGEGDGGFDGGGGGBGE5CBECCAD=ADC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_536/1\t83\tchr12\t30789694\t60\t91M\t=\t30789324\t-461\tAGAGGCTGCAAGAGCCATATAGAACATTACAAGTTTTATTTACATTATTATAAGTACCTGTTGGGTTAA
AGAAGAAAATCAAACTAAAATT\tGFFFEFC?GGFGGGGGGGFGGFFEFFBDGFGGGGEGFGGDGGFGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGGGGFGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_536/2\t163\tchr12\t30789324\t60\t91M\t=\t30789694\t461\tAGATGTATTAAATGTAACAAGGGATCACAGTTCAGCAGCAAAAACAGGTACCACATTCTAATTCTTTAACAACAAAAACTTTGGTAGAACC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGFGFGGGFGGGG?GGGGGGGGDGGGGGGGGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_538/1\t99\tchr18\t14052702\t60\t91M\t=\t14053081\t470\tCAACAATGGTGGTGCTCATGCTAAAGTCTAAAAAATACTTGTTTCACACACATTCCCCACACAGACAAACACCCACTGAACACATAAAACA\tDBDDDDD-BD*7)77DDDDDDDBBDC5C?CC?CCC>@<CCDDDDAD-DDDCDDDAC@?C,(:-64?BA4B;<;61CC=?==AC=CB?;DD?\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:5G85\nfoo_538/2\t147\tchr18\t14053081\t60\t91M\t=\t14052702\t-470\tAGTAAGATTAACTCACCCTTTGGAACCTGAAGTCTACAATTAGGAGTCAAATAAAGATGTATTAAACATGACTCTGAGCTTATGGTTACAT\t################?>A=?@=6=B9@@>C85?9)>@6>>B:?DDE@@EE4DFEDA=?DC?AD-D:ECBC==B8=624(5-=CACD=DAD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_540/1\t83\tchr5\t16736316\t60\t91M\t=\t16735955\t-452\tTCTTACTGAGGGCAGAAGACAATTTTGATTCTTTCTCTGGTCCGTCCTAGAGAAACTTTTCTTAAATTCTGCTGCAGGCCAGAAGTAGAAC\tEEEDAFGEEGGGFFGGEEFFGGEGDEFGFEGGGFGBGFGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_540/2\t163\tchr5\t16735955\t60\t91M\t=\t16736316\t452\tCAGTTGAGGAAGAGAGAAAACCCTGAATGAGGATGCAAAATTCTAAGGATGGATCATCACATGCCCAGCCTGGAGGAGTGAAGGGCTTGGG\tGGGFGGGGGGGGFGGGFGGGGGGGGGGGGGGFEFGGGGGGFGGEGGFGGGGGGGGEEGGGGFGFGGGGGGDFGEGEAEAEABCCC=BBBCG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_542/1\t99\tchr16\t1641294\t60\t91M\t=\t1641669\t466\tCACACACAGGTTATGGAGAGGTCGACTGAGAGGTCACACAGAGGTCATCTAGGGGTGGATTTAGGTCACGCACAGGTCACGGAAAGGTGGA\tGGGGG
GGGGDDGFGDFFAFFFBFF?EEEEEDEAAFGGFGEE=EEBFBFEFDEAED=@>-=BC?=CAEABE?BBBBE@ECEBDBDA@B?BB4\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_542/2\t147\tchr16\t1641669\t60\t91M\t=\t1641294\t-466\tAGAGGTGGACTGAGAGGTCACGGAGAGGTGGACTGAGGTCACACAGAGGTCACCTAGAGGTGGATTGAGGTCACACAGAGGTCATAGAGAG\t4?C=@5FFGGADGGGGGG=EGGFGGGFDEGFFGGGGFFEGGGGGFGEEFEEEEGGGGGGGFGGFFGGGGGEGGGGGGGGGFGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_544/1\t83\tchr7\t61606857\t60\t91M\t=\t61606477\t-471\tTTGCTTTAGAGAGAGCAGATTTGAAACACTCTTGCTGTGGCATTTTCAGGTGGAGATTTCAAACGATTTGAGGACAATTGCAGAAAAGGAA\tDGFFGFEFGGGGGGGGEGEEEDEFECFAFEFEDEFGGFGGGGGFEGGGGFGGGGGGFGGGGGFGGGGGGGFGGGGGGGGGGGGGGFGFFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:23\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:62G28\nfoo_544/2\t163\tchr7\t61606477\t60\t91M\t=\t61606857\t471\tTCTTTGTGCTGTGTGTATTCAACTCACAGAGTGGAACGTCCCTTTGCACAGAGCAGATTTGAAACACTCTTTTTGTGGAGTTTGCAAGTGG\tFGGFGGGGGDFBFFFDGGGGGGGGFBGGGFFBFFFFFFFFFFGGGGGGGGGGFEGGDBGFGGEFGGDGDGGGGG<GGGDFEDFFFEAFEFG\tXT:A:U\tNM:i:2\tSM:i:23\tAM:i:23\tX0:i:1\tX1:i:1\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:45A33A11\tXA:Z:chr7,+61604608,91M,3;\nfoo_546/1\t99\tchr7\t47639512\t60\t91M\t=\t47639885\t464\tAGACAAAGCTCACAGTGAGGGGGGCAGGAGGGACTTCCACTGCTGGGGTCTGCTTTCCCTGCTTAGCGCCCTCACCTCCTCTTGAGCTTCC\tFFFFFFFFFDEEEE==EBEEFFFCFFDEEFFFDDFDEEEE=FFEFEEF@FC?CBBBBFFBFEEE:E@EDDEBDDDFFFFEEBA=BBABBBF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_546/2\t147\tchr7\t47639885\t60\t91M\t=\t47639512\t-464\tTCAAGTGTTCCACCCGCCTTGGCCTCCCAAAGGGCTGGGATTACAGGCTTGAGCCAACACACCCGGCCTGTCTGCTTGTTTTTAACAGACA\t############@A5AEDDDE?@8?==EEEED?ECAEFFFFD5D=DEEEBA?ACA@=D3D?FGFGGGFFFGGFGGGGGFGFFGGGGGGDGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:7A83\nfoo_548/2\t129\tchr22\t24507978\t25\t91M\t*\t0\t0\tTGGTCACATGTCATCCACCCTGAGCCGCGGTTTTTTCCTTTGTAATGGGGAAACAACACCTTGCATAGACCTTATAGGATTAAGTGAGTTA\t,367,2
2737==5==:=:::67+,7-A95;B=7-=9@;=;=?5-578+8,999;<A6=D=EDDDBC5C5:;A@9;==.?############\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:33C13T36A3A2\nfoo_549/1\t83\tchr18\t67459745\t60\t91M\t=\t67459370\t-466\tCTTTTTTTCCCTTTCCTTCCTCCATGCAGGATTGCTTGTACTTAGTCATTCAGTAGATGGTGGTCACTATTTGACTAATAATTATTGCCTT\t@>ABBEA>ACABBADDD=FFCGFGFGFFGDGBGFDGGGGEEGFGGGGGGGGGGGFGFBDFFGGGGDEEEEEFFGGGGGGGGDGFGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_549/2\t163\tchr18\t67459370\t60\t91M\t=\t67459745\t466\tATTCTCACTTTGGGCTGCGGGTTTCAGGCTTTTTGCCTTGAAGGTAGGGTTTTGCCTGAGTTCCATTCCTATCTGCCTAGAGTTTCTCTGT\tGGGGGGGGGEEGBGGGGGGGGFG?GGGGEGGGDGGGGFGABBDD=DDDDDDEFFFGGGFDGGDGGFGDGGGGGEFBFDFF:ECDFF?F?FG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_551/1\t99\tchr17\t15311771\t60\t91M\t=\t15312143\t463\tCGGTCACCAGCAATGCAGTCCTCAAACACGGACATTAGAGTGATTATTCCCACCCCCAAGAGTGAACACCTACTATGTGCTGGACCCTAGA\tGGGGGGGGGGFAFFFFFFFFGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGEGGGGGGGGFGEGEFGDFGEFGFFGGGGGEBGEGGGEFF?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_551/2\t147\tchr17\t15312143\t60\t91M\t=\t15311771\t-463\tGCCTTCTTCTGAAATAAAATCCTCTTTAGATAAGCTGGAGAGAGAATGGCTGTCACTAACAGGCTGTTGAGAATTAGCCTCCTTCTCCCCA\tCECC:CB=CC:GGEEFFFD?FEFEGGEGFFFE?EF=FFBFEFGGGFGFBGGGGGEGFGGGEGGFGDGEGEGGGGGGGGGCGGGFGBGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_553/1\t99\tchr21\t46880953\t60\t91M\t=\t46881316\t454\tGCCTTAAGTCGACACCTGATCTAACAGAAACTAACAGGCTTCAGCAAATGAACAAGATTAACACCTTGCAGACTAGTGAAGAAAACACACT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGFGGGGGGGEGFGGGGGEGFDGEFDFFBEGCCFEFFD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_553/2\t147\tchr21\t46881316\t60\t91M\t=\t46880953\t-454\tAGAAGTGAATCGCAGGTAATTTCCGTTCCACTTCCTAAAAATCTGTGTTTAATGAATGTGAGCATTACAGAGAGCTAAGCGTCAGCTCACA\tBGGGGGGGGEGGGGGGGGGGGGG
FGGGGGGGGGGFGGGGGGFGGGGGGEGGGGGGGGGGGGGEGGGGFDGGGGGGDGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_555/1\t83\tchr21\t19675384\t60\t91M\t=\t19675012\t-463\tATGATTTTAAAGCAAATAAAAAGGTGGACATATTATCCTGGAGGAAAAGAAACAGTACTTGATGTGAGCTATTTTATTGTTGCCATGTTCT\tF?FBGEDBGGGECEACCGFGEEDBFFCCEBEFEBF?FFDFFGAGGGGGFGGGGDGGEGGGGGDGEGGGGGGGGGGGGGGGFGGGGGGGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_555/2\t163\tchr21\t19675012\t60\t91M\t=\t19675384\t463\tACCATCTGATGCTTTTATGTGACATATATTTATGTTACACATTAAAAGCTTTGTATATTAACATATCTTCTTATCTGACGTAAGTTACAGT\tGGDGFFFFFDGFGGGGEFFFEGGDBFDGGGGDGGGGGGGFFFEGGGGGGGGGGGDGGGFDGFGGFG=DGGGGFGDE?EEFDFDFCF=DEFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_557/1\t83\tchr4\t98523834\t60\t91M\t=\t98523445\t-480\tCCTGCACCAAGGGCTTCATGGGGAGTTCCCTATGATCAGTTGACAGAGGAAGAGAAGACTAGGGCCTGGTTCACAGATGGTTCCACATGAT\tB@>@C=:EEBFEFFF?=EDED@DDEE@BB?A5?DCC::E:EAF?GFFFF=DEAFEFFFABFFDEGGGGGGFGFGGGG:EGGGGGGGGGEGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_557/2\t163\tchr4\t98523445\t60\t91M\t=\t98523834\t480\tATGCGGCTTGGAGCCTTTGACAGGCCCCCATAGGTGAAACACAATGGAGGGCTCTAGGGTCTTGGGACAAGGCCCTGCCATCTTCTGCAGA\tGFFGFDGGG?GGGEGFFFDEECEDDGGGGE=@5D@B>:4>?@CAADD-BDC;CA@AACC-<7=3B##########################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_559/1\t83\tchr17\t27857630\t60\t91M\t=\t27857610\t-111\tAGTAGAAGGGAGGGTATAAAACAAATCTGATCCATCAGATAGAAGGCAGGAAAGGGGAACAAAAGTAGCAAAGAAGAAACACGGGAACAAG\t@81CCA@>CCCC-C?A8@?7@;?<:3-2:2AC5CB?CACBC:@;:97>3@C@B>;>5A@;@;A?1;6CCCCB=@=@?-C@>=>=:BCC?CC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_559/2\t163\tchr17\t27857610\t60\t91M\t=\t27857630\t111\tTGATGTACAACTCTCAGAGCAGTAGAAGGGAGGGTATAAAACAAATCTGATCCATCAGATAGAAGGCAGGAAAGGGGAACAAAAGTAGCAA\tDEBD:EDDBEFFBFE==9;BDDDDCFFF=55AA39:>@=?@.@:A2?C-=E
EA5A@@6>6C=:CC:DABD:A5A@################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_561/1\t83\tchr7\t109887272\t60\t91M\t=\t109886912\t-451\tATTTTGGTGTGTTTATTGGCCATTTGGTTTTTTGTGAATTGATCACATTTCTAGTGGTTTGCTTCTCTTGTCTGCCTTGTCTTTATGTTGT\tCC=CC=:AFDFDDDDD>@6B@5A=EBACCAFEE5B:FFDFFFDEEFFDFFFFEFEFFFFFFFDDFFFFFFDFDFDFEEEBEEEEDDDEEDE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:19T71\nfoo_561/2\t163\tchr7\t109886912\t60\t91M\t=\t109887272\t451\tTAATTCATTTGTTCATTTAAATAACTAAAATAAATCATGGTTTGGATATTTTATAATTTATTTAGGAATTGCCCTAGCACTGAGCAACATA\tGG?EEGAGGEGGFFGGDGGDFGGDGF=FFDBFC@CD5:D5?ACCC=D=DBC;ACAEEDD?EE8ACD-DD?@####################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:66C24\nfoo_563/1\t99\tchr16\t76387682\t60\t91M\t=\t76388052\t461\tAAGGGAATGGGGACTGATTATTGCACCCACCCTACCATCCATCTGCAGCATCTTTGCAGTGACAAAGCCTACTATGTACCTCAGCTTTTCA\tGGGGGGGGGGGGFGGGGGGDGEGFGEGGGGGFCGGFGGGGEEG?BGDGGGE=GGFFEBFDEA?:=CA?CACBBB:D?BDBFFFE=CFBFC:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_563/2\t147\tchr16\t76388052\t60\t91M\t=\t76387682\t-461\tGGCACTCAATAAAGGTTGTTTCTTCCTAAATGACTTGGGCTTTTACATACAGAACTAGGACATGAATTGCCATCTCTTGTTTTCTTGCCTA\t#@>>@>6CEEEDD-BA;CEBE5EDE?>C@=;D?DDBB5EEEEEEADED:EEEEDEE:DAEF=FFFAB?GGGDGGGGFDDGGFDGGGGGGDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_565/1\t99\tchr13\t34433741\t60\t91M\t=\t34434104\t454\tATGTAAAGCTAGGCACTTCGGATACAATGATCAAGATAAGGTCTCTGCCCTGGAGTTTATAGAGTTGTAGAGGGGGACAAATATGGGCAAT\tEEECEDFFDFGGGGFGGFGGFEEGFGGFBDF?FFFGGEGFBEEE?FEBFDEEE@CBEBBD2>>A5<=73;07=1'.*;30<?;?->,7<=D\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:74T16\nfoo_565/2\t147\tchr13\t34434104\t60\t91M\t=\t34433741\t-454\tATCTCAATAATTTCATAATATTTACTACATTTTGAAATGATATTTTGGATATTTGGGTATATGTACTGAGTTAATACAAGTTAATTTCACC\t24::2>5ADD?EAE=EC?=CCAEEEACEEAEE=EE?BGGGGFFDFFD=ADDDBA:EDD?ADE:EEE
:EAEEEFEEFEEE:DFFFFFEEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_567/1\t99\tchr10\t24568194\t60\t91M\t=\t24568578\t475\tTCCGTTCCAGACAAAAACTTCCAGAAGAATTAGATTTTCTGCTTTAACTGCCGCCTCACCTGTGGAGCTCTGGGTACAGGTAAATATGCGG\tFFFFFGFGGFGGGFGGGGGGDFGGGEGDGGGGG?GGGGGGAGGGGDGGGFGEDGBGEFGGGGBGG5B@BBBBBECB@:DB@B?BAEEE?EF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_567/2\t147\tchr10\t24568578\t60\t91M\t=\t24568194\t-475\tATGCTCCATGTATTTCTTATTTATTCCCAGATGTGTGTGTGTGTGTGTGTGTGTTTGTGTGTGTGTGTGTGTGTGTTCTTTTACTCTTACG\t###########################C=5DD:EDE-DFFEEEDEDDDB5BE4AAEFDFDFA?EEEB5:DEF=F:EGFFGDFFFDDGBGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_569/2\t145\tchr11\t112652768\t37\t91M\t*\t0\t0\tGGGGGGGGAGAACGGAAAAGCCAGCCCTTTGTATCGAAATTTTGCTTTTTTTTCCCTCATTCTACTTTAGAACTGCAAGCTTGTGCACTGT\t########################B=-;?;;?=A-CCBBAD-CA<@14:=-=6CC?AA-AA71,7.,27-,---?AA:A?A=7772+.13:\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:12G21A56\nfoo_570/1\t99\tchr11\t125184448\t60\t91M\t=\t125184829\t472\tATACTGTATTTTTGCAGTCCACTTACATTCAACTTTGGTATCCTTATACTGTGTCTTATGTAAACAGTATATGCTGTTTTTAAAGTTTTAG\tEGFEFGGFGDFGGGFGGGDDFFFFFEDEFFGEBGGFGGFFGGEDGD:EEEEBEBDDFFFFBEBABED@CCB@B5BFFFFFGG=G=ABDDDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_570/2\t147\tchr11\t125184829\t60\t91M\t=\t125184448\t-472\tAGGACCCCAAAATATCTTAACTCCATTTATTTTTCCAAGTAATATTATATTATTGTCATGTATTTTAATTTTATATACATTTCAAACCCCA\tBDAADADFF?FEEEEF?EDBEE5DB>ECFFFD?AFEEEEEBDEABEEEFEFGGGGGEE=EEDGEFGDDGFGDAFDFEE?EEFFFFFDEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_572/1\t99\tchr2\t126452154\t60\t91M\t=\t126452553\t490\tTACCTAAAGTCAACAATATGCAATAGTTGTGTCTAGATTAAAGTCTGGGCCATGCAAACATAAAAATCACATAGAGAGTGGCATTATAGGC\tGGGGGGGGGGGGGGGGGFFGGGGGGGGGEEGGGFFGDGDEGGGEGGEGGGGFBEFGGDDEEEFDFG=BFGF=FEEGGDCEGGGGGE:=BEG
\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_572/2\t147\tchr2\t126452553\t60\t91M\t=\t126452154\t-490\tCATAGCACTTATGGTCTCATCAAGGAAATAAACATATACATACATTAGTGTGCAGAAGGGTCTTTATAGTGTTTTTTAGGAGAAACTGTGA\tDGFAGGGGGGGFFEDDBDGGAGGGGGEDFFDFFFFDGBGFEFFFDFEGGGFGFFGFFGGGGGFGGGFGGGGGGGGEEGGBGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_574/1\t83\tchr12\t57128540\t60\t91M\t=\t57128155\t-476\tTTTTTCTCCTTTTGCCCTTTCAGTATAGAGGTTTCTGAAGAATCTGATCCTGAGACCCTCGTTCTAATAGGTCAGTTCCAGACGACCAAAT\tAD@D?EE=EDE?BBBB@E@@DDEDDDCCCD@BBBBEFEBEFBBEECB??CDDBD7CEEECEDFFFFDFDFFEEDEDEDBEEFBFDFFFFDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_574/2\t163\tchr12\t57128155\t60\t91M\t=\t57128540\t476\tAATGAGTATATAATAAGCATTAAACAATTAGCACAGTTATTGGTCCACAGCAAGTACCCACTAAGTGGTGGTTACTCTTACTATCATCATC\tDDDD?B@DDBDCDDDEEE=EDDCD:A9C?CC5?<?D@?DDFFFEEEFFFDEDBEDD=DACCCC@CBD?ADC??5B?@?A?AAAA=B5B:CE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:82G8\nfoo_576/1\t99\tchr4\t21534749\t60\t91M\t=\t21535109\t451\tTTCTACTTCTACAATGCAAATCCTTTTACACTTAGACCAAAAATTATAGGTGTCAATAATCACATATCCAGAAAATTAGTAGATACAAATA\tGGGGGGGGGFGGGGGGGGGGGGGGGDGEGFGGGGGFGGDGEGEGGEDEEEBFFFFGGCDDGDGGGEGFFGFDEFFFGGGFGGFDGGGBDFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_576/2\t147\tchr4\t21535109\t60\t91M\t=\t21534749\t-451\tTGCCCACCACCACACCTGGCTAATTTTTTTGTATTTTTAGTAGAGACAGGGTTTCACTGTGTTAGCCAAAATGGTCTCGATCTGCTGACCT\t>B?CC@@@;>B'@6<4GE=GGG>D8GGGDGGDFFGGFEDGFGFBGGGEFGEFFFFEEFEFFGFFGFEGFGGFGGDFGGGDGGFGFFFEFFF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:78T12\nfoo_578/2\t129\tchr2\t86499980\t37\t91M\t*\t0\t0\tAGGATATAAATAAATTTTTTAAAAAAGGTAAGAAATATTCAGCTTTACTTGGAATCTAAGAAAATCAGTTTTAAAACCAACCAGGTACAAA\tDDD5DDAEEEGEFCG?EEA?EFEEDDD:5DDDDD?DEEA:DBCCBFFDFFAE5EAF:FFFGEECGFGFGGA?DFFGG?FBDDDDDACD###\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i
:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_579/1\t99\tchr2\t225371035\t60\t91M\t=\t225371427\t483\tGTGCTAGAAATAAAGGATTGAACAAGGAGACACAGTTCATGTTTACTGTGAAACTTACATAGCAGTGATTGAAAAAAACATGACAAATGGA\tA?BBAEDEEEAE?EE5CCCCEEDEEBEB:EDDBDA=>C>>=CA;CDA:CC=?C@=C:C:?AA<5B?C5=BBACB=;B:=>CAA5:EA6EEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_579/2\t147\tchr2\t225371427\t60\t91M\t=\t225371035\t-483\tTAAAATTTGTGAGATTTGAGTGACATTTTCATGGCCCAATAAGAATCAATATAGTCATTACTTTCTCTACCACCATGCTGACATTTCATCT\tEAAAA->>@@:8.=B:;898;:+:8B8;9;;@6@@;;>@;;=:?>:9A-?5?5707@;@6?BB>B4A5B;CCB?5ADDDDE;;;;;;5;>;\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_581/2\t129\tchr10\t36765904\t37\t91M\t*\t0\t0\tAGCTTTGTAAAGGTGGATATAAAGTCAGTCTTGGCAGGGCACTGTGGCTTATTCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGGGG\tEGGFGGGGFGDDD@BDDDDDD?DEAD?DDD:DBDCEEAE-C-A>A=B4:=.@?######################################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:88T2\nfoo_582/1\t99\tchr2\t205362768\t60\t91M\t=\t205363149\t472\tAAGAACCTGTGACCTCCTCAAGTCCTTTACATAAATTCTCTAACTTGGTGAAGGTGAATTTTTAGTTTTCTTCTTCTTTTTTTTTTTTTTT\tGGGGGGGGGGGGGGFGGGGGDGEGGGGGGGDGGGEGGGGGFFGGGGGGEGCDEECEC=E:ECCCC=CBBB?CCEFFDGDGGGGGGFEGGEG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:75T13A0A0\nfoo_582/2\t147\tchr2\t205363149\t60\t91M\t=\t205362768\t-472\tTATAAAATAGTTCATGGAACAATTTAGACCTCTTGAGTTGCATTTTTCTACCAGCTCTTTATCTCTGGTTGTTTGTTTTGCAGTTAAAAAC\tB?=EDFADGEFEE?FAFEEFFAF?DFF=FFEGGFGGEDE5E;CEEEDFD@EGGGFGEDEB=EG?GFAGGGGGGGGGEFGGGDGEGGFFFBF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_584/1\t99\tchr6\t123435813\t60\t91M\t=\t123436181\t459\tCATCATTAACTAAATTTCTGTAAATTTAGAAAACATCATATCAAATAGCTATGAAGAAATGTGTCATATAGAGACATATGAATTTGGTGCT\tGGGGGGGGGFGFGGGGGGGFFGGGGFGGGGGGFGGEGDGGFFAFFFFFFF?GGGGGFDGGFEFFFDDEEEGBGGFDFFFFGGFGGGDEGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:
i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_584/2\t147\tchr6\t123436181\t60\t91M\t=\t123435813\t-459\tGTGTACTGGGGGTATGGGAATAAACAAAACAATGTCTTTGCCCTTCTGAAAGTCACACTCTAGTGGAGACAACATATAAAAAGCCAGTAAG\tFEC-EEEGGGGCGDDGGGGGDGGCGFGGGDEGGGGGGGGGDGGGFGFGFGGGGEGFFGCGGGGDGGFGFGGFFEFGFGGGGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_586/1\t99\tchr2\t99374000\t60\t91M\t=\t99374385\t476\tACTAACATGTTTTGTATGTTTTATATATTACATACTGCATTCTTACAGTAAAACAAGCTAGAGAAAAGAAAACGTGATTAAGAAAATCATA\tGGGGGGGGGGGGGGFFGGGGFFGGGGGGGGGGGGGGGG?GGFGEGFDGGDGGGGGEGGGEFEFEFEAFECA?AE?F=EEEGGDGGGAGGBG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_586/2\t147\tchr2\t99374385\t60\t91M\t=\t99374000\t-476\tGGAGAATCAGAACGCTGCTGGCACCCAGAAGCTTCCTGTGTGCTTCTTCCTGATCAACTCCCCTGCCTTCCCCTCCTACCCCCTGACCTTT\t>?+@A?.0282,77<5??8A=C<CEA=BD@ACCCABD5=EEEEEDBEEBEDCA@EC?BACCGG=EGGEEEGFEEFGCAECEGGFGEFFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_588/1\t99\tchr1\t21048631\t60\t91M\t=\t21049019\t479\tAAAAAGTAAAAATAATAAAAATTACTATTAAAATAGTGAGATTATGGTTGACAATATTATGTGCTATTACATTATTTTAATAGCTACAAGA\tGGFGGGDGFFGGGDGFGGDEGGGGGGGGGFDGGGGGEGGGGFGGDGGEGGEGGGGGEGGDBDEFEGGGGFEGGEFGGFFFEFGG=D?5D##\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_588/2\t147\tchr1\t21049019\t60\t91M\t=\t21048631\t-479\tTCACAGTAATCCTGTCTGGAGTGCTCTTTCCCTCTGTTTCACACCCATCCTTGCCTACGAACCTGTTTCATTCCTACTGGTCCTTCGGATG\t9B5BAC5C?:CCAB5AB=AE=:BEEEBE:EEGAGGGEEBEEACDDDGGGGFFFFGDEFFFEFFDFEGGEFGFEFGFGEGGGGGGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_590/1\t83\tchr6\t158851552\t60\t91M\t=\t158851161\t-482\tATTTCACACCATTGCCTTTGTGTAGAGAAATATTTCTTTTCCTGTGTTAATGAGCTATGTACTGAATATAAACCAGTGCATTTAAAGTAAT\t>:CAEBFCF?FFDFFFDD@GDGGBGFAGGGGCCCBE?FFEFGGGEGFGGGGGGGGGGDGGGFGGGGGGGFGFGGGGGGGGGBGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:9
1\nfoo_590/2\t163\tchr6\t158851161\t60\t91M\t=\t158851552\t482\tACCCATTGAATGTATATCCTGAGAAAAATTGGGGCCAAAGAAGCAGGAAAATCTCAAAGCTCTAATGGCAGCCTAAATCCAAGAATTTCAC\tGGEEGGGGGDGGGGDGFDGGFGGFGFBEBFF=GEGDGG?DGFGBEFFBFFFFFFFDDGFEGAAGDF=F5EC####################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:72A6A11\nfoo_592/1\t81\tchr18\t3888272\t37\t91M\t*\t0\t0\tTTGAGATGCTTGACCAGCAATATGTTCCAGAAAATATTAGAAAAATGGATATGTCTCTGTGGTAAACCCTGTCCTAGGCATCTTATATGTA\t#################BD?BCB5?D<974;>4<;@:2226>=B5=C=C?C==DDD@C@>?CC?CC59=@9?;;<5;9;+;;A<;;;-<>;\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_593/1\t83\tchr2\t153249797\t60\t91M\t=\t153249400\t-488\tCTGAGGCAGGAGAATCACCTGAACCCGGGAGGCAGAGGTTGCAGTGAGCCAAGATTGCACCATTGCACTCCAGCTTGGGTAACGAGCAAAA\t?5E:EED=FFFBEBCE:?@BB??CB<EBEEGEFEGGGGGFGGFGFGGGGFGGGGDGGG?GGGGEGGEGGGGGGGEGGGGFGGGGGFGGFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:55C35\nfoo_593/2\t163\tchr2\t153249400\t60\t91M\t=\t153249797\t488\tGTCAGAAAAAGCTAGTCAACAAGACACAACAGTTTTAAAGAAATTCAATATACTCAAACTCAACCTTCTGTGTGTCATGGAGACAAACACA\tFAFFFFGFFGFEGGDGGGGGDGDDFGGF?GFGDEFGDFEFFDFGGFGGBGGDGDGFG?G=FDFDFEGGGAEEEECFDEFEGGEFBEB?EE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_595/1\t83\tchr6\t20295563\t60\t91M\t=\t20295183\t-471\tTAAAATTCCATAACAAGATACATACAACACATCCTGCACAAGCATCGAAATGGCCCCTCCACATGCAACTAATGCATACAGTGGGTGTTCA\t?CBB:A=B?B?C@BBC>:==>>;:>?5>7<:=C@=@E=:EEFEEBFEGEGEGGDFGFEFGFFGGGFEGGGGGFGGFGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:79G11\nfoo_595/2\t163\tchr6\t20295183\t60\t91M\t=\t20295563\t471\tAATGTTTATTGCAAAGTTAAACTTATTGGGATAAAAATAACCTTTTATGTTTATTCCTCTAAAACATCTTTGACTACCATGACTTGCTCGT\tFGGEGGGFGGGGAFGGGGFEGFGGDGFFGGDFEGGEGEGGGGGGFGGGDGGGFGDGGGFGBFDFFGGFDGEDEGEFGGBFGEF==:DDBCA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_597/1\t83\tchr18\t2
385287\t60\t91M\t=\t2384901\t-477\tGTCCCTGGCTATTGTCTATCAAAGACTCCTCTTATAGCAGCAGCTGTAAACCGGCAGATGTGGAGCAAAATAAATCCAATTCAGATTTTCT\t######@,:BBEEEAE????@CBB8ADCCCECCEE?DBEEFFGGEFGGFEGGGEFGGGFGGDGGGGGGGGFGGGGGGGGGEGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:24C66\nfoo_597/2\t163\tchr18\t2384901\t60\t91M\t=\t2385287\t477\tCCCTGCTAAGACTGCATGGTCTCGGGTTATTTTTAGTGGTCTGCTTCCTTCTTTAACTTAATTTTGTGGTGATGTGAATAGAAAGGTTATA\tGFGEGFGFFGBGDFGDFFFFGGGGGFEFDFGFGGFGEGGGGFFGFGGGGFAGFGAGGGF??GGGGGG?FEFDFFEFEEF=ECEEED?A@:;\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_599/1\t83\tchr3\t25544180\t60\t91M\t=\t25543812\t-459\tAAGGGTTTTTTGAGGTTTTCGAGGGTAGATAGATGCTCTTGGCATTCCTCTAACAAACCTAAATGTGATTATTTCTCCTGAGTGACTAAAA\t>E?EBDEB5DEDDDC?BB@B?EFDDFE?BBCGEFEGBGCFGDGGEGCDDD:EEEAEEFFBFGDGGEFFFFBAGGEGFFFFFFFFBFGBFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_599/2\t163\tchr3\t25543812\t60\t91M\t=\t25544180\t459\tTGTAAACCCAGCAGTGGGCCTTTGTCTGTGACTCGATTTCCTCCTTAGGGAGACCTAGCTGGCGAGGCCTGCAACTGAGAAACACCAAAAA\tEDGFGGDEGBFFFF?EE?EDFFFFDEBABDEEAE?EFDFC?=ED?DEE?-5DCD=ECAEEEEEE=CADC::=@?D?5?#############\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_601/1\t83\tchr7\t72008981\t23\t91M\t=\t72008612\t-460\tTAAGTTTTATTTTTTGAGATCGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTCACTGCAACCTCCACCTCCTG\t####A1;?BEE9>:>>146)%66*262/07(:0.1>(C>>>B@ADAB=D:7CCDACBE:EE<>=>6@?5D=:C:??EBEEEFBAFEFFFFF\tXT:A:U\tNM:i:2\tSM:i:23\tAM:i:0\tX0:i:1\tX1:i:1\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:3T16G70\tXA:Z:chr7,+74933732,91M,3;\nfoo_601/2\t163\tchr7\t72008612\t14\t91M\t=\t72008981\t460\tGGCATTTGGATAATCAGTCCTAAGGCCCCAGGCCCTGCTCTGTCATTGACTGTCATTTTGAGTGAGTCATTTGATCTCTGGGTCTTTTTCC\tBDGG?GFGGDF?FDFEGFGGE=EE=BEDE,?CCCCFFGGGB=DE:DDCCD?DBDDFDBF55C:C?B:A-5=8==.751/:6?,?=?CB<?5\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr7,+72008612,91M,
0;\nfoo_603/2\t145\tchrX\t136132778\t37\t91M\t*\t0\t0\tCTATTGGGTAGTGCTCTTGTCATGGTTGAATGAGATAATGTATGTGGTATACATTGCACATGTGAGGTACATAACCTGTGCTCAATAAACA\tC::5=AEEE:ED:DB::AB:AEE=B?FBEEDEFA?DDGFGAEACEEABDDDD-DCAAACC>5ABDBDEF:FCCCCAE5CBDE;D?CDFF?E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_604/1\t99\tchrY\t5048249\t23\t91M\t=\t5048614\t456\tAGTATAAAAAAATGTACATAGCACAGCATAAAGGAGTCAATAAGTATAAGACTTCTTCTACATAGATGAACTCATCTAATCTTCTTATAAT\tFFFFFGGGFFGGGGGGGGGGFEGGGGGGGGGGGDFFEFFFFFDFEGGGGGGGGGGBDGGGGEGDB=FFFEFEF=EGGDFFGGGEGGEGDGF\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chrY,+91039763,91M,0;\nfoo_604/2\t147\tchrY\t5048614\t23\t91M\t=\t5048249\t-456\tCTTTAAAGCCTTTCCAGTTGTAGGTGAATGTACTTTTAAATATAGCTGATATAAAGTTTTGTCTTCTAATGATAGTGAAAATTATTGACAA\tD=5B=EC=CC=D=E=EBBEF=GGGGGGGFGGGFFDGDGFGEGDFGFFDF=FGGGGGDGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFEGG\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:0\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chrY,-91040127,91M,1;\nfoo_606/1\t83\tchr6\t58885728\t60\t91M\t=\t58885351\t-468\tGAAAACATAGATAGAATCATTCTCAGAAACAACTTTGTGATGTGTGCGTTGAACTCACCGTCTTTAACCTTTCTTTTGGTAGAGAAGTTTT\tGGGEEEEEGGGGGGFGDED?DDBFGFEECBECGE?EGGGDGGEGGEGFGGGGEGEGEFGGGFGGGGGGGGAGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:67A23\nfoo_606/2\t163\tchr6\t58885351\t60\t91M\t=\t58885728\t468\tCATTGAAGCCCACAGTAGAAAAGGAAATAACTTCACCTAAAACCTAGACAGAAGCAATCTCAGAAACTACTTTGTGATGTGTACATTCAAC\tGGGGGGGGEGGGGFGFGGGGGGGGGGGABEGGGGGGGDGGGGGGEGGGGGFDFGGEGGGGEGGGGGGGGGGGGFFGFAGEGEGGGFE?EGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_608/1\t99\tchr13\t76961580\t60\t91M\t=\t76961935\t446\tATGTATCTGTGTGTTTACTGCTCTGCGGAAATACAAACTAGTATAAAACATACAAACTAGTATAAAACATGATCCTCTAGTTAGGGAGAAT\tA=DABDDD?D;;<;>?@>=C>B;@;C:5CC672@>289;422667;2<A9==;=92@99;:===;=8??9:;B=BAACABC5CA=9+)A36\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG
:i:0\tMD:Z:91\nfoo_608/2\t147\tchr13\t76961935\t60\t91M\t=\t76961580\t-446\tATGTCTCATAAAGCCAGTATGATGGAGATTTACCAATTTCTTTTTTGCCAGCCAGGTATCAGAACCCACTCTCTATGTTTGGGAAATGCTC\t###CB?DDCDC?DEEE=DDBD??2;;7*569.:=B8<8>BEA<<;;6773*;;:?@/7.67-;2=;D-=DDEAE;ECCC:CCCC:ABD?5?\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:2T47T40\nfoo_610/1\t83\tchr22\t32658358\t60\t91M\t=\t32657992\t-457\tTGGTCTTGAACTCCTGACCTCGTGATCTGCCCGCCTTGGCCTCCCAAAGGGCTGGGATTACAGGTGTGAGCCACAGCGCCCGGCTGGTTGT\tEC=?B??@B?CACA?B5>>?@BEB@AE=CBEDBFEDGFEGEEDFDFGFGGFFDGGGGAGFFDGGGGDGGGGGFGGGGGGFGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_610/2\t163\tchr22\t32657992\t60\t91M\t=\t32658358\t457\tCTAATTTTTTTGTATTTTCTAGTAGAGATGGGGTTTCACCGTGCTGGCCAAACTGGTCTCGAACTCCTGACCTCGTGATCCGCCCACCTTA\tGGEGGGGGGGGGGGGGGGFGGEEGFGGGDGGGGFGEGGGFEFGGGGFGGFGGG-GGFGFFG:GGFFFF=FFGGGGDEDAD,CAAA'B8?<C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_612/1\t83\tchr1\t237354673\t60\t91M\t=\t237354277\t-487\tGGACACAGTCAGTAAACAAATAAATGGAGTAAATTCATATAATGGCAGATGCAGATAGTGCTATGGAGAGAAATAAAACAGAGAAAGAGAC\tEA?5BBEE::ED>@DD8@CD?BEEBEAAEEE?<B-7A<CBADDEEEEE=EED?EE5EEEDE?D?DD:ED@BA>>:===EEED?DEEAEEEA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_612/2\t163\tchr1\t237354277\t60\t91M\t=\t237354673\t487\tAAAGTTGCCACGTAAGACTAAACATAATATGCCATCCAGTCCTAGGCTGAAAACTTTCTACATGGTCCCGAGCTTGGGCAATTCTTCAGTT\tE:EDDEFFF:BEEDDBBEDDE?BEDB:?DD=D:?ACCCCCFDDFFDBBDADDEEDGEGFDGDEGEFFDDFBA=EDEEDE?=-AAAD=:BAB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_614/2\t145\tchr13\t46459041\t37\t91M\t*\t0\t0\tAATTCACATACATTGTTTTTCCTTCTTTGTGAGGTTATTTTGTCAATTAAATGATTTCTTAGTGCCGTATGAGTTATAATAGGGTGGGTAG\tFFF?FEDD?BADDGGFGEGDFFD=FEFGGGGGDGGGEFFGGGGDFFEDFFFAF;FFFAF?EFDFFEGGAGGGGDBFFGF?GGGGGGFEEFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_615/1\t99
\tchr6\t27942052\t60\t91M\t=\t27942427\t466\tTGAGGAAAAAAAAATAATCCAGTACAGAATGACACGGTCATATCAAATTGGCATGTAATAGAGACCCCATCTTGAAACTTGCTACTATATT\tGGGGGGGGGGGGGGGGGGGGBGGGGEGGGGGGGFGGEEGGD?GFGEGGEFEGGFGCFGEAFEDCCBEFDEGEAEEBFFFFEAGAEE@?DDB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_615/2\t147\tchr6\t27942427\t60\t91M\t=\t27942052\t-466\tGACTCTTGCTTGCCTTGCGTTACTCGATTAGGGTTGAGGGAGAACTGCCCGAGATTTTTCGAATGCAAGGAATCTTGCTGCCTGATCTAAC\tC2>?=?B?EEEDEEDEE:CEEEF?GGEGGFGGGGGDGGGDGGGBGGGGGGGEGGGFGGDGGGFFGEGGGDGFFFGGGGFGGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_617/1\t83\tchr1\t102444187\t60\t91M\t=\t102443805\t-473\tCACATTGTGGTTTTTATTTGCATTTCTCTGATGATTAGTGAGGTTGAGAATTTTTCCATATGCTTGTTGGCCATTTGTATATCTTCTTTTG\tCDDGGGGGGGACEFFFDFFBFG=EFEFGGGEEGGGFDGGGGGGGGDGGFGGGEGGGGGGFGGGGGGGGGFGGGFGGGFGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_617/2\t163\tchr1\t102443805\t60\t91M\t=\t102444187\t473\tTTTCATTCCTTTTTATGGCTGAGTAGTATTCCACGGTAAATACATACCACATTTTCTTTATCCACTCATTGATTGATGGGCATTTGGGCTG\tGFGGGGGGGGFEGGEDGFGGEBE=ED:CCDAC5CCD?DDD=AC=CACC-CD=EEEAECC<A4C5C@@,;@=;@@.>=??A?-:BC######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_619/1\t83\tchr10\t68684819\t60\t91M\t=\t68684446\t-464\tGGAGGCTGAGGCAGGTGGATCACCTGAGATCAGGAGTTCAAGACCAGCCTGACCAACAGGGAGAAACCATGTCTCTACTAAAAATACAAAA\tC:4?4:??=CB?AEFF>?>*;;?>:>:=@?@;?56=@?@C@B=EBEGBGGEEEEGGFFFFFF?GGEGGFGGGGGGGGGGGGGGEGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_619/2\t163\tchr10\t68684446\t60\t91M\t=\t68684819\t464\tCTTTTCTATGTTTAGATATGCTTAAGTAAATAAACACCATTGTGTTACAATCCCCTGTAGTATTTAGTAAGTAACATGCTGTACAGGTTTG\tGGGGGGGEGGGEGGGFFFGFGGGFGGFGGFGGGFGGGGGGGGGDGFGGF?FGFGGGGDGGEFGGGFFFF=EEEEEFGGDGGEGGGFF=FF?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_621/1\t83\tchr11\t88857527\t60\t91M
\t=\t88857151\t-467\tTATTGTAAATAGATAAAGAATATAATTTCATCAAAGAAATATAACCCATTTCACCCATCAATAAACAAGAAAATTGTGTCTAAATAACAGA\tBFFFDEEEEEBE=DEEBEFEFDEGGGEE:GGEGFGGGGGGFFFEEFEEACEF@FFFEBEEEEGGDGGGFGGGEGFGGGGGGFFFFDEEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_621/2\t163\tchr11\t88857151\t60\t91M\t=\t88857527\t467\tATTCCTTCACAATTTATTGGCAAGTCTCAAACAACTATAATACTAAAAATATATTTTATTTTGAGAGTAAATCAATCATTTTCGCTAGCTT\tGDGGGGEGGFFGFGGDGGEEEEEEEFGGDEEGAFGDFGDGGG?GEGDD=BCCDFEG?AGGGG@GFFGGGGGGGEGG?GEGDGGDBB5EEEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_623/1\t81\tchr22\t44321715\t37\t91M\t*\t0\t0\tCAGCTCAGCGATGTGGATGGAGTCACCTGTGAAGGTGCGGCCGCCCCTGCCTGCTGAGGGGGAAAGCACCCCCTGGTCTTGCCATGACACA\t#########################################CAB?DDDBABEEDC@DABEEBDDD>A2C?CDBDB5EDEDEEBEEEEEEEA\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:40A50\nfoo_624/2\t145\tchr16\t82783352\t37\t91M\t*\t0\t0\tAGTGATCCCCCCGCCTCAGCCTCCCGAGGAGCTGGGACGACAGGTGGACTCCACTACACCTGGCTAATTTTTGTATTTTTTTTGTAGAGAT\t###########ABCB:F-EFFDGGGGEFGAGEEEFFAE-BEDGEGDFFFFFDADCAEAEEEFDGDGBGBFGGGGFGFGGGG:GGEGGGGGG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:6A1T29T52\nfoo_625/1\t83\tchr1\t21759810\t60\t91M\t=\t21759426\t-475\tATGTTCCTGGGAGATGGTGAGGCCCAGGGGCCTGTGGGAGGGGTGGAACAGGACACCTAGCTAGGAGCCCCGGGAGCCAGGCTGAGTTGAA\tC;><55CBE=EEFBAC><>>?'>.<5A>==?2DFCCEEEEGEEGF?EEDEEE?ECEGEGGEFGGGDFGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_625/2\t163\tchr1\t21759426\t60\t91M\t=\t21759810\t475\tCCTGTGCTGTGTGGCCTTAGGCAAGTTGCTCGTTGTCTCTGAGCCTTGGTTTCTTCATTCTGAGATGTGAGGAGAAGACCCCAGGGTCCAC\tGGGDFGGGBGGGGGFGGGDGEBFFEGFDGGEE?EDEDEE5AEEEEGGDGDGGFGGGFFGGEFFFFFD?DB:B?AAC:=C?EDDEEFEBDD:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_627/1\t99\tchr12\t107001154\t60\t91M\t=\t107001506\t443\tATCTCTCTACAACAACA
TAGGCCTCCTCCCCTGCAGCCCAGGTTTTCAAAACCTGACCTGAGAAAGATAAAGAAAATGTTGCTTTTAGAGT\tFFFEFFFFAFFFFEFFFEFFDE:EDFFFEFEABFDFFFCFFF?EFDEEDCF?AEFFDFD?BC=BE?B?BDBAA?<@<EBEDD?BAEEBBA#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_627/2\t147\tchr12\t107001506\t60\t91M\t=\t107001154\t-443\tACTCTCATGCCTTTGAATATAAATTGGTCCAGGTAGGAAAATTTTGCAGTATATATTAAAAACTTTGAAATATATGCACTTTGATCAAACA\t#FFBFFEDEE:DBAFBFFGEGEEEDBDDGGGFGGGGFGGDGDGGFGGFGGGFEGEGFBFEFFFDFEFFGGDFFFAEGGGEGGDGGGDGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_629/1\t99\tchr5\t104383077\t60\t91M\t=\t104383462\t476\tAAAACTGGATGGAACTGCATGAATACAAATATGACCAGATTACAAATATTATCCTAAAAAAAGAAGTTTATTAAATGTCCAAAATTGGAGT\tGGGGGGGGFDGGGGFGDGFGEDEDEGGGGGFGFGGGGGEGGGGGGGGGGGGAGGGEGFGGFGGEECCFFEFGEFFEFEFFGGGGFGEGFF:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_629/2\t147\tchr5\t104383462\t60\t91M\t=\t104383077\t-476\tGAGAAAAGATGAAAGAAAAAATTTCTTACTGGTTTACATGGGATCATTACTTCCATTGATTCCATATTTACATTTAAAATAAATTTAAATT\tFFDDGG=GGAGDFGGGGEGEFGGFFGGFGGGGFFGGGGGGGGGFGGGGGGGGAGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_631/1\t99\tchr4\t33893505\t60\t91M\t=\t33893885\t471\tCATTTACAACTGCCTTAAGAAATGTAGCTATTGCCAGGTGCGATGGCTCATGCCTGTAATCCCAGCCCTTTGGGAGGCTGAGGCGAGTGAA\tFDGGGFGFGGGGFGGGGEGGGGGGGGGGGGGGGGGEFGEGGGFGEGGEGCEFFGECEFEEEEFEEGEGFEEDEBBBDAAD5?CBA######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_631/2\t147\tchr4\t33893885\t60\t91M\t=\t33893505\t-471\tTTAGCATCTAAGAATAGTCTGTGCTATGGCTTATAGAAGAAGCAACACCAGAATTTCAGTGGCAATGCAAACAACGTTATTTCTTGCTCTG\tE:A?=?5EDAEEFFFFD=D=?DDDD:BGGGDFGG?GGGFGGFBBFFFFGGFGFEGGGGGGGGGDGFFDGGFGGFGFGEDFGGGGDGEGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_633/1\t99\tchr1\t74251269\t60\t91M\t=\t74251637\t459\tAGTTTAGGACAGTGCCAGATGTCAGCTAGTGAGAGATAAAAG
ATGAAAAGTTCCTCAAACAGTGTTGTAATGCAATAAGCTACATTATACA\tGGGGFGGGGGGGGGGGGGGGDGGFGGEGG?GGGFGFFGGGGGFAGGGGGGEGGFGGGGGGGGEFEGGGEFFGFB=GGGFFGGGGGFFGDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_633/2\t147\tchr1\t74251637\t60\t91M\t=\t74251269\t-459\tTCCTCTCCCTACTTCCCCCCTTCACCCTCCAATAGGTTCCAGTGACTGTAGTTCCCTTCCTTGTGTCCATGAGTTCTCATCTTTTAGCTTC\t####?5CCA?>=>.=@%>A;:?D2DDDFFFFGGGFGEGGGGGGGFEGGDGGFGGGGDFFFGGGGFEGGFGGGGGGGGGGGGGGGGGGGGFG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:16A74\nfoo_635/1\t99\tchr7\t96851416\t60\t91M\t=\t96851798\t473\tGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTGATCCGCCCGCCTCGGCCTCCCAAGGGAGGGATTACAGGTGTGAGCCACTGCGCCTG\tEEEDDEEEEEFAFFFDEFFAFFFBEFFDFFFDFFFFEFDEEB?>BBBB@=:BAABA###################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:9A28T52\nfoo_635/2\t147\tchr7\t96851798\t60\t91M\t=\t96851416\t-473\tAGGGAAAGGAAGAAAAACTAATAGATTAACATGGACATCTAATCTGCCTTGTGAAAAGAGAAATTTTCCTAAATTTGCTCCATCTTTCCAA\tCF=FDFAFE3CDD=B:=@>;;DDDDBA:A?A?BF?GEEEAED?D=-DGGFFGFDGDFFBBFDGFGG?DDEEE:EEE:5AA:FGGGFGBEE:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_637/1\t83\tchr5\t20705782\t60\t91M\t=\t20705411\t-462\tTTTCATTAAAATTTGCAATCTTCAATCGAGAGGTCCTACAAACTTTTGTTATATTTATTCTATGCGTTTTACATTTTTATGCTATTGTATG\tGFGFGGGGGFGFGGGEFEDEFEGGGEGGGFGBGFGGEFGGGEGGGGDGGGDGGGFGEGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_637/2\t163\tchr5\t20705411\t60\t91M\t=\t20705782\t462\tTAAATAAAATCATATGGTTTTCCTTCTTTTGTATCTGATTCTTTTGATTAATATTAAGAATAGGAGATTCATCCACACTGTGGCAATGAAT\tGGGGGGGGEGGFGFGGGEGGGGGGGFGGGCED?FFGD=GGGGGGGGDGGFGEBGGGGGDGGGGGGDFAGEGGDGGGGGFBBBEEDFEEGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_639/1\t99\tchr8\t141319749\t60\t91M\t=\t141320136\t478\tCACTCCTGGTTAAGACCTTCCTGCCTCAACTTCGGGTGGTTACACTTACCGCCTGCCATACACA
GGTAGGTGAATCTCAGATGACACTCCC\tGFGGGGGDGGFGGGGGGFFGGGGGGGDGGEGBGFGGDGDDEEEEEABEAEF?BFFE=:BEEEEA:?@@B@:????################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_639/2\t147\tchr8\t141320136\t60\t91M\t=\t141319749\t-478\tAAACAGAGTTTCCCATCATACCAGCCTTCTTTTAAAAAGTTGTCAGCGGCTCTCCAGTATCTTTCAATGAAGTCCAAGTTACTTCTGGAAC\t@B=B??A5BCAEE?EEECB@CECC=>>6DC=GDFGFFFA=FAEEEEADFDEFDFBFE=B=EBADEEFABFFDDFFFE=EDEDDBEEFGGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_641/1\t81\tchr2\t91684006\t29\t91M\t*\t0\t0\tCAGTTTTGAAACACTCTTTTTGTAAAATCTGCAAGAGGATATTTGGATAGCTTTGAGGATTTCGTTGGAAACGGGATTGTCTTCATATAAA\tA>>>@A.101+5./&-05:>:?=,?BBA?DEDFEEE??C=CBBBC?C=EBA@@?=@BBCEEDBACBCBECBDBBBBFFCBFFFDAFFFFEF\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:5\tX1:i:6\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:63A27\nfoo_642/1\t99\tchr2\t196243843\t60\t91M\t=\t196244217\t465\tTTTTACTAATTGAAGGTTGGAATAAACTTCATGTGGGGAATAAAAATTAAGTTTTGGATGAAATATAACACAGCTACAGAAAAGTACTCAA\tGGGGGGGGFGGGGGGGGGGGGGGGFDGGGGFGGFGFGGEG?EEEAAGGFDGDGFGGF?GGEEEFFCEDBDEEEEEGGGDEADFFEFAGGEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_642/2\t147\tchr2\t196244217\t60\t91M\t=\t196243843\t-465\tGTAAATTAGCCCCACTCTTAATGCAGAAATTATACAGTGATAGAATACTTAGGCTTTTAACTGTGGTTATGTATTGGAAATCAACAGCAAA\tDACDB?EFBEFFFAFEGGFFGGGFGGGFGFGGFGGFGDGGGFGGGGGGFGEGGGGGGDGGGEGGGFGDGGGGGFEGGGGGGGGGFGGGDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_644/1\t99\tchr3\t179457661\t60\t91M\t=\t179458030\t460\tCTTGTATTCAGACTATATAATGAATTCTCAAACAACAAACAATGGAACATTAATAAATTAAAAATTGGACAAAAGATTTGAACATTTCACC\tGGGGGGGGGGGGGGGGGGGGGGGFGGFGGFDGGGGGGGGGGDGGGGGGGGGGEGGGGGGGFGGEDGFGGGFEEEGDGGGGGGGGGGFFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_644/2\t147\tchr3\t179458030\t60\t91M\t=\t179457661\t-460\tTAGTATTTGGAGATGAGAACTTTGAGTGGTTATGAGATCATGAAGGTGGAACTGTCATGATAGGATTAGTGCCCTTCTAAGAAGAGACATA\tE
GGEBEGEGFEGGDGGFGGGGGFGGGFGGGGGGFGGGAGFGGFGGGGGGGGGFGAGGGGGFGGGGGGGGGBGGGEFFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_646/1\t99\tchr5\t92679006\t60\t91M\t=\t92679385\t470\tGGAGACAGAAATAAGGACTGACAGAATGACAGAGCTCAGAGACGGCTCTTGAAAAGAGCAAGAAAGAAGCTGGCGGAATCTACTTGCTATT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFGEGGEGFGBBCB?FDDFFEEEA?ACB5??(=.=;9=:>???<?C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_646/2\t147\tchr5\t92679385\t60\t91M\t=\t92679006\t-470\tAACACCCCAGTTAAATATAAATACATCCTTAAAATACATTGCCAGGATAGAACATGTAAATACAGAGACATTGAATCTCATCTTTTTTCCT\tFEFCFDGEEGGGFGGGDGGBEGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_648/1\t83\tchr6\t73950495\t60\t91M\t=\t73950125\t-461\tAACAGGCTCTTCCCTCTCTTTTCCCTGGACGGTTTTGTTTTGCTATTAGGTCTTTCTTTACTTTTGCTGCTTATCATGTTGTAGTATTACA\t#?A:;??BCB:DCEEE9=:26;A??<:;9A2=??5DE=B?E?C?BCADDEECCEEA:FFFDFFFFDDFFBFEDAEDFFFFFCFFFEEEEEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:30A60\nfoo_648/2\t163\tchr6\t73950125\t60\t91M\t=\t73950495\t461\tTCTCTCTACCTTCTATTTTGATCATGAAAATCGTGTACTGTCCTGAAAGAAGTGAGAGGGGATTGGGTGGAAAGAGGGAGGCTGAGATAAT\tFGFGGGE?DGGGGBGFDDFFADE=D;=@B=DDD@DECB:D>:@@@AC=:?CC?C=::>?-A5CA?DAADDADD544A?#############\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_650/1\t65\tchr1\t233845047\t37\t91M\t*\t0\t0\tGTGTGGGGATATAGTCTCCTCTGTGTGACCATTATCATGGATTATCCTGGTGGAACCAATTACCATGGATTATCACAAGGGACCTTGTAAG\t757577=7767;2606887;27>5,<>5?<====9ABAA:,=@@@3*872);;-<:6*40?::<<,=B5=>>41+898<:?:C-?66>6<=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_651/1\t99\tchr14\t29389867\t60\t91M\t=\t29390236\t460\tAATGGGACAGAATAGAGGACCCAGAAATAAAGCCACACACAACCAACCATCTGATCTTTGACAAAGTCAACAAAAATAAGCAATGGGGAAA\tGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGDG
GGFGGGGFEGGGGGGFGGGGGGGGBGDGFEGFEDCFGAGGGGGEGBCFEFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_651/2\t147\tchr14\t29390236\t60\t91M\t=\t29389867\t-460\tGTAAACAGATAACCTACAGAATGGGAGAAAATATTCACAAACTGTGCATCCTACAAAGGTCTAATATCCAGAATCTGTAAGAAACTTAAAT\tFFFEDBFGGDGFFFFFFBDFFDGGGGGG?EGGGGFGGGGGGGGGDFGGFGGFGGGGGGGFGGGGDGGGDGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_653/1\t83\tchr3\t47502467\t60\t91M\t=\t47502081\t-477\tTGTGCTGTCCGTTTTAAGCCAACTGGGATTTGAGGGCTGTTTGTTACTACAGGAGAGCTGTAAGTGGTTACGAGTCAGGATCGTAACCTCA\tC?BA-AGEFGDFFFFF?CD?CCEBECCEEEAEEGGFF?FFFAGBGGGGGGEGGGGGGGGBGGGFEGFGGGGGGGGGGGGFGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_653/2\t163\tchr3\t47502081\t60\t91M\t=\t47502467\t477\tAAGAAAGAAATGTCCAATCACTTGAGTAAGATCACTATAGTCAGTTATTTCGTTGTTGTCTTCTCTGTAAGCTGATCAATCATACACATGC\tGFGGGGGGGGGBGGGGG?GGGGGGGGGGFGFFGGGGGGGGGGGGGFFGDGGGGGGFGGGGGGGGGGGGEFGGGG5DFFFDGGEGG:EEDFE\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:2A0T87\nfoo_655/1\t83\tchr7\t140220044\t60\t91M\t=\t140219671\t-464\tAGCCCGGAGGAAAGCAAAAGCCAGGTCAGGCAACATGGTGGCCTGAAGTTTGAATATACTCCCTGATACACAGATAGACACACAAAGAAAG\tBEGGBFFGGEGFEGAGEDEDEFFEE@EDEEFFEGFDGGEGGEEEBEFFF=DFFFDFFDFFFGGGGFGGGFGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_655/2\t163\tchr7\t140219671\t60\t91M\t=\t140220044\t464\tTCAGGTGATCCATCTGCCTCGGCCTCCCAAAGTGCTGAAATAAGAGGCGTGAGCCACCATGCCCAGCCATCTGTGGCATTTTTGCTTGAGG\tFFFFFEGGGGGFGGGGGGEGGGEGGGGFDGGGDGGEGGGGGEGGFGGEGFGEG?G:CEE?FAFF?GGEFGGCG@FGGF?GFGGGEFE=AE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_657/1\t83\tchr6\t136312167\t60\t91M\t=\t136311784\t-474\tTAGATTTGAATTCACTTAAAGTAAGATCAATTATCTTTCCATCTCTTCTAGATTATGTGGTATGGTTTATATAGCTGCCAGAAAACAATCC\tBFF?DFEGGGGEGEGFDFGFDEEGEFEEGDGDFEFFFDFDE?FDDEEAEGGGGEGFGGFG
GEAGEGFGG=GGGFGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_657/2\t163\tchr6\t136311784\t60\t91M\t=\t136312167\t474\tAAAAAAATCCTTTGTTAAGCCATGTGTCCCAATTTTGGATTTGGGAAACTGTTTTACCATCACATCAAATAATCTATTCAACAATTAACTC\tFGGGGGGGGGGGGGEGGGGGGGFGGGGGGGGGGGEGFGBGDFFFEGGFEAGGGGF?EFFEGFGGGGGGFGGGGGDEGDGEEGFGGGGGFG:\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:31G59\nfoo_659/1\t99\tchr5\t29693887\t60\t91M\t=\t29694260\t464\tGGAAAAAAGTTAAAAATAGCATAAAAATAAAATTTAAAAAAATTGTGTTAAACATTAAAAATACACAAACCTAATGGTTGGAAAATGAAAA\tGGGGGGGGGGGGGGGGGGGGGGFGEGGGGGGGGGGGGGFGGEGGGGGGGGGGFGGGGGGGGFAGGFFG?GGGEDGDFEFFGFGGFEEGDFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_659/2\t147\tchr5\t29694260\t60\t91M\t=\t29693887\t-464\tTTTGTAAATTTTATGACATTTATGAAAACAGTGTTATTGAATGCAAATGTTTAGCATATGTTCATATTTTAAAGGGATAGAGACATGGAAA\tGGGFGGG>GGGFGGFGDGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGDGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_661/1\t99\tchr7\t142210459\t60\t91M\t=\t142210832\t464\tCTTCTCTCTTTTTTCTGTTTCCCTGAAGATTGAGCTCCCAACCCCCAAGTACGAAATAGGCTAAACCAATAAAAAATTGTGTGTTGGGCCT\tGGFGGGGGGGGGGGGGGGGGGGGGGDFGEGGFGGEFGGDGGGGGGGFEGEGFGGEGF?EEDDGGE:DGCEBGGEE:5C@:EBCBCD?FEFE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:77G13\nfoo_661/2\t147\tchr7\t142210832\t60\t91M\t=\t142210459\t-464\tCCCAGTGCAGGACAGAGGATGCGGGCAGACCTATGGGTTACAATGTCTGGTCATTTCCCAATTCCAGATTAAACTGTCACCTGTTTTACCT\t?DFB?FEFAFBAGGGGFEDGGGGGGGFF5FDGGGGGGGGFGGGGGFEGGGGGGGGGGEGGGGGGGEGGGGGFBFFFGGEGGGGGGGGDFDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_663/1\t83\tchr10\t46796347\t20\t91M\t=\t46795944\t-494\tCGATTCTCATGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGTGCGTGCCACCATACCTGGCTAATTTTCGTATTTTTATGGGATTTTACC\t;C@EACDGCDDECCDA?EACECEFDEEEFBGFGGEGFCEEGFEFDGFFFFCEEBECFFFFEFGGGGGGGGGGGGGGGGG
GGGGGGGGGEGG\tXT:A:U\tNM:i:0\tSM:i:20\tAM:i:0\tX0:i:1\tX1:i:2\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr10,-46314267,91M,1;chr10,-48704017,91M,1;\nfoo_663/2\t163\tchr10\t46795944\t20\t91M\t=\t46796347\t494\tGTAAATGTCGTATATGCCTTTTAAACATTTTGGAGATTACCCATAGTAGGCCTGTCCCTTGGGTAAGGCAATTAGGGCAGCTTTCCTGGGC\tE5EEEFFFFFGGGGDGGGGGGEGGGGGGFGGFGGFBFGEGGGGGGGGGGGGGFGGGGGGGGGG=FGGEGGFGGGEBGE?GBEFGDGBEDFE\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:3\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr10,+48703612,91M,0;chr10,+46313861,91M,0;\nfoo_665/1\t83\tchr5\t150028023\t60\t91M\t=\t150027640\t-474\tAACTGCTTTAGTGAAAATTACCAATGACCTTGTATTGCTAAATATAATAGTGGATTTTCAGCCCTCATCTTCCTTGACACATCAGCAGCAC\tGEGGGGEGGEGEFGGGEEGBGGGGEG@GGBFGGFGGEGGGFGFGGGGGGEGGGGGGGGEGGGGGFGGGGDFGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_665/2\t163\tchr5\t150027640\t60\t91M\t=\t150028023\t474\tATAATAAAAGGGCATTTCCTTACATGCCACAAAGATATTAGCATACTGAACAAAATTTGCAGTAATGATTTGATACCATTTAATAAGCGGT\tGGGGGGGGGGGFGGGGGGGEGGGGGGGGGEGFGGGEGGGGGGGDGGGGEGGGGGEFGGGGGGGFGGGGGFDGGGGGGGGGGGFGGGGF?FE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_667/1\t83\tchr5\t130967669\t60\t91M\t=\t130967305\t-455\tAGTTTATGGGTTTTCTCCACAGCAGCTTAGGCCAAATCAAGATTCCTGCTCCTTTCACTTGTATAATACAACACTATAATACAACATACCA\tE@@A-CEECB=DDBC:<7;??B(<-=A=CC@?-5A==CDEBDC=DDEEEAEBDDDDAAECEEEDEE=DEEEDDDD?DDDDDEEDEED?BA=\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:22G68\nfoo_667/2\t163\tchr5\t130967305\t60\t91M\t=\t130967669\t455\tCAGTTTGCAATAAAAAATATTTTGAGTATTACAAAATGCTAATTTTTCAGTTTTCTTTCTTCCTGATATAATTATTTGGCATACACTAACA\tDDDDDAADD=D:D>@>@@6@DB??:DCDDDBEBAB?EEEDECDEBEEA?EDFFFEFGGFFBDEEEDEDEEEDEEEDGGGFDFDFD5FDDAG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_669/1\t83\tchr9\t88092094\t60\t91M\t=\t88091712\t-473\tTGTTTAAAAAAGAAGGGCACAGGGATGTGCGTCTTCTGAGTCCTCAAACATCCAGAATGTTCGTCCAGCAAGCACCCCCTGAGCCTCCCTG\t
ECCC=A>>BC:CDEFECCACCE?CCCE@EEEEAEDEEEEEEFD??FGEFBEGGGGGGEGGEGGFGGGGEGGGGEGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_669/2\t163\tchr9\t88091712\t60\t91M\t=\t88092094\t473\tAATTACAGGGATAAAATTAAAAAACAGAAGCACCCAGGCAGAAAAACAAAACCTACAAGAGAACAAAAATCAGCCTGGTCTTAGACTTGTC\tFGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGGGGGDFFFGGDGGGDEGGFGGEGGGEGFGDDDEE=GGFGGGFEFD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_671/1\t99\tchrX\t34997401\t60\t91M\t=\t34997773\t463\tTATGGGCAAGGACTTCATGTCTAAAGCACAAAAAGCAATGGCAACAAAAGCCAAAATTGACAAATGAGATCTAATTAAACTAAAGAGCTTC\tGGGGGGGGGGGGGGGGGFGGGGGGGFFGGGGGGGGDGGGGFGGGDGFGGEGGGDGFGGGGGGFFGFDFADEECEEGGGGGGGGFGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_671/2\t147\tchrX\t34997773\t60\t91M\t=\t34997401\t-463\tTTTGGAACGTCTTTAGAATTTTTTCAAATATGAGATCATATCATCTGCAAACAAGGGTAATTTGACTTCTTTAATTCCAATTTCAGTGCCT\tFF=GGGFDGGFFDGGGGGGBGGGDGGGGGEGGGGFGGGGGGGDGGGGGGGGGGGGGGFGGGFGGGGGGGGGGFGGGGGGGGGGGGGGEGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_673/1\t99\tchr8\t104387376\t60\t91M\t=\t104387743\t458\tGTCATGTGTTCATTCATATTTATTGAATATGTACCTGAATATGTAGCTTTCCCAGAATTACAGTTTTTCAAGTTCTAAGGTTTACATTAAA\tDBDDDDBDBDD:DDD7C1CBD=DDD>@C,CDBBDDBD=BCCBCBCD@BCCBCB:BBBBBBBBC@BBBBBB?B@BBDCCDDABDBBBBCBBC\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:48C42\nfoo_673/2\t147\tchr8\t104387743\t60\t91M\t=\t104387376\t-458\tCCACTTTCCTATACAGTAGTCCATTGACATTCTCAAAGAATTTACTCAAGACTTTTGTAACCACTTAAACAGTATTTGCCCAGTGAAGTCT\tCB-AAA=EBACDAEDFFDGEGDFFFFBDDFDGGEGGGDGEGEEEEEEFFFFBFFFFEGGGGGGGGGGGGFGGGDGGGGGGGGGDGGGGGG=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_675/1\t99\tchr3\t85782589\t60\t91M\t=\t85782968\t470\tTAAATGATATGATTACAGGTGATTTATTTTCTTCCTGGGACTTAATGAATATGTATTTTGTTTAAATTTATAAAGCACCATAAATGTTAAT\tGGGGGGGGGGGEGGGGGGGDEEEEEE
GGGGFGGGGFGGEFFGGGFGGEGGGGEEFFGGGGAAGCEEFEGFGEGE:GGGGGFGGDBGEGFEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_675/2\t147\tchr3\t85782968\t60\t91M\t=\t85782589\t-470\tGCATAAACTGCCATAGCTTGAGTGTAGAATTATTTAATTTATTTCAAAAATTCAAATCAGAATTAATGTAATACATTCTGAAAAATTCAGT\tGFDDDEDFFDFGAGGFG=FGGGGFGGDEGGGGGFGGGFEGGEGGGGFFFFEBGGGGGFGGGGGGGGGGGGGGEGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_677/1\t99\tchr9\t100178052\t60\t91M\t=\t100178403\t442\tATAGCTACTTCACTTTTTCTCCTAAAGTCTTACGTGTGTTTATTTTTCTGATTTAACATCTTTCTTTTTCTCCCCTTAGCCATCAGGTACT\tGGGGFGGGGGGGGGGGGGGGGGGGGGFFGGGFGGGGFGGGGGGGGGGGGGGGFGGFFGGDFFGFEGGGE@GEEGGGFEBFGGGFEGEDFFA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_677/2\t147\tchr9\t100178403\t60\t91M\t=\t100178052\t-442\tTGGCAGGCCTGTTCTGACAGATGACGGATGGGACTGCTGTGCTCCGGGTCTCCTTGTTGCCACAGATTCATGTGGGCTTCAGGCCAGCAAG\tDFFFE:EEEABG5?BFEFEGEEGEGGGF?GGFEFF?GBDDDFFEFFEE:EEGFGFGGGFEGEGGGGGGGDGGBGGFGGFGGGGGGGGAGFG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:26C64\nfoo_679/1\t83\tchr3\t104658691\t60\t91M\t=\t104658326\t-456\tTTTCAGTCATAGAGAGAGTGTATTCATCATTAATAGATTAATGCTCTCCCTAGGGGAGGGATGAATGAATTCTCACCTAGTTAGTTCCCAC\tFE=EAEFD=BDEE?EEEEAEEECBECBCDB5FD=FFGGGFGDFGGFFFFFBEGGGFGGGGGG?FGGEGGGGEDG?GGFGGGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_679/2\t163\tchr3\t104658326\t60\t91M\t=\t104658691\t456\tAAAAGAACCAGTAGGAGACACATCCACACCCACACACCCCCACCCCCACCACCATCCACACAGAGCAAGGCATTGACTTACACCACTCTGG\tAFFDFECFGGE:DEEFDFFFFFGFGGDGGGFFFBFGGAF>@@<@?BCDG@>E-EDDEEEDGGGDGFGAEDEAD:?AAA5ADDDDD=EDEEF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_681/1\t83\tchr10\t758714\t60\t91M\t=\t758338\t-467\tAAAGATGACGCAGGCAGAGATGTTCTCCTGTCCTTACTCAACACCACCTGAAATGCTGGTAACAACTGTCCATGTGGCAGTGACTCCCACT\tFCBCA5BB?EECGFFGCBCB?E=BEAEBEE?A?CBAA@AD@ABEEBFFF
FFGG?EGFFFFFDDDBD?FDGGFGGFGFDGGGFFFFFGGAGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:11G79\nfoo_681/2\t163\tchr10\t758338\t60\t91M\t=\t758714\t467\tTCTGAAGCTCAGCAAGAGAGCGTGTGGCCCCCAGGCGCCCTTGCCCAGAGGTGTCCAGTTCCCTCTCGCTGGCCTCCATCTGGTCTGGACT\tGGGGGGGGGGGGGFGGFGBAFF?DFGFBFFGG=GGBFFDF:EEEEFFF?FD:DADEEDCCFDFFFGGCFFADEF:FFFEFEFF:FEEBAE5\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_683/1\t83\tchr22\t23283893\t60\t91M\t=\t23283507\t-477\tGCATACCAGTCAGGTGCTGTTATAGGCTCTGGGGGTGTGGGCATGAACGAGACACATAATCTTGAGGCAGAAAAGCCATTTGGAGGTTTGG\t@BBB,CCDFBDEFFFEAFEEEDDCF?CEC=GEEEFEEGGGEDEEEG?EGFGGGEEFGFGGGGGGGGFGDGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_683/2\t163\tchr22\t23283507\t60\t91M\t=\t23283893\t477\tCAGCTTTGGTGTTTTTGCTAAGTTTTATGGAGTGCCAGGCCTTAAAGGAAGCATGATGTGTGCCGCATGCCTTTTCAGACTCTGAAAGATG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDFDFFGGGGFFGGGGGGGGGGGGGGFGGGGFGGEGEEEDEGGGGEGG?GGAGGGGGFAGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:64A26\nfoo_685/1\t99\tchr13\t112558128\t60\t91M\t=\t112558510\t473\tGGATTTGTAGCATCTGTCGATATCCAAAAACAATACAGTCTTCTAAAATCTATAGATTTAGTGATTGAGCAATCTTTCTGACTCACGTACT\tGGGGGGGGFGGGGGGFGGGFGGGGGFGFGGGGGGCGGGGGGGGGFGFGEGFEGBG:EEECGEGDAEACE@?AAAACDBCBBGFFEAEBAED\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_685/2\t147\tchr13\t112558510\t60\t91M\t=\t112558128\t-473\tCTGTTAAAAATAGAAGCAACCGTGATAGAGATTTAAAACGCAGATCTTGTTTTGCTTGTCGGGCCAGCTTCTGTGATTAGTGGGTGATGAC\tGFDE>CFFF?=EEG=EGGBGGGEGEFGGGGGGA?EFFEEEBFFFFEEFDBDGBDGFFG?GFEFGGCGGGGGFGDGEGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_687/1\t83\tchr16\t53565823\t60\t91M\t=\t53565447\t-467\tAGTGCCAAACACATTTGTACTACAGAGCTTCCCAACTTTAACAATGGGGCATTCTGAATCTTTGGATTTAAAAAGTCATAGTTCTCAAATT\tGGEEEGFEEFEFFFFEFDDDEEEEGEGEEGEEGFEGGEGGEGGGFGGGGGGGGGGGGGGGGGFGGGGGF
GGGGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_687/2\t163\tchr16\t53565447\t60\t91M\t=\t53565823\t467\tTGAATAAATCTTTCTTATATTAAACCACTGAGGCTTTTAAAGTTATTCGTTGCGCAGCATGGCCTAGCCTGTCCTGACTGACACACTTAGT\tFGGGGGGFGGGGGGGGFGGGGGGGGFFFEGGGEGGGGGGGGGGGGGGGGGGGFGDGGGGGEGEFGGGEG?GGGGFGGGFBBGGB?FDFEF:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_689/1\t83\tchr1\t19767185\t60\t91M\t=\t19766786\t-490\tGGAAAAAAAAGTCATTAAAATGTCTTGATCATGCCAGGAACTCTGCCAGGAACTTTATATTCATGACCTCATTACTCCTCACAATTGCAAA\tD?;ACEAEE-ECDCE=BBB?E<?A?=A?:C:??AA=ACC5CBE=EB5CD?BADED=EEEEEEEEEEDDEEECC>=CAB:AA=DDCDDEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_689/2\t163\tchr1\t19766786\t60\t91M\t=\t19767185\t490\tCATGTTGGGAGACCAAGGAGAGTGGAGTGCTTGAGCCCAGGAGTTACCACCCTGGGCCACATGGTGAAACCCCGTCTCTACAAAAAAAAAA\tDDADDDDD=?EE?EDCA:->-4'6424,4:?<9<:7B7;=6.>/26?665B;?>AD5C5?,1530>29>9CCCC:?AA?A###########\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:18T72\nfoo_691/1\t99\tchr2\t135944461\t60\t91M\t=\t135944864\t494\tCACGCCTGTAATCCCAGTACTTTGGGAGGCCGAGGCAGGTGGATCACCTGAAGTCAGGAGTTTGAGATTAGCCTGACCAATATAGTGAAAT\tGGFGGGGGGGGGGGGGGGGGGEEGGGEGGGGGGGFFFFF4BCAACDEECECBBCDECE?C9AAA@;9':=98;8<?AA:@:><>;?@=@@A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_691/2\t147\tchr2\t135944864\t60\t91M\t=\t135944461\t-494\tTGGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCAATCTCGACTCACTGCAAGCTCTGCCTCCTGGGTTCACAGCATTCTCCTA\t############??CC=A@A?AD:@BC@@?@CDCE:GDCBGGG?GGFFFFDFEGGGGGEDEFFFFDGGFFFGGGGGEGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_693/1\t83\tchr12\t106669874\t60\t91M\t=\t106669496\t-469\tTTGCAGTGTCCAAGGCCACCTCATGGCCATCACTGGGATGAAGAATTTGGGCACCATGTGTGTTCACAGAGCGAGAGTCTATTGAAACAAT\t###############?4';8:>5=@@3,=6++7777:B;9>?:6*??AA:?8.665BAD=D:DDBD:A::ADADBDBCDDDDD?DD?DDD
D\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:4T39C46\nfoo_693/2\t163\tchr12\t106669496\t60\t91M\t=\t106669874\t469\tGGAGGGAAACATATTCTAGGGAGGCAAGGTTGTGTGAGGTAGAGAGTGCCACGCTTACAGAGTTGGTGCTCATGGCCACCGTGTGAATGGG\t776,++:;77;-=BBAAAAAC5C??=?B>4==?B8A:AA?:AAAA?-;9;5>-5BC5A<:;B7B###########################\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:11G67A10A0\nfoo_695/1\t83\tchr22\t31739572\t60\t91M\t=\t31739200\t-463\tAATTTTGAAATATACAATGCATTATTTATAATGCATTATAGTGACTGTAAAGTCACTATTCTGTGCAAAAGATCACAAGGGCTTATCTCTC\tEECCE:BEECBCBB:EBA:BBCCEDE?EDFECCBBCEBEFGEEEGEEEFFFGEGEGGGGGGGGGGGFFFFEGAGEGGGGGFFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_695/2\t163\tchr22\t31739200\t60\t91M\t=\t31739572\t463\tAAAGGCTTCTCAGGTAACTTCTGCCCACTACAGGCCCCAGTGGAACTTACAGGCTGGCAAAGGGAGGCCAATCATGAACATGCCGACAAAC\tGGGGGGGGFGGGGGEGGEGGG?FGGGDGFFEEEEEGGGFGEGGGGGGGGFDE?DGEE--A?1<?->?########################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:83A7\nfoo_697/1\t99\tchr4\t81869042\t60\t91M\t=\t81869422\t471\tTTCACTGGTGTGTGTCTACCTTTCTAATATCTTGCTTTGCCCTTTGCACCAATGATATCATCAGTTTTTAAAAGATGCAATACCTAAAAGC\tGGGGGGGGGGFFFGGGGGGGGGGGGGGGFGGGGEEGGGDGGGFGEAEEFGEBEEE?EEECEEBECDFFFDFFDEFFAFFBGEGGD?FF5DF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_697/2\t147\tchr4\t81869422\t60\t91M\t=\t81869042\t-471\tATTCCACAACCTCAACAGTGCTACCTTAACCCATGCCTGAAAATAAATTCACAGGTGTTTGTGATAGGCAGCCTCTAAGATGGCTCTCAAT\t######BA?C@;<A<=?A>?A>:@>@5D-DDBD5C=FFEEEEE5EEDDDC==BE?CEEEEE?:DEDGAGGEGGBGGGGGGGGGGGFGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_699/2\t129\tchr6\t11486765\t25\t91M\t*\t0\t0\tGGTTGGGTAGTGGGGTAACAGGTGGATGTGTATTTATGTTTTTATTCCTATGATTATTTAGAATATATGGATAACTTTTATATCTTTTTTT\tAC?C5;@>=@077=7.<<99670786778@@C?AC@=C>6CA<<<885B=CAAAC:DD>DEEEBEEBDEA=DD5?################\tXT:A:U\tNM:i:4\tSM
:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:73C3A5A0C6\nfoo_700/1\t99\tchr4\t110701520\t60\t91M\t=\t110701895\t466\tAAATTTCCATCCTGAAATCTGCGAGCACCTCCCTTTCCCAGAAGGAAGAAAAACTCGCGTTTTGTTCAGACACCCGAGAGACGGGTGAATC\tGGGGGFGFGGGGGGG?GGGGGFGGGGGGGFGGGGGGGGGGGAGGGDGGFEGGGGDFGBGDGGFGEGGEABEEEBEEEEEECEEEE######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_700/2\t147\tchr4\t110701895\t60\t91M\t=\t110701520\t-466\tCAGATTGTTAAAGGCTTAAAAGCCAAAAAGTAAGCAGTGTGATTCTCTTGTCACTTTCACTTCCTGTTTATTTTGTTCTTTTTTTTCCTTC\tEFFDDFFAFFFEE:EFFFFFBB=GGGFGGEGGGGFGFFFFAFFAFFBEEEAEAEDEEEAEBFGFGGCGFGGGAGGGFGFFGEEGGDEFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_702/1\t83\tchr6\t117747935\t60\t91M\t=\t117747541\t-485\tTGTACAAGTTTGTGTTGGTATTGGTATTTGTGTTTGAGAGAGAAAGAAATATCTCATGGCTTTGCAGTATTGTGTGTTTTATGTGACACGG\tFEEEFFEFFFFEEEEEEBEFEEEBEE:AEFFFFBEFFFFEFEEFFEFFFFBFFFEFFFFDFDFFDFFFFDFFFFFFFFFFFEEDEBCCCAC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_702/2\t163\tchr6\t117747541\t60\t91M\t=\t117747935\t485\tACAATGTTCCTATGTAGGAAATGATTTTTTTTAAATGTGATTTCTGTAGCTATGGATAGGCTTCGACATTTAACCAGTTCTCTTTTCAACA\tGEEGGFFFGDGEFD?FEFEFGA:GFEDGGGE>>FDFFDFEDEEEE?EE=EDFEFDFGGAF###############################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:70C7C12\nfoo_704/1\t81\tchr17\t12277024\t37\t91M\t*\t0\t0\tTAGATTTTCACGTGCAATTTTATAACTTTTTCATGCAAACTGTATGTTGGATTTGGCCAAGGGACTTCCAGTTTGTGATTTCTGGTTTCGA\tDC>@<4<>3-1:B>6;37385,74+///32/:><>349=:*?ACAC?A?A<A8:97:=5@@=98=:<==>=;89;86266-5BB;=5B==?\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:43G47\nfoo_705/1\t65\tchr15\t45104478\t37\t91M\t*\t0\t0\tAGAAATGGTACTGGCATAAAAACAGACACATTGGCCAATCAAACAGGATAATGAGACCAGAAATGAACCCAGGCATTTATGGTCCATTGAT\tA:)85@55-@7BB=9BA?C>4A.A###################################################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:
0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:55C28A6\nfoo_706/1\t83\tchr14\t74316437\t60\t91M\t=\t74316050\t-478\tTTTAGTAGAGACGGGGTTTCACCATGTTGACCAGGCTGGTCTTGGACTCCTGACTTCAAGTGATCCACCCGCCTTGGCCTCCCAAAGTTCT\tED:BBAFFFAEFFFEFB@5=:CEBBEE:BBEABB5BBB?BEDAFEBFDFFEGDGFGEEBE?AA4?A?EEEEGFGGFFDGGGEGFGGGGGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_706/2\t163\tchr14\t74316050\t60\t91M\t=\t74316437\t478\tAGAAACGGATTTTTCATAGAGTGAAAAGGTCTATAACTTAAGGTTCACTTTTTTTTTTTTTTTTTCCAGCCAGGGTCCCCTTCTGTTGCCC\tGFGGGGGFGDDGGGGFGDGFE?FBEAACC=FFFFFGGGDBEEEEEFFFFDGGFGGGGGD>EECE###########################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:69A7T1A0C10\nfoo_708/1\t83\tchr17\t7727714\t60\t91M\t=\t7727353\t-452\tCCCGCTTCTCATTTAAAAGATATGTACACAATTAAATGCGTTGTGACTTCAATGAGGTATGTAGAAACAGAGTTATATATAAACATATATT\tFGGAGGEGGDGFGGGGFFEGGEEGFFGFGGFEDGGGGGGGGFGFEEGGGGGGFGGGGGFGGGGGGGDGGGGGGGGGGGGGGGGGGGGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_708/2\t163\tchr17\t7727353\t60\t91M\t=\t7727714\t452\tGAGATCATGGAGGGAAGGCAAATATACTGAAACGGAAGAAAAGAGAACATAATTTCACGATCTTCAGATTTGACTCTTTCCTCTTTATCCT\tGGGGGGGGGGGGGGFGGGGFGGGGGGFGGGGFGFGGDFFGGEFFGGGFGGFGGEFGGGGGGGGGGGGGGGGDEEBGEFGEGFGEGGEBDFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_710/1\t99\tchr4\t90623029\t60\t91M\t=\t90623427\t489\tCAAAACTTGATATAGTGGAAAAATGGTTAGTTATTGCTAGAATTAGGATGAAAATTAAATTCTAGGCAGAATCTCAAGTAGCATGTATATT\tGGGGGGGGGGGGGGGDGGBGFFFFFF@FEF>CA@CEEECC:BBD??A>;>@B@B?EDFFDEDDDCE@CD?C?BAA>AA=?@@:@>>@@>>A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_710/2\t147\tchr4\t90623427\t60\t91M\t=\t90623029\t-489\tTTCAAGGACTGAATATGATAAAATATATTTGGGGAGGAAATGTAAAGTATTTTAAAATTGACAGTAAGTATAAGAAGTAATTTCTGAGGGC\tGFFEFFGFGGGGGGGGGGGGGGDBGGEGGGGGGGEGGGGGGGFGFGGGGFGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\
tXG:i:0\tMD:Z:91\nfoo_712/1\t99\tchr14\t53456345\t60\t91M\t=\t53456729\t475\tAGGGGTAAAAAATTATTTTCCATCAAATAATAATCATTCTCCTAAAATCTTAGTCTCGCATTCCTTGAAATTAAATTTTTTCCTAAATTTT\tGGGGGGGGGGGGGGGGGFGGEGGGGGGDGGGGGGGGGGGGGGGGDGF?EGGGGEGFGFGDFBFFFFFE:GFFFEEGEGGFGGGGGGGGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_712/2\t147\tchr14\t53456729\t60\t91M\t=\t53456345\t-475\tTTGATCTAACACTCACGTAAAATCTGTTTACTAAGCTCATCCGCAATTAAATAAATACAAATAAAAACAACAGAGAGATACTTTTGCCATC\tGEEEAEAECEAFEFEDGFGGGGEGFGGGGGGGGGGGDGGEFGGGGGGGGGFDGGGGGGGGECGGGGFGGGFGGGGGGGGAGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_714/1\t83\tchr2\t49827980\t60\t91M\t=\t49827618\t-453\tAACACAACTTAGTTTCCTTATTTGCAATCTGGGTAGACAGATTGAAGTCTTTTACTGATTTTTTAAAGATGAATTGTTTGATTTCTTACAA\tEFFFFGEEE=DGDFDGGGGGFGGGFGFGFDGEGGGGGGGGFGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_714/2\t163\tchr2\t49827618\t60\t91M\t=\t49827980\t453\tTATGAAATCCAGAATATAGTTTGTTCAATTTTTATGGAAAGTGACATTTAACAAAGAGTAAAGCAAGAATATTTTATTCCCTGATTTTTGT\tGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGGGGGGGBGFGGGFGGGGGGGGGGEGGGGFGEGGFFGGGGGDGFGGGDGGGFGGFG@\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_716/1\t99\tchr4\t22642402\t60\t91M\t=\t22642765\t454\tTGCACATCTGTTTTTGTTGATTGCATGGAATCTGCTGAAATATTCTTGAAAGTACAACTAGAAATAGGTGTAAAAACCACCTTTCCATTGA\tGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFDGFFFFFGGGGGEGGGGGGFGFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_716/2\t147\tchr4\t22642765\t60\t91M\t=\t22642402\t-454\tAAAGAGAGACTTGAGGAAGAGAGACAAGAAAGCTCTTGGAATGATTGAGAAGAAAATTAAAACGCCCAAAGTAAAGTACAATCTTATGTTA\tEGGGGGGGGGAGEGGGGGGGGGGGGF?GGGGGGFFGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_718/1\t8
3\tchr17\t64898632\t60\t91M\t=\t64898252\t-471\tTCATGAAGGCTTAGACCGGCCCCTGCCTCTGCCTCTTCCTCCTTCTAGTTTATGCTCAGGTACACGAGAGCATTTACGGTCATTACATAAC\t###################?(BABBBEADBEEGEFEDEGEGGEEGGGGGFGDCDCD>BBB7BDCDDGFGFGFGFGGGGGGGGGEGDGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:15T4A70\nfoo_718/2\t163\tchr17\t64898252\t60\t91M\t=\t64898632\t471\tGGAATCTGAGATTGGCATGCCAGCTAACTATTAGCTATAAAGCAAATTATTTATTTTATGTATTTAATTTTTTGAGACAGAGTTGCACTCT\tGGGGEGGGGEGGGGGGFGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGGGEGGGGGGGGGBGGBGGGGGGGGAGGD?GGGGBDBEFGEBEGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:59T31\nfoo_720/1\t99\tchr5\t31485788\t60\t91M\t=\t31486161\t464\tATATCCAGAATCTACAAGGAACTAAAACAAATCAGCAAGAAAAAATAATAACAATAATAATCCCATTAAAAAGTGGGCAAATGACATGAAC\tGGGGGGG-FGGGGGGGGGGGGGGGGGGGGGGGDGFGGGGGGGG<FGGGGFGGGGGGEGGGGFFGGGGGGFEEGCDFFFFFGFFBGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_720/2\t147\tchr5\t31486161\t60\t91M\t=\t31485788\t-464\tTCTAACACTTGATTCGGTCCTCCCACTACTGTGTATCTACCCAAAGAAAAAGAAGTCATTATATCAAAAAGACACCTGCACTTCTATGCTT\t############C:AC=3EE?GGGEFGFGGGFFFFEGGGGGFGGGGGFFGGGGGGGGGGGGGGGGGGGFGGEFEEEEGGEGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_722/1\t99\tchr8\t129383155\t60\t91M\t=\t129383538\t474\tAGAGCCTCAAATCAATAATTTAATAATAATTTAATTTAGAGCATGTTAAGTTTAAAAAGGTTATCAGATATTCAAGTGAAGATGTCAATCC\tFFFEFGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGAGGFGDGFGEGGGEEE:G?DF?FFCDCCAEEEEEGEGFEFGGGEFAEFBD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_722/2\t147\tchr8\t129383538\t60\t91M\t=\t129383155\t-474\tGTTTTTCATGAGGACTCAAAAACTACTACAACCCACATTCTGACAAAGCTTCTTTGTATTCCCCTGAGCTTTTGCACAGACATCATCTGTG\tFDEEEDGGGFFGGAGEFFFDFCEEEEGGGFEFFFFFGGBGEGGFGGGGEEGGGGGGGDGGEGGDFGGFDEGGFGGGGGGGGGGGGEGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_724/1\t83\tchr10\t8504828\
t60\t91M\t=\t8504442\t-477\tCAGGAAAGGGAGAAACAATTGAACAACTCCCACATTTTTAAGTTGTCCTTTTGTCATATTGGTTAATGAAAAATATGTCAACTGTGGGAAG\tGGGEGGGGDGFGGGGGFFEEGGDGFCFDFFFDFFFDAEGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_724/2\t163\tchr10\t8504442\t60\t91M\t=\t8504828\t477\tTTGCTGCACCCATCAACCTGTCATCTACATTAGGTACTTTTTTTTTAGATGGAGTCCACCCGGGCTGGAGTGCAGTGGTGCAACCTTGTCT\tGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGFGGEGGGGGGGGGGGFGFFGGDGGGGGGEGFFFGFAFEE:BDECDFDAEEBDDE:EECEB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_726/1\t99\tchr22\t28840561\t60\t91M\t=\t28840941\t471\tGTTTCTTGAGTTATATGCCTGGAAGTATATCCTTTCCTAAACAAAATTGCACAAACAATAACAATGTTTATATATGCCTGAAGCAAAAATA\tGGGGGGGGGGEGGGGGGGGGGGGFGEGFGGGGGGGGGGFGGGGGGFGGGEGGAGGGGGGFFGEGGGFGGFGEGGGGGGFFGFGFFGG@GGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_726/2\t147\tchr22\t28840941\t60\t91M\t=\t28840561\t-471\tTGAGTAGCAAGTATTAGTAAAACATGTAAGCCAAAATGTAAATAAGATTTACACAGCTTAATGCATATAAGACCATATAGAATTATTATAT\t?FGEEGGEGGGGGGGFGGGFFGGGGGGFGGFGGGGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGEGEGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_728/1\t99\tchr10\t74023626\t60\t91M\t=\t74024041\t506\tAAATTAAAGATAGGAATAAGTAGAAACATACCCCAAGCTCGTGGATTGGAAGACTTAATACTGTTAAGATACAATACTAGTCAAAGTGATC\tGGGGGGGGGGGGGGDGGEBDEEDDFFFFFFGGGGGEGGGEGEGGEGEFGDEGBDBE?EDDEDFBGBEEABBEEFBDB?CB:=8=B79<>:>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_728/2\t147\tchr10\t74024041\t60\t91M\t=\t74023626\t-506\tACTGAAGTATCAACAAGAAGATCCACATAAAAAGAAAGCACTATAAATGGTGCTGGACAAACTGGGTATCTAGATGCAAAAGAGAAAGTTG\tCC?BEA=AEE:FEFFEGGG:FGFFEGGGFFGGGGGDFGGGGGGDGGGGGGEFFEEFEAFFEGEGGGGGGFFFGGGGGGGGFGGFFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_730/1\t99\tchr17\t28952007\t60\t91M\t=\t28952369\t453\tA
GAAGACATTGGAAACTGGAATAAAGGCCATCCCTTTTATATAGTTGCAAATAACTTGATGACACTGAGGACAATGGACTTTGTGGAAGGC\tGGGGGFGGGGGGGGGGGFGGGGGGGGGFGFGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGEFGDGGBEGGGGGGGGEEFDDFF?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_730/2\t147\tchr17\t28952369\t60\t91M\t=\t28952007\t-453\tAAGGGAGGCAGGGGCTATTTACCAAAAGAATAGGAGAAAGACCCTGAAGGCATTTCAGAGGTCTTCCAGGATGTCCTTTCCATCTAGGGCC\t=DGGEFGGEGGGGGGGGGGGBGEGGGGFGGGDGGGGGFGGEFFFFFGGGGGGGGGGGGDGGGFGGGGGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_732/1\t99\tchr13\t90914732\t60\t91M\t=\t90915098\t457\tTCAACAGGTGAATGGATAAACAAATTGTGTTATGTCCATATCGTGGAATATGACTCAGCAATAAAAAGGAATGAACTATTGATACATGCAA\tGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGGFEFGEGGEGGGGFGGGGEGGDGFEGEGFGFGEEDGGGGGBEGGGGFGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_732/2\t147\tchr13\t90915098\t60\t91M\t=\t90914732\t-457\tAATCTGAGTAAGCTCTGTAGATTCTACCAATGTCAGTTTCCTGGTTTTGATACTGTACTATAGTTATACAAGATGCTTCCCCTGGGGGAGG\tGE?EACF?GGG=GGGGGGGGEGFGGGGGGGGGGGGGFGGGGEGGGFGGGGGEEEEEF-FFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_734/1\t99\tchr11\t41351080\t60\t91M\t=\t41351473\t484\tATAGAACTTACTCTATTTCTTATTTAAAAATCTAGAGAAAGAATACTTTTCCATTTTTGTACAGCTATGACTATTCTAACACATCCATTTA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGEDGEGDFFGGGEGFGFGEGGGGGGGGGGGEGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_734/2\t147\tchr11\t41351473\t60\t91M\t=\t41351080\t-484\tTATTTCTTAAGATATACGTATACATACTTACACATCTAAATGTAGTACACTGTAAAACTGTCAGTTAAATTTTTAAACTATTAAGAAACAG\tGGGGGGGGGGGGGGGGGGFDGGGGGGGFGFGEGGGGGGGGFGGGGGFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:49T41\nfoo_736/1\t83\tchrX\t85633676\t60\t91M\t=\t85633309\t-458\tACTTATCCTCTTAACAATTTTTGA
TTATATAAGTTTGACTATTTTAGATAGCCCATGTATGTGGAATCCTGCAGTATTTGTCTCTGTGACT\tCFE5FBGGGGGFGEEEEBE@EEEEFEGEFEGE?BFCF@GFFFFEFFGEGG?FGEGFGGGDGAGGEDGGEFGFGGGGGGEGGFDGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_736/2\t163\tchrX\t85633309\t60\t91M\t=\t85633676\t458\tAACAAAAACATTTTGCACGTGTTGTCTTTATAAGAGTTAAAAGGATGTTCTCAATGTTTAAGAAAGAGAAAACAAAATGACAGTCGGGATG\tGGGDGGGGEGGGGGGGGGGGGGGGGFGGGGDGGFGGGGGFGGGGFGGGGGGFGGDGFFFGGGGGGGGEGGGGGGFGGGFGGDGBGFGFGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_738/1\t99\tchr20\t59294412\t60\t91M\t=\t59294807\t486\tTCCCTCTCTGTGAGTGCCCTGGTGCTGGCTTCTTTTCCTCCCCCCTTCCTATGAATGTGCCCCAGATTCCATGTGTTAAGTATTGCGCTCT\tGGGGGGGGGGGGGGGGGGGFGGGGGFGGFGGAEGGC9EAEFFEEEAECEECEBCECFDABCEEEA=@C?C:AC@CBD:DC?@BD?6;@;>:\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:39T51\nfoo_738/2\t147\tchr20\t59294807\t60\t91M\t=\t59294412\t-486\tGAGGACCCACTTGCTTTGCGAAGCTCACTTGTCCAGCTGTTTCACACATGTGACCCCATGGAACCTGCCAGACCCAGCCACCGCAGCTTCT\tA?@?=AA@7AB5C?BC?C@?@A5AA?5BDEC=ECCBAEBEECC??@DCCB@E:EEE@@@D?E?EEEFFDDEEGGGFGGGFGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_740/1\t83\tchr4\t13012572\t60\t91M\t=\t13012210\t-453\tGGAAACTTAGATCACTGATTTTGAAAACTTAGTTTTTCAAGTATAAGCATTTAATAATGCTAGACATATTTCTCTAAACACTGCATTGGCA\tECEE@ECFFFDEEEDEGBGFGBGGGGCFFFEFFEFED=GEFFGAGGGGDGGGGGFGGDFGGGFGGGFGGFGGEGFGGGGFGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:9C81\nfoo_740/2\t163\tchr4\t13012210\t60\t91M\t=\t13012572\t453\tCACATTCTGCACATGTACCCCAGAACTTAAAGTAAAATAATTTTAAAAAAAATTGTTAAGGAATTTGTTCGTTTAATCTAAATTGTCAATT\tGGGGGGGGGGEGGGGGFGGGFGGGGFGGDGFGEGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGAGDGGEGGGGGGGGGGEBDFGEFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_742/1\t99\tchr15\t83690923\t57\t91M\t=\t83691325\t493\tGAATTGCCAGGAAGTTCAGATTTACATAAAAAAGTCATAAGAGGAATGG
TCCCCACCTTATAGTGGATGAAAGTTTCAGCTCTTCTTCCAA\tGFGGFEEGGGGGGBFFGGDGGFDGGGGEGGGGGGEGGGGFGCGGGGGGGGGEDGEFCGDEFAFCFDEEE?B:B:BC>?A-?>?B?>C=@>A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:20\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_742/2\t147\tchr15\t83691325\t57\t91M\t=\t83690923\t-493\tAGCAGGCTATTCATAAGCATTTTAAATAACCTCTTTTGCATGTTTTATTTCCCCCAATTAGGTCAAACCTTCTCAAGGACAAGGATTATGT\tEE?=FE?GGFG?GFGGGEFGEGEFGFGGEFFEGDGEFFF;FGFGGGGGGEGGFFDGGFGGGGGGFEFDGDGDFGGEGGFGGGFGG?FEGGG\tXT:A:U\tNM:i:0\tSM:i:20\tAM:i:20\tX0:i:1\tX1:i:2\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr15,+25952367,91M,1;chr15,-24828760,91M,1;\nfoo_744/1\t83\tchr17\t25425956\t60\t91M\t=\t25425569\t-478\tGATCTTATATTTCCCCTGCTTTCCCCCTGGAATTATTCATCCCCCCAAGCCAGCATCTGGGTTCTACATGTGCTCATTACTCATGAGTCTC\t#########?=>+A@?:56981>(>>:.<8<,954902)&2&?B<;GDGGGDGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGGGFGGG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:12T10A17T49\nfoo_744/2\t163\tchr17\t25425569\t60\t91M\t=\t25425956\t478\tGGCCTGGTTGACTCAGCATAATTTTTCTTGAGAGTCTTAATATGTTGCTACATGTATCAGCAATTTGCTTCTTTTTATTGCTGAGTAGTAT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGDGFGGFGEGGGGGFDGGGGGGGGGGGFGGGGGGGFGGGGGBFGGGGGGG@GGDGG?EFBFFDFB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_746/1\t83\tchrX\t88341525\t60\t91M\t=\t88341149\t-467\tCTGCAAAATTTGCCATTTTCACTAGCAACATATGAGCATTCCAATGTCACCATATCTTCAAAAACAAATTATAATTTTTTTAATTAAACTC\tFEEEFEECEEEEEACEEEBE?DEEEEECFDFDEGEGDEEBFGGGGGGGEFGFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_746/2\t163\tchrX\t88341149\t60\t91M\t=\t88341525\t467\tGCCTATTCCAGTCATTTCATGTAAATTGCTAAATGGACTAGATAGCCTTTTGTGTCTAGATTATTTTACTTCTTATATTTTTATGTTTATT\tGGGGGGGGGGGGGGGGGGFGGGGGFGGGGGFGGFGGEGGGGFGGGFGGGFGGGGGFEF=AADBDDEC3EDDADDADC:DDEB8@8CCC??E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_748/1\t83\tchr17\t52049237\t60\t91M\t=\t52048859\t-469\tTAAAGCCCAGACTTAG
CCCTCTCCAACTTTCCAACTTCAGGGGGATATACTGTCACCAAGAAGCAAACATTTGGGGGAAAAAAAATCCTCA\tFEEA@CCDCF=EEFEFEDEEEAAC@=EFEEFFDEGEDEEFCEFFEGECE:GGGGEGGEGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:48G42\nfoo_748/2\t163\tchr17\t52048859\t60\t91M\t=\t52049237\t469\tACAGATGAAAAAACCTGAGACCTAGAACAGGAAGGAATCCATCCAAGTTCACAAAAGGGCTTGAGAAACAAGAGTCCCAACTTGGCTTTCC\tGGDGDGGGFGGGGGGGGGFEGGGGEEGDGGGFGFDGDFGGGFGGGGGGGGGGDGGFEEGFDGGFFFFGGBFEEFABEFFEDFE5F=DBFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_750/1\t99\tchr18\t23435820\t60\t91M\t=\t23436230\t501\tTAACATATATAAGTGCGTGTGTGTGAGAGTGTGTGTGTGTGTGTGTGTGTGTATTTTTCAACCTCAGGAAATTCACAACATGCTGCTTTTT\tGGGGGGFGGGFFGEGFFFFFEEEEEBDADAEBEBEEEEECFEFDFDFEFDFBDFFGEDBBEEFEDBEBDCCDDEEGFFDDEEE?EDE?GEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:44A46\nfoo_750/2\t147\tchr18\t23436230\t60\t91M\t=\t23435820\t-501\tATGATGGGATCCATGCCCTTCATGGCGCAATCCAGCCACTACCATCCCATGGCCAGGCAGAGGAACTAAAGAATAAGTACCAGAACTTCTC\t################################DCDDECEECGGEFGGGGDFGEEGGGEEGGGGGFGGGGGGGGEGGGGEGGGGGGGGFGGG\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:15A7T7A59\nfoo_752/1\t83\tchr8\t128794752\t60\t91M\t=\t128794382\t-461\tCTACTGTGTATTCCCCCACTCTTTGCTATATTATCCTCTGCCACTCAGTGCCCACTTAATAATAGGCTCAATTCCAAAACACTGCATAGTT\t7?::?A<A?>A+>CCC<.959BCBCB??AAD?E:EECEEEEGEGFGGGGEGGGEGGGGGGGGFGGFGEGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_752/2\t163\tchr8\t128794382\t60\t91M\t=\t128794752\t461\tCCTTGCCCAGCCTAGATTTGCCTGTAGGAAAAGAAAAACACTGTGGTGCAACTCTTGCCCAAGAAGTTTAAGTTTCTAGCTCAGTGAGACA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGFGGGFGGGGGGGGGFGGGFGGGGGGGGGGGGGGGFGGGGGEGGGGGGFGGBEDDEFDFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_754/2\t129\tchr9\t125074118\t37\t91M\t*\t0\t0\tGAGGCCAAGGTGGGCAGATCACGAGGTCAGGAGATC
GAGACCATCCTGACCACCATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT\tAFDEAEDED5EEEEEBEEEBEEDE=FFFFFFEFE=AE?E=DD:CCCEC=E=?DDDEEEA=:BAAA:AAAAADDDADEBEEGGFEFE?B>@E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_755/1\t83\tchr6\t163144161\t60\t91M\t=\t163143791\t-461\tACAGTAGGGATGTGGTTTGTTAGGAAAGGTTTGTGCTGAGTCTTGAAGGTGATTAATGGTTATTCAGGAGAAAAGGAAGGAGAAAAGAAAA\tEGGGGGFCBGGGGGGGDEEFGGEDFGFGGGEEGFGGGGEGGBCGGGGFGGFGGGGGGFGGGEFGGGGGGGGGGGGCGGGGGFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_755/2\t163\tchr6\t163143791\t60\t91M\t=\t163144161\t461\tCTTTATTATTAAAAAACAAAAACAAAAAAAATGGTCTCTCTCACTTGTTGAATTTTATGGTATCATCCCTTTTCCATTTTCTTGCTACTAT\tFFGGGGGFGGGGGFGGGGGEGGGGGGGGGGGGGGBGFGGGDGGGEGGGGDGFGDGGGGFGDGGFGGGDAGGGGGGBFGGGGFD>GGGF?GD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_757/1\t83\tchr6\t163144161\t60\t91M\t=\t163143791\t-461\tACAGTAGGGATGTGGTTTGTTAGGAAAGGTTTGTGCTGAGTCTTGAAGGTGATTAATGGTTATTCAGGAGAAAAGGAAGGAGAAAAGAAAA\tBGG?DGGGFGBGGEGGFGGFGGGEFCFGEFGFGBGGGFGEEEFGBGFFGEFGGGFDGDGFGDFF?FFFGDGGGGGFGGDFGFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_757/2\t163\tchr6\t163143791\t60\t91M\t=\t163144161\t461\tCTTTATTATTAAAAAACAAAAACAAAAAAAATGGTCTCTCTCACTTGTTGAATTTTATGGTATCATCCCTTTTCCATTTTCTTGCTACTAT\tGGGGGGFGGGGFGGFGGGGGFFGGFGGGDGGFGGEFFFGGGGDGFEEFDEG:EGGGGGGFEDFFFFGFGGGGGBDGGDBGGGGEG=:@B:C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_759/1\t99\tchr9\t38084962\t60\t91M\t=\t38085323\t452\tGAAGTGCTGGGATTTCAGGTGTGGGACACTGCACCTGGCCCCTTTATCTACTTTCTAATTCCTCCATAATATGGGCTTCCAGGGCCCTGGT\tCCC?:DDDDDFFFDBEDEE:DCDBD;EEEEEEDEDEFCFFFFBAEDAFFEFFFFFFB?FFDFFEFEFDDFCAAACFFFDFAFBFF=DDCCA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_759/2\t147\tchr9\t38085323\t60\t91M\t=\t38084962\t-452\tAATGGCTTGGTGCTGTCCTCTTGGGAATGAGTAAATTCTCACTCTGTTCGTTCTTGCGAGA
TCTGGTTGTTTGAAAGAACATGGTGCCCTC\tFBBDGAFAFFDECEE5EEEEE=GGDFGDGGGFEFBFFFFEFEAEEEBDFF=GFGEGDEDE=GEGDGGGAGEFGGGGDEEGGGDGEGGGG=?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_761/1\t99\tchr17\t29053884\t60\t91M\t=\t29054252\t459\tTCAATCAATGAGGTTAGCTATATAAGTGTCATATGTTGCCTCCCCTGGAAGTGAATCTAGGAGATTTAATGATTCATTCACTCAACAAATA\tGGGGGGGFGGGGGGGFGGGGGGGGGGEGGGFGGGGGGGGGGFGGGEEGEGGEGFGFGFGGFEGEEAFEBFGEGBEDBEEEGCGGGGBEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_761/2\t147\tchr17\t29054252\t60\t91M\t=\t29053884\t-459\tGGGATAGCAAGTGCAAAGTTCCTTTGGTTTGTTTGAAGAATGGAAAGAACCTTAGGATACATGGACGCCTAGAACAGAGTGAGCAAGGGGG\tGDEAGGEDGGGGGFGEGEFFBGGGFGEGGEGGGGGGGGGGGGGGGGFFFFFG5GGGGGGGGFGGFGGEGFGFFEFFGGG=GGGGGGGGFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:52C38\nfoo_763/1\t83\tchr17\t63954616\t60\t91M\t=\t63954230\t-477\tGGCTGCAGATAAGACTAAACGCAGAGGGAAACAGAGAGAAAACACTGGGAGGGAACGAGGATGCACTCTTAGTTCGTGTGAGAGCATGCTT\tGEEEGFGGGGGGGFGFEGFFDGGGGEEGFGEGGGGGFGGFGEGEGEGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_763/2\t163\tchr17\t63954230\t60\t91M\t=\t63954616\t477\tTACCTAAGGCTCTGTCGTAGGTCTGAGACTGGGGACTGAGGGTTAAAAAAATTCTAACAGGTTCTCAAAATTGATGATAAATATGAGTGTT\tGGGGGGGGGGGGGGGGGGGGGEGGGFFFFFGGGGFGGDFFGGGGGGGGGFGGGGGGGGGGGDGEDGGGGFGGGGGF=GGGGFGFGDECE@A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_765/1\t83\tchr4\t77472544\t60\t91M\t=\t77472185\t-450\tATTACAAGGCCACATATAAAATATTTGGACAATTAATTTGATTCAGAATCTCAGATTCTTCTTACCTATGGCACATGTCTGATCTCAGGCT\tGGGEEEFGGGGFFGGGGEGGFEFAGEGGEGGGGGGGFGGGGGGGGGGGAGGGGGGEGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_765/2\t163\tchr4\t77472185\t60\t91M\t=\t77472544\t450\tGAATTCAGCTATTCAATTATTGATGAAAATAGGATGGTTTTCAGTAAGGTGCTTATAGTCCTAAATTGAAGTTTTTACAGTTTCCT
TCTGA\tGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGEGDGGGGFGGGEGGEGGGGCFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_767/1\t83\tchr4\t63113300\t60\t91M\t=\t63112917\t-474\tAAAATGGTGAAACCATCTGTACTTTGTGCACGCTGATGACCTTGAATTACTTCTAGCAGTGTCATTACCCAATTAGTTGAGAGAGGAAAGA\tBGGFFGGGGEGEGFGGGGEGEFGGGGGFGDGGGGGGGGEGGFGGGGGGDFGGGGGGGGGGGGGEGGEGGGGGGEGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:49A41\nfoo_767/2\t163\tchr4\t63112917\t60\t91M\t=\t63113300\t474\tGCATACTCCAGTAGATTGTACACCAGGAGGCAGATCAGCAATACAATGTATTTCAGGTTGAAATTCTCACTGGGACCATAATTGAAACTAA\tGGGGGGGGGGGGGGGGGGGGGFGGGGGFGFFDDGGGFGFGFGGGGGGGGFGGGGGGGGGGGEGGGGGGGGGFGGGGGGAGFGFFFGEGEGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_769/1\t99\tchr4\t85534371\t60\t91M\t=\t85534729\t449\tTCATATGGTATACAAAGTAGGATTTATCAAATAAATATGGCTTTTTCTGTGTGGAGGTGTTTGTTTTGTTTTGTCTGTTGGGTTTTAAGGG\tEEEEEEE?EEEEEEEEE=EEEAEEEEEEEEEEEECAEEEEEEEEEE:AA=B?B5BBE?B:>A?><@A;B@>93,7755:16@7>@DAB?AC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_769/2\t147\tchr4\t85534729\t60\t91M\t=\t85534371\t-449\tTGACACCGCATCCCTGGTAGCTTAGTCATGCTGGATATTTCCAGTGAGGCTAAAGAAAACCACTGCATCTTAGAATAGCCTGTCAGAGAGA\t@EAAACFFFFBBFF:FGG5GGFF?EBGFGBGGGGGGGGBGEEAEEEEEDBCFFGGGGGFFFGGGGGGFGGFEFFGFGGG?GGGGGFGGFFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_771/2\t145\tchr8\t28899072\t37\t91M\t*\t0\t0\tAGATCACTGCCTTTTTAACTTATCTTTAAAATGTAGGATAAAACCACTGCATATTCTATTTAATTATAGCAATTTCTTTACAAAGGTAAAG\tFG:GGGGEFGFGDEBGGFGGGGGGFGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_772/1\t83\tchr3\t153472567\t60\t91M\t=\t153472200\t-458\tATATAACAAATGTGGCTAACATCGTATTTATTTTGGACAGTGTTGTTCTAGGATAAAGCAGGAAAACAATATTCTGGGAGAAAGCTTAAAG\tGEEE;EDFDGGFFFBFEGEBEGGDGGAA?E
B?EFFFDDCGGEGGGGFEF:FGGGGFGBEGGCFGGDFEFBFEDEGGFDFFFGGEBGGFGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_772/2\t163\tchr3\t153472200\t60\t91M\t=\t153472567\t458\tATCAACCATATGGATCATAGAGAATGTGACATAGAAAGATTTTAAAGTGTATGGTTTTTTTAGGCCAAAAAGCTTCATATGTTTGGAAATC\tGGG?GGFGFGFDFBFFFFFFBE?EEGFFGEGGBGGGGGGGGFGGGDFEEDEBDEEGGEFBG<GGGGGFGGFDGDGD:DEFBEEEECAADEB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_774/1\t65\tchr6\t164315670\t25\t91M\t*\t0\t0\tCAATTTTAGGAACCTATTTTACAGAAATAAAATATCAGACTCTAAGGATATAAGAGTATTCCTTGAAGTACTATTCATACTTACTATTATC\tA:DDDDDD:D,<=:;CC?C??BDDAC;=C>4A8B92A,3;7(/>76*68(@@<C@A?AD,CAAA@8**4:A=?5ACDDDBDDBDD-,>@@C\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:46T10C7C20A4\nfoo_775/2\t129\tchr6\t124858504\t37\t91M\t*\t0\t0\tCATTATACTAATTTTCTTCAACCAAAGAAAATTCCAAAGGATTCTAAGAGATGACATTTCAGAGTAATTGTCACATTCTACCCAGCTTGGA\t:A??:CCCA=?AACA?>=:?DDDB=AAAA:@>DDDEFADD>6?@?;;5===CCCA@@;@?BD?DDEE5?5DD?DD=B?EEEAE:E6>@6=B\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_776/1\t83\tchr6\t124858894\t60\t91M\t=\t124858504\t-481\tTTTGGCATTTCCTTTTTTCCAATGTTCTCTGAGCATGCTATCTCAATTTATTATAAAGCTACACATACAGCCCAGGAATTTCTTTAGCAGC\tGGFGDGGFEGGGGEGGEEFGGGGGGGFFGGGGGGGFFGGGGGGGGGGGGGFFGGGGFGGGFGFGGGGGGFGGGGGGGGGGGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_776/2\t163\tchr6\t124858504\t60\t91M\t=\t124858894\t481\tCATTATACTAATTTTCTTCAACCAAAGAAAATTCCAAAGGATTCTAAGAGATGACATTTCAGAGTAATTGTCACATTCTACCCAGCTTGGA\tGGGGGGGGGGDGGGGGGGGGFGGBEGGGGGGGGGGGFGGGEGGGGFFFEFGEGGGEGGGGEGGGEGGBFFGFGGGGGGGGEDGDGEGEBBE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_778/1\t99\tchr5\t157868546\t60\t91M\t=\t157868917\t462\tATTTAAACTCCTTAAAATGTGCCCAGTGCACTTTAAGGAGAGTGTTGTGATGTACCAGCACATTTGAAATTCAGGCATTTGTGAAAACTTG\tFFFFFFFFFFFFFFFFFFFBFFFFBEBDAEFFFFFFFFBFEDEEEEFFBDFFEFFFFFFDFAFF
FFDEFFEFEADFFFEFFFFFFF=EFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_778/2\t147\tchr5\t157868917\t60\t91M\t=\t157868546\t-462\tATAAAGAAAGGCTCTGCCATCGATAGGTTTGCAGTCTAACAAAAAACATAAACCTCAAATTAAAAATGTATTATTTTATTAAACTTTTGAT\tFEFGCEFEEGGGFGFGGGGFGFFDFFEE-EEBGGGDGGGGGGGGGGFFEFFFDFEFGGGBGGGGDGEFGGGGGGGGGGEFGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_780/1\t83\tchr15\t92374971\t60\t91M\t=\t92374581\t-481\tGTGAGAAATAATATCTCCATTTCACAGAAGAAAACATGAGGCTCAAATCGTTAAGATGCATTTCAGACTCATGCAGTTTATAAGTGGTGGA\tEGEFGDGFFGDFEFDBEFE5DEGEGFEGEFGEFDEGGGEGEGGGEFFGGGGGGGGGGGGGGFFFFFE5EEEGFGGGGFGGGAEGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:67T23\nfoo_780/2\t163\tchr15\t92374581\t60\t91M\t=\t92374971\t481\tTCAGAAGTTTGGAGGTGTTGCTTGAAGCCTTCACTGAAGAATTGCTTCTGCCATCTAGGTTTGTTTTTAAATAACATGTGATTAGACTTGT\tEEBEEEEEFFEEEEE:EDDDEEEDEEFFGGGGEDFEGDGG=FEFEFFFDFDFEDFEDADEGDAGFFGG<GE=BEEADAFFECGAGDFGFCF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_782/1\t99\tchr6\t52691824\t60\t91M\t=\t52692215\t482\tGTGGAAAATAAAGCTTTGTAAAATAATCGGAGATAGCTAGATAAGCAAACAAGTTAGTTTATTTAAACTGTATTGAAAAAAAATTAAGCAA\tFGGEGGGGDGGFGFGBGGEGGGGGGGCGGGFGGGGGGGGG?GGGG:GGDFGFGEGFGEEFFGGGEEFGFDAFFFEEEFFCGEEGFGGGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_782/2\t147\tchr6\t52692215\t60\t91M\t=\t52691824\t-482\tGTGAGTTTAAGGGGGATGCTCTTGGAGTATATTTAAATTGTTCTGTTAAAATGTTTCGATTGACATTTACCTTCTCTGTGAGCTGTATCTG\tGFGGEGDEEGEGGGGGFGGGGGGGGDGGGGGGGFGGGEGGGGGGGGGGGDGGGGGGGGFGGGBGGGGGEGGGGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_784/1\t99\tchr10\t27302604\t60\t91M\t=\t27302980\t467\tTGTGAGGCAGAGTCTCTTTCTGTCAACTAGGCTGGAGTGCAGTGGCGCAATTTTGGCTCACTGCAGCCTCTGCCTCCTGGGTTCAAGTGAT\tFEFFFFFBFEFDFDFEDEEAFAFDFFFFFFFFFFFBC@CB?@=CB5C>-AACCBAAEBB:E?:BBEDD:BBB=AB@D4?CC<:>5:13
<2>\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:45T45\nfoo_784/2\t147\tchr10\t27302980\t60\t91M\t=\t27302604\t-467\tATTGGTATGAATGTTACATACTCTATTCTACTCTTTTAGTAATTATCCTAAAATGATACCCTGAATACCTGAGTTAATAAGACCTAAATTG\tBFDBFDADDDBCBBFFF?DFFAFDFF?EEBDBDGEGDGGFFGGGGGGD=DGG?GGGEAADEDDEEEEFFEFDGGDDFFFFF=DFFAGGFGD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:39C51\nfoo_786/1\t83\tchr3\t99767311\t60\t91M\t=\t99766946\t-456\tTGTGTTACAGCCACTTGTCTTACTTTTATTTCATTTTTGTTTTTTTTTTTTTTCTTCTATCTGAAAGAAAGCTAGAGAGTTATTTCCCCTC\t#######CCBCA=?AA==??@=0;?BA:BFDC;9@>CC4?CCGA;AFGGDGGBGDGGEGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_786/2\t163\tchr3\t99766946\t60\t91M\t=\t99767311\t456\tTGATTGTTTGTTTTAATGTCTTCAGTGTGGCTTTAACTGATTTCCTGAATACCTAATGTCCACATAACACTTAACAGCTTGTAAAACTCTT\tFGGGGGEGGGGGGGGGGFGGGGFFGEECEEFFFFFEDGFEGGFGGGBGGFDFGEDGFGDGGED=EGGDDFGGBFD?CCCD.;:?BEB:=?D\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:27C46A16\nfoo_788/1\t99\tchr5\t158486218\t60\t91M\t=\t158486590\t463\tGTTTGCAGGCTTTTAAAAAATTTTCCTATCAGCACTTTGAATATAGCTTTCTACTGCTTTTCTGGCCTCCATTCTTTCTGATAGGAAATCA\tEEEDEEEDEEFFGGFGGGGFGGGGGGDGFGDGFFFGGBGGEAEEEGDGGDDGGGFFFBGDGGFEGBFEDFFEDFDFFFDGAFEGBGFDEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_788/2\t147\tchr5\t158486590\t60\t91M\t=\t158486218\t-463\tTATTATTCCACAGGTCTATAGGATTGTCTATTTCTCTTTCATCTTTTTTCTCCCTATTTTTCAGATTCGGTAATTTTCACTGCTCTCTTTT\t?EFEFDEFF=FGGDBEGGFGG?FFDGDGGDGGGFDGEEEE>C>><DEDE?EBFGAGFFFFFABEEEECEDEEGDGGFFFAFDBC?FGDDGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_790/1\t99\tchr3\t46626715\t60\t91M\t=\t46627098\t474\tATCTAATTTATATAATTAGATGATGCAATCAAAACAAGTTAAACTGTGAGAAAGCCAGAAATCAGTGCCAGTGTCCGCTTAACCAGGCTCT\tGGGGGGGGFGGGGGGGGGGGFGGEGGGGGGGGGGGGGGGGGFGGGGEGFGGGGEEGGGEFGFEEGEGEFEGEFEEGGBGCFFCFFEEEBGG\tXT:A:U\tNM
:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_790/2\t147\tchr3\t46627098\t60\t91M\t=\t46626715\t-474\tGGTTCACGGGACAATCGGGTGGCAGGGCTCACTGGGGTCTTAGTGACAGTATATAATCTTTGAGACTTTTCCATGCCTGAATCTTATCAAA\tBCEEE?FFFFDGFGDGGGGGGGGGGGDFEGFGGGGEFFFFDFGGGGGGGGGGGDFGEGGGGGGGEGGGGFGGDFEGGGGFDGGFGGGGGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_792/1\t99\tchr9\t84058926\t60\t91M\t=\t84059308\t473\tAAAAATTATTTTCATGTGTTAATCATATCTTTCAGGCAAGATAACTTAGAAATTATTCATCCTTTATAACTACTAGCAAACTTAGTGTGGT\tGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGDEGGEGGGGGGGGGGGFGFGGGGGGGGGGGGDFGGGEGGGGGGGEGFGGGGGGGFGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_792/2\t147\tchr9\t84059308\t60\t91M\t=\t84058926\t-473\tATTACTACAGTTGTTATCTTTATAATTAAAGTAGACTGACTACAGCATGAATGAATTAGGTTAGTTATAATGAAGATGAAAATATTTTTAT\tGFFEFFEGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_794/1\t83\tchr19\t34534438\t47\t91M\t=\t34534058\t-471\tCAAATAAACTTGAAAATCTAGAAGAAATGGATAAATTCCTCGACACATACACCTTTCCAAGACTAAACCAGGAAGAAGTTGAATATCTGAA\tCCFFEF?<@>.DDDDCBBBEFDBBE?AEFEGFFFFBAEEEED@B?DDC=C@GGEGGGGFDGGGDGGGFGGGGGGGGGGFGGGFGFGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:10\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_794/2\t163\tchr19\t34534058\t47\t91M\t=\t34534438\t471\tGCTGTGTGTAGAGGGAAATTTATAGCACTAAATGCCCACAAGAGAAAGCAGGAAAGATCTAAAATTGACACCCTAATATCACAATTAAAAG\tGGFGGDGGDFGAGGGGDGGDGD=FGFBGFEGGDGGGGGGFGDFDGGDEDGDGGGFFGFGEGGGGGGGFGEGEEGGGGGGGGGDGEGDEGF?\tXT:A:U\tNM:i:0\tSM:i:10\tAM:i:10\tX0:i:1\tX1:i:18\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_796/1\t83\tchr12\t80216455\t60\t91M\t=\t80216087\t-459\tCGGGCATATCCTAGCAGCTACTAGCAGCTCACATAGAGAACTGAATAGAAGGGGCAGTGAACAGTACTGTTAAGTGACATGAAACAGCCCT\t###########@A:BABEE@BFD?EEEFFEEBEFFFED?FFDFFDDFFFFFBFFFFEEEE?AFFFFEFFDFEEEEEBEBEEEEBEEDEDEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:
i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:1A89\nfoo_796/2\t163\tchr12\t80216087\t60\t91M\t=\t80216455\t459\tAGCATTTTGTCTTGTGATTGATTTTTTAAATATAGTAATTTTAAAGACACGGTTGAAAGAAGCAGCTGATCGCTAATATCTGATGGCTAAT\tFDGGGFBFFBEEEE?FFEDFDDEFEFEEDEFFAFFGGEGGGFGGD=DDADFDAD=ADDEEGGBEFGGFDDADEDEDE?EDD:5C=DCBEE>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_798/1\t99\tchr2\t185454724\t60\t91M\t=\t185455086\t453\tCAAATAACTACCAGAACTGCCTTACAGGAGGTCCTTAAGGGAGTGCTAAACATACAGGAAACACTATTATTAGCCACTACAAAAACACACT\tBDDD=CC>CC>AAABDA5D=DBDDAAAAA?A?C:BDADD:B:-?5@@@C@D;BDC==@CCDDDDD5BAB:AA-AC:?AA?-CAC:@>CDD5\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_798/2\t147\tchr2\t185455086\t60\t91M\t=\t185454724\t-453\tAAAACAGAAAAAAGCAGGGGTTCCTATTATAATTTCAGACAAAACAGACATTAAAACAACAAATATCAAAAAAAGACAAAGAAGGATATTA\tGC@?6:=CDDCA>5:ACB?;?5=A5C=DDFFCCC=A@=4-@FF?FFDGGFDAGGFGFFFBGGFGEDEEEEEACCCA?>E:DFDFEF?DEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_800/1\t99\tchr5\t175546065\t60\t91M\t=\t175546449\t475\tCTCAGTGAAAATTTCCATCACCACAGATTCATCACAAGCTTTAAGAACTATCAAATCAAATGGGCACTGATGAGGTATAAAAACTGCATTA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGFGFGGGGGGGGFGGGFGGGDGGGFGGFGGGGEGGEFGGGGAGDGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_800/2\t147\tchr5\t175546449\t60\t91M\t=\t175546065\t-475\tGAAATTGTTTTAAAAAATACAGAATACAGTAAAAAAATAAAACCATCTCACTGGAGGAACAGCATTGTAAAGCTACTTTTAAAAACGCAGC\tEEGEEEGFFB@FFGGGGEEGGGGGGDGGGAGGGGGGGGGGGFFFFEFEFEEGGGGGGGFGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_802/1\t99\tchr8\t15132879\t60\t91M\t=\t15133251\t463\tTCTAACCAGATCAGCACCTTTTCCACTGCACAAATGAATTTCCTCAAACACTACAATCTGTTAGATTTACAGTGTGAGCTTTAGTACTTTG\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGFGGGGGGEGFGGG@GGFGGEDDFDGEGG?GFGGEFGGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tX
O:i:0\tXG:i:0\tMD:Z:91\nfoo_802/2\t147\tchr8\t15133251\t60\t91M\t=\t15132879\t-463\tTGTTGTTTAGGAGCCTCAAGAAAACTCTCAAGGCAATTTGGAACAATTTTTAATTTTTAGAATGTGAGGCTACATAATGCTCTTGTAGGGG\t:BGFDGDG=GGGGGGGGCGGGGDDGFGGGGGGEFGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGFFGGGGGGGGFGGGGGGGGGF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:85G5\nfoo_804/1\t99\tchr4\t181669501\t60\t91M\t=\t181669877\t467\tAAACTATTCTATATATTCTTCTCTAACAGTAGAGGGAATAATTAAAAATATATTTTAAAATATTAACTTTGTTAGTTTTTTCTTGTCTTTG\tFEFGGFGGGGGGGGFGGEGGGGGGFGGFGEFFFAFAGGFGDDDGGGGDDGCGGGGGFBGFFGGGFDGGGGGEGEEEFFEFFGGGFGFGFDD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:90C0\nfoo_804/2\t147\tchr4\t181669877\t60\t91M\t=\t181669501\t-467\tCTGCCAATGCAAAGGATGTAATTCCCACTAATGCCCTTCAGTAAGGCATAAAAAGGTGATTGACCAAGCAAAACAATCGGCCAGCTTCCTT\tD:FDGDEFGFGFGFBEBFFFD:GAFGGFGFFGG?GDGGEGGGFEEFG>EGFGGGFGEDGGGGGGGGGDGGGDDGDGGGGGDDEGGGGDDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_806/1\t83\tchr10\t120158272\t60\t91M\t=\t120157889\t-474\tAAGCAACTTCGTTTTTACCCCCACATAGGTAGTCAGCAACACAATATCTTGTAGTTGTGAGTGGTTAATTGAAGGATAATCCAGTAACACA\tGGBGG@GGGGGEEEED8AEEEFEFGGGGGFFFEDGGGGGGEGFGEGFGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_806/2\t163\tchr10\t120157889\t60\t91M\t=\t120158272\t474\tAGACGAGGGCCTGTTTGGGCTCAATCATTAGAATCCAGGTTGCTGGAGAGGTTCTGTGAATCTCAAGTGGAACTTGGCAAATTTCAGCAAA\tGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGGGDGGGFGGGGDGGGBGGBGGFEDEBEDCDDDGGEGGEGGDGGGFFGGGGFGBGEGGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_808/1\t83\tchr10\t120158272\t60\t91M\t=\t120157889\t-474\tAAGCAACTTCGTTTTTACCCCCACATAGGTAGTCAGCAACACAATATCTTGTAGTTGTGAGTGGTTAATTGAAGGATAATCCAGTAACACA\tGG=DGDEAFFBEEEEB3CFFFGEGFGFGGGGG?GGGGFFFFEGGGGGGGGGGGGGGGDGGGFGGGGGGGGGGGGGGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\
tMD:Z:91\nfoo_808/2\t163\tchr10\t120157889\t60\t91M\t=\t120158272\t474\tAGACGAGGGCCTGTTTGGGCTCAATCATTAGAATCCAGGTTGCTGGAGAGGTTCTGTGAATCTCAAGTGGAACTTGGCAAATTTCAGCAAA\tGGGGGFGGGGGFGGGGDGGGGGGGFGGGGEGGGGGGGGGEGFGGGGGGFGGEFGGECEAEDBDDDGGCGGEAEEEEGEGG?FFFBEGEGE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_810/1\t83\tchr11\t35423603\t60\t91M\t=\t35423216\t-478\tGTCATAAATGGCTTTTATTTTGAGGTATGTTCCTGCTTTACCTAGTTTATTGAGAGTTTTTAACATGAAGGGATGTTGAATTTTATCAAAG\tGFFGCEGGFGGGGEGFGEFGFGFBFGEGGGEBEEE?GBDDGFFGGGGGGFGGGGGGGGGGGEGGGEGDGGGFFBFAGGGGGGGGGGEFGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_810/2\t163\tchr11\t35423216\t60\t91M\t=\t35423603\t478\tCTTTGTGGCAATTGTGAATGGGAGTTCATTCATGATTTGACTCTCTGCCTGTCTGTTGTTGATGTATAGGAATGCTTATGATTTTTGTATA\tGGGGGGGGGGFGGGGGFEGGFFDFBEEEDEEEFDC?EDDEGFFEED=EEDBFFEFGGEBGF=F=?<B:AA5BC?CCEBCCCAEEEC>5(@#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_812/1\t83\tchr3\t10031980\t37\t91M\t=\t10031630\t-441\tAAAGGCTGAGCTGGAGGGGCTGGGACACAGCCCACCTTGACGACAGCTGCTCCTGGCTGTGCGGGGGGGTCTCCAGGAAAGGCCATTCTCC\t################@>==@222531>84@(6.A1:BA>AB@EBEFFFDFCFFFEDDD>><@ACC=D=ADEEADDBEEEEDFDFFEFFEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_812/2\t163\tchr3\t10031630\t36\t91M\t=\t10031980\t441\tAAAAATATAAAAATTACCTGGGCGTGGTGGTGGGTGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGAAGAATTGCTTGAACCCAGGA\tABAAA?@@DD?DBABFABFFEFFDF@@,B@1=A>*>B=B>?DBDDABCAAAFEFEEE5-ED-DD?CDACD#####################\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:89C1\tXA:Z:chr3,-126146230,91M,1;\nfoo_814/2\t129\tchr19\t32423845\t25\t91M\t*\t0\t0\tTGGATATTCAGACCTCCTTCAGGCCTTCGTTGGAAACGGGATTTCTTCATATTCTGCTAGACAGAAGAATTCTCAGTAACTTCCTTGTGTT\tGGGFGGGGGGGGGGGGGGGGGGGDGFGGGEGGGGGGGGGDGDGFGGGGGEGGGDGGGGGGGGFGGGGGGGGGGFGG=GGGGGGGGFFDFDG\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:16
T2G27T5A37\nfoo_815/1\t83\tchr9\t3436756\t60\t91M\t=\t3436381\t-466\tGGTACATTTTGGTTTGAGGGGCTTCTTTTATCATCTATTCTCTCACACAGACTTAAAATGGAAATAATTTGGGTTCTTGCTGGACACTGCT\tBEECEE=GEGGGGFGGEFDDEGGEGGGGEFDGFEFGFDEGFFEFEFGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGFGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_815/2\t163\tchr9\t3436381\t60\t91M\t=\t3436756\t466\tGTAAAGGTAATATATAATTTGATGCAAGAATTAATTCTCAAGAATGTAAAATGGTTGGAAACCAGGAGATCTGTGAACTACTCTGATGACT\tGGGGGGGFGGGGFGGGGGGGFGGGGGGGGFGGGFGGGGGGGGGGFGGGFGGGGFDGGGGGGGGGGGGGEDFFFDFGGEGFGFGEGEGGFEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_817/1\t99\tchr5\t126398937\t60\t91M\t=\t126399316\t470\tTGGATTATCCCTTAGAAATGAATTGTATCATTACTGATTATGAATACACCAAATTATGACAGATTCACATGTAAGAATGCCAGGGAAAAGT\tGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGFFGFGGGGGGEGEEGEDF?BE@CECFFE?EGDFGGAFFDB@\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_817/2\t147\tchr5\t126399316\t60\t91M\t=\t126398937\t-470\tAACAGGCAAAGTGGTTTGTCCAAGAGCAAAGCCAGGGGCAGGGTGAGAAATTGTTCATTCATTCAATCATGCATCTATTCTTGGCAAGGCA\tGEGGGFGFGGFFGGGGGGFGEGGGGGGGGFGGGGGGFGGGGGGFGGFGGGFGGGGGGGGGGGGGGGGGGGFGGFGGGGGGFEGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_819/1\t83\tchrX\t88036008\t60\t91M\t=\t88035630\t-469\tATATAGAAAGTAGAGCTATATCAAAAATAAATTGGATAGATTCACAGGGTGACAGCTCATAGGACATTTTCAGGAATAATATAGTACTTGC\tGEGGGGGGGGEFFFFFBEEGEGGEGGGEGGFEGGGDGFGGGEGEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEEFFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_819/2\t163\tchrX\t88035630\t60\t91M\t=\t88036008\t469\tTGTGTGTGTGTGTGTGTGTTTGTGTGTATCTTGCAAATTCTAGGAAGGCCCTAGATTATCCTGCTTGGACCTCACTGAAGGAACTGATTTG\tEEEEADDDDDEFEEDEEBEAEEEEEFFAFDEDEBE-BCDDACDDD-CCCCA?C=CCEAECCCEB?5C@@BFFFECCE:CC0,534??B=AE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_821/1\t83\tchrX\
t120359247\t60\t91M\t=\t120358878\t-460\tGATTTCAGTCTCCATTACATCCTTTGTCCTATCCTGAAAGTATGCTTGAATAGCTTTGATTAAAAATGAAATCTTCTCTTGGTGCTGGATT\t?9?7?;=-@DBC@CB>ADA=DEBC=BEBEEEEEBBEEEECEEEEDEEEDEEEEEEEE:EEBEEE?EDDD:D:DEDEEE:DE:EEEDDE5BB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_821/2\t163\tchrX\t120358878\t60\t91M\t=\t120359247\t460\tAAACAGAGTTTCATTCTGTCACCTAAGCTGGAGTGCAATGGCACAATCTCAGCTTACTGCAGCCTCTACCTCCTAGGCTCAAGTAATGCCC\tGGBGFECE5=CCCCCGGEFGF=FA=EEAEECEE5BB==BC?EEEE?DC:DBE?EBBC?CCECEAED=5AC@@?8BBB=EECCBCC=A=@;A\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_823/1\t99\tchr12\t20053844\t60\t91M\t=\t20054214\t461\tATGTTTACATTCATTTTCTATGTGGTGCTGCTCTTTTTGCAACTCTTCTGATTTGATATACATTGATATTTTGTACAGAATTGGTTCCTTC\tGGGGFGGGGGGGGGGGGGGGEGGGGFGEGGFGDGGGGGFFGGFGGGGGGADGGGDFGGFAGFGGGGFGG?FFFEDFEBFFDFFFB?AEBDB\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:19G71\nfoo_823/2\t147\tchr12\t20054214\t60\t91M\t=\t20053844\t-461\tGAAAGGGACCCAAGCTGGTTGCCTCTGCTGGCTCGGGTGGCCAGGTTTTATTCCCTTAGTTGGCCCCGCCCACATTCCGCTGATTGGTCCA\tE??:CCA5A?BE?:EEB=CC??CBE:EFDGEGFFGGF:FBGGGGGGFFFECDGGGGGGGGFGAGGGGEG?EGGAGGGGGGFGGGGEFGFGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:58T24C7\nfoo_825/1\t99\tchr18\t50458738\t60\t91M\t=\t50459109\t462\tTCACTCTGTTGCTCAGGCTGGTCTCAAACTCCTGGCATGAAGGAATCCTCCAGCCTCGGCTTCCCAAAATGCTGGAATTACAGGTGTGAGC\tFFFFFFFFFDAEAE=FBFFFFFFDFFEFFBBFAADDEF?FEBFFEFFFADEBB?EEBBCBF?DBE5@@A58:4.?>A??<A?<A#######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_825/2\t147\tchr18\t50459109\t60\t91M\t=\t50458738\t-462\tTATCTACACTGAAACCCTGTTGTAACCAGTAGCCCTGTTCCCTGATGATGTCCATGAGTATTTTAGGGAAAACTTTTGGACATGGTATATT\t?5?A??;:<=7:56'0)5;;0=D5ADDEED?FFGFGFDGBGEDE:BEEEEDFF=FFDEGFEGGDBGGEFEGFF:FFDEEBEDD?DDBADDD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:14A76\nfoo_827/1\t99\tchr2\t5266539\t60\
t91M\t=\t5266920\t472\tCTAGAAATGGGTAAGTTTGTTGAGGAAGGCATGTAGAAAGCTGATATGTGATGAAATCTAGGTCTCTTGTGCCAAATAGCCCAGTTGCAAA\tGGDGFGEGGGFDFFFGGGGGFFAFFGGGGFDGFEBGAFAGFCGC?ABA?DBCCCCBEEEE?B@E@DEEDCE5DEEFFFEEBDBE5ACBAAD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_827/2\t147\tchr2\t5266920\t60\t91M\t=\t5266539\t-472\tCCTAGGGGAGAAGCTGCAGCAAGTTATCCAGAAGATCTGCCTAAGACCATCGATGAATGTGGCTACAGTATACAACAAATTTTCAATGTAG\t##################ACBB5DBBB=DCBB?5>AD5:D?EBEDEDEECECAAC==CEBEFFFFDEEEEDF>FFAGGFGGGFGDGGFGGG\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:0G2G0A0T85\nfoo_829/1\t83\tchr1\t220333851\t60\t91M\t=\t220333460\t-482\tATCAACGGTTGCCAAGTGATTCCAGCCTATGACCACCAGTGGCCAGATATTGATTTCTCTTGTATTCAGCAGCAGTTGAAGTTATGCATAT\t######################BCBCCCECC2FFCFGFGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:21A69\nfoo_829/2\t163\tchr1\t220333460\t60\t91M\t=\t220333851\t482\tCTAAGAATAAAAAAATTTTTTTTTCATTAATGACAAACAAAAGAAAGAAACTTCAGCATATGGTTTGCAGCCATCAAATGGCAGCCTTTTT\tGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGDGGGFGGFGGGGGFGGFGGGGEGGGFGFGFGFGGBGGDGGFEGEAGDDGGFE5B?BADE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_831/1\t83\tchrX\t134261126\t60\t91M\t=\t134260743\t-474\tACATATAATTTGCACATTTTTCTTCCATTCTGTGGATTAACTTTCAACATTCTTGATGGTGTCCTCTGCTGCACAAAAGATTTTAATTTTG\tEFGFBEFEEDGEDDFC?<?BCE=AEEEEDGGGEEGFFFF=DE?EGGGGFBGGFDGGGGGGGBGGGFGEGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_831/2\t163\tchrX\t134260743\t60\t91M\t=\t134261126\t474\tTGGTGTAATTGCTAGATCATATGTTAACTTTATGTTTAAACATTTGAGGAGCTGCCAGACTATTTTCCAAAGGGACTACACCATTTTACAT\tFFFFFGGGGGAGGDDGGGGGGEG=?FDFDAGDFGGEE=E57=@B=EEEED?DEEEFA=FFBB5ADD?:C=C5CAAFB===EEE-B4->96?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_833/1\t83\tchr7\t109887272\t60\t91M\t=\t109886
912\t-451\tATTTTGGTGTGTTTATTGGCCATTTGGTTTTTTGTGAATTGATCACATTTCTAGTGGTTTGCTTCTCTTGTCTGCCTTGTCTTTATGTTGT\tDB>@,=CECD?8?=?=>9.>=@=>A?<>8DD,D?EEDC?DCB:@2?CAB-DEGGGGFAFBFEE5EEEAFDEDGDFGAGGBGGGGGGGGBGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:19T71\nfoo_833/2\t163\tchr7\t109886912\t60\t91M\t=\t109887272\t451\tTAATTCATTTGTTCATTTAAATAACTAAAATAAATCATGGTTTGGATATTTTATAATTTATTTAGGCATTGCCCTAGCACTGAGCAACATA\tEEDEEEFFFFDDDB:DDDCBE=EEEEGAGEGFA@C5:AABCEED?@@,;@CCACA>CB?5AC6<C?5AC-=@6>@9--93+:<5BEEE==E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_835/1\t99\tchr14\t104525651\t60\t91M\t=\t104526025\t465\tACTCAGCCTGCGAGCCTTTGTCTTTTCCAAGGTAAAGTGCTGTGCTTACTGTAGTATGTATAATTTCTTTTTTTTTTTTTTTTAGAGGGGG\tGGGGGGGGGGGGDGGGGGGGFGGGGFGGEGGGDGEEE@EEGGFGEGFAGGGFEGEECDE?ECB=E?FDGG=CC@>CCEEE?##########\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:82G3C2A1\nfoo_835/2\t147\tchr14\t104526025\t60\t91M\t=\t104525651\t-465\tGTATAATTTCTTAACCAAACTGGAAAACAGCACTGACATTATTGGGTGGTCATCTTTTTTCATGTGTCACTGATGAAGTTTGAATGTTTTG\t###############DDBDDCA'A;;55CC:=ADDDBADDD5ABDDBDDEDCG?GGEBEEEEECDFEFDFDGGFGGGGGGGGGFFGDGFGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_837/1\t99\tchr8\t29126628\t60\t91M\t=\t29127016\t479\tTGCTGTGCATCGTGGGGTGTGATGGAAAGAACATGGCTTAGGAGCTAGGCAGACTTGAATTCAAATCTAGATTCAATAAATGCTTGTGCTC\tGGGGGGGGGGGFFGGFEDFFFFFFFEFEGFGGFFGGGFGGGGEGEGE?GEFF@BEEE?B=CCCCCEBEBBCDEDDEEC=EBACAC=03;>?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_837/2\t147\tchr8\t29127016\t60\t91M\t=\t29126628\t-479\tTGCTCCTAGACTACAAACCTGTATGGCATTTCCTGGAGGCAACTGTAACACAATAGTAGGCATTTGTGTATCTAAACATAGAAAAGGGGCC\t#######?C=C81?=9-=CAC:5D:?=E?E=FEDFEE?GDFDEEEDEEBCEGEEGDFFFDF;GGGGGGGFFAGFFGFGGGGGFGGDDFGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_839/1\t99\tchr16\t9242042\t60\t91M\t=\t9242386\t435\tCAGTCT
TTTTTTTTTTTTTTTGGTCTTTTTGGTAGTGTATTTTTTGAATAGATTTTTGGGAGTAGGATTGCTATATTTTGCAGGTTTGTCC\tFFFFFFFFFFFFFFFFFFFFECB5=CCCC=8>64?380*>CCC@:0:<?54'?>BAA>A?###############################\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_839/2\t147\tchr16\t9242386\t60\t91M\t=\t9242042\t-435\tAACCAGCCTGAGGCTCTCACCAGGAGCAGATGCTGGCGCCATGCTTCTTGTACAACCTGCGGAACTGTGAGCCAAATAAACCTCTTTTCTT\t#####################ABA5BBEGGGECEE5EECEE:BEDEDB?BBBEEEAGADFGGAEGGBFGGEGGGFFGGGBGDFGGGGFGGD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:2G88\nfoo_841/1\t65\tchr16\t73154720\t37\t91M\t*\t0\t0\tACATCAGAGGTGAAAAAGAACACTAGTGCTTGGAAGCCTTCATCATTCTGAATGGCCTTCATTGTTAGGTCACTTTTTCCAGAGTTTGGAG\tFEAFFECB?:?DB=DEEE6CFFDFFFB?FFEEEBEAEEEFC5C@CFBBDEE?BBE:?@B?@AEEB>*?<117)24AA<AAEB:E*2*=:9D\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_842/1\t99\tchr17\t10324804\t60\t91M\t=\t10325200\t487\tAGAGATATTGAGCACCTTTTCATATATTGGTTGGCCATTTGTATGTCTTCTTTTAAGAAATTTAGGTCCTTTGCCCATTTTTAAATAGGGT\tEEEEBFBFFEGGGBGGFGGGEGFFFBEEEDBDEECGBEGGCDE?FEEEECFFEC=FEDEEBEE-:>.8?8<66:=DABDCC;=CBE?E=EA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_842/2\t147\tchr17\t10325200\t60\t91M\t=\t10324804\t-487\tAGCAGATCGCATGCGAACTCACTCACTATCACAAGAACAGCATAGGGTAACCACCCTTATTATTAAATTACTTCTCGCCAGGTTTCTCCCA\t################?B,@>=5DADAA>>?7=/7*@5:CC=E5FE>B:9=A-AACABAA,CCCCADDDADDFEEGDG:FGDEFDEFDFFF\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:1T6T4A46G30\nfoo_844/1\t81\tchr4\t8067024\t37\t91M\t*\t0\t0\tTCTTCTCAGAATAATCTCTATCTCTACAATTCACATTGAATTGGAATGACATTGAGAGGAAGAGAGAGAGGCAGAGGGAGGGAGAGAATGT\tE=C??C:FCGFEE?CE@C-:>C>:>=@C@?:FDFEEGGGGBEGEGGGGDFGGGGGGGGFGFGG?GGGGFFGFDGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_845/1\t83\tchr2\t50160845\t60\t91M\t=\t50160468\t-468\tGGGACAAGGAGAGAGAGAGGAAGAGGCCACATTCGCAGAACTTTTAT
TATGGCATATTGTTACGATTGTTCTATTTTATTACTAGTTATTG\tEDCDEFBGEFFGGGGGEFEEEEEEEEFEEFGEGGGGEGGEGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_845/2\t163\tchr2\t50160468\t60\t91M\t=\t50160845\t468\tTACATGCAAAGGCCTTTATTAACTTCTTCCCATAATAATGAACGGTAATATGGTAGTCTCCTCTTATTCTACAGTTTCAGTTACCCACAGT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGFGGFFFFFGGGGGGGGGGGGGFEGGCGGFGGFGBFAFBFGEAGEEDGBFDFDD6\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_847/1\t83\tchr1\t60434645\t60\t91M\t=\t60434256\t-480\tCATCAATAACACAGTTGACACATATTTTGTGTGTTACATGTATTATATACTGAATTCTTAGAATAAAGTAAGCCAGAGAAAAGAAAATGTT\t:4AAB:8;6:=,37)2?;<B=40*4;627+:7'7=+*;943066621?<8>;?;*<636<3>>8><;+277<>B<>8/;41?2=8BA7,<<\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_847/2\t163\tchr1\t60434256\t60\t91M\t=\t60434645\t480\tTGTGAGAAGTAAAATACATTCAATTCAGTTAATTTAGAATATACACTAGTCTTATTTTGACTTTTATTTGAATCTAAGCTACATCTCAGTA\tBDBB:A:AA-7<7,7>+2:60,7763++3,767;@:.=??5;959267,@38>76?:8+:<3>5;(21.(>B??#################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:13T2A67A4A1\nfoo_849/1\t83\tchr1\t58484140\t60\t91M\t=\t58483770\t-461\tGGCACATCCTGCACATACAGTATCGTATAAGGTATGAAGATGATGTACATGAATATATATTGCAAGTTTCCTGCATATAATCTAGTTAGGA\t?040-*@DD@B=@AC@=BAEFGEAFGEFDE=GFGGGFGGGBGFGGGGGEFGGGGGGGFGEGGGGGGFGEGGGGGFGGEGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_849/2\t163\tchr1\t58483770\t60\t91M\t=\t58484140\t461\tGCATATGAAGCCGAGTTAGATGCCCTGCCTCTGGGTGCTCACAGATCCACTAATAGGGTAGAGTGGAAAAAAGAGAGGCAACAAAATGAAG\tGGFGDGFGEGGGGEGGFGGGGEEGGAGGG=EEDEBEFGGGFFEFF=FFEEFEFGGEFDDFEEEBBEEEDEE:EAABEFFDDFDFF>;DA?7\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_851/1\t83\tchr9\t98951549\t60\t91M\t=\t98951179\t-461\tAGGATTCAGGAAGCCTCCCAATCATACCAGAAGGCCAAGGGGCAAGTAGATGCTTCATATGGCAGGAGTA
GGAGCAAGACTGAGAGAGGAA\t1;:994>=?9@B,FEBFEDEFF=BGCFFEFFGFEGEGBGEDG=GGGGGGGFBGGGGGGGGGGEFGGGGFGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:4C86\nfoo_851/2\t163\tchr9\t98951179\t60\t91M\t=\t98951549\t461\tAACAGTTATTTTCAGTGGTCTAAACTTTTCCTCATCTTGCTGTCTTCTAAGGGTTCCCAACTCTCCCGACCTCTCTCTCTTTTACCCACTT\tGGGGFGGGGGFGGGGGFDGGGGGFGEDGGFDEGFFGDFDGFEGGGED:EEEEEB?DDEEEDFG?GCA6?@EAEEEG=EEDCDFBDFBED?D\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_853/1\t99\tchrX\t119574899\t60\t91M\t=\t119575275\t467\tATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCATGCACCACATTGCCTGGCTAATTTTGTATTTTTAGTAGAGACGGGGTGT\tFGGGGGFGFGEDGGGGGGGGGGDD:EEEEEGEDFEEEEEFBFEGDEFDDG=BEEEFEBAEEBB=EAC:??EEEE>??BEE?BBFB@?C###\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:89T1\nfoo_853/2\t147\tchrX\t119575275\t60\t91M\t=\t119574899\t-467\tGACTAAGCTGCTGGACTCCAAACTGCTACTCATTTCTGGGCTAAGGTGCGTCTTGTAGCCCAGGTATTAGAAAAATGTAAAGAGCAAGAAA\t#######################B==6B@=;6?>AA><B.?.=>?;>@?-@=@@=?=-CCCDC=D?F>EAEED=EE5CCA>F=DBDGA?E?\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:17A73\nfoo_855/1\t81\tchr4\t170955375\t37\t91M\t*\t0\t0\tGGAAGAATAGCCCAGGAGTTTGAGATTAGCCTGGGCCTCAAAGCGAGAACCCCATCTCTTCAAAAATTTTAAAAATTAGCCTGGCATAATG\t##########################################?939@@>C6A6CC?8>7()==:B=>@=?@>>>5=@=>.@76+27B66;@\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:37A21A31\nfoo_856/1\t83\tchr18\t36681794\t5\t91M\t=\t36681411\t-474\tATAACAAAGACTTGGAACCCACCCAAATGTCCAACAATGATAGACTGGATTAAGAAAATGTGGCACATATACACCATGGAATACTATGCAG\tFDF=BFGAEEGC=ECC5;CBACCCCDEEEEGEEEGGGEGGAGGEGGFGGFGGGGGGGGGGGGDGDGFFGGEGEGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:5\tAM:i:0\tX0:i:1\tX1:i:63\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_856/2\t163\tchr18\t36681411\t15\t91M\t=\t36681794\t474\tTGAACAGACACTTCTCAAAAGAAGACATTTATGCAGCCAAAAAACACATGAAGAAATGCTCATCATCACTGGCCATCAGAGAAATGCAAAT\tGG
FGGGGGGFGGGGFGGEGGDGCGGDDFFFBEEAEGFGGFGGGFGGGGFGGDGAGBEEDEGFFGGFFGEGGD:GG?EEC=CDEECEDAEEC\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:496\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_858/1\t83\tchr10\t26879805\t60\t91M\t=\t26879418\t-478\tTTTTAGTGGGCGTATGCAGAAATTGTGAGCAGATTGACTTTTGTATGTCATTTCAGCGCTAGAGGGCCTCCTAAGTCCAGGTTAGACCCGA\tE:ED;CFGE?FEEEEDB@BC@A5GBGEEF:FEGGGGEBGGDGGFEDFFFEAEGFFGBDFDFGEGGGGGDGGGGGGFEDEEEFFFFDEBEEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:57A33\nfoo_858/2\t163\tchr10\t26879418\t60\t91M\t=\t26879805\t478\tCAAATAGCCTGCCTTCAAGCTGACTGATTATTCTGCTTGATCAATTCTGCAGTTGATGTCCTCTGTTTCATTTTTCATTTTGTTTGTTATA\tFD?GGGGDFGFGGGGDGFFGFGCGDEE?EDFFFFEGFFFFFFDFFGGDGFEGDDGFAFFAFBFBAE?E:?FEFFFABAFFGGG<ECFFDFA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_860/1\t83\tchr21\t39498413\t60\t91M\t=\t39498031\t-473\tAAATCACATTATTCTCTACATAGCAGTCCCCAAGCTTTCTGGCACCAGGGACCGGTTTTGTGGAAGACAGTTTTTCCATGGACTGGGACTG\t@@A?D?GGEEGCCAEDCCFEBFGFAEEECEECDCD=DCDDDBEBBEGGGGDGGGGGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_860/2\t163\tchr21\t39498031\t60\t91M\t=\t39498413\t473\tTATATGTACTGTAATAAAAATAGTATTAATATAAAATCGATTGTTTGCAATAGATTGAGATGCATAGTGTAGTCCCCAGAGCAACCACAAG\tGGGDDGGFGGFFFFF+BBA;BCCCDEFFDFGGGGGEFFBDFDFFFGDFDGGGGGFG?GAG:FFFFGGE=FFF:FFFGEFFFGDDGFA?FFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_862/1\t83\tchr12\t59885594\t37\t91M\t=\t59885228\t-457\tTTTATTTCCTTGAGGAGTGGTTTGCAGTTCTCCTTGAAGAGGTCCTTCACATCCCTTGTCAGTTGGATTCCTAGGTATTTTATTCTCTTTG\t?E=CFBEDG?GGGCGGFFGGGEGGGGGFDGABEFFGFGEFGGGGGGFFFFFEGGGGFGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_862/2\t163\tchr12\t59885228\t29\t91M\t=\t59885594\t457\tACATAGTTGTAGATATGCAGCATTATTTCTGAGGGCTCTGTTCTGTTCCATTGATCTATATCTCTGTTTTGGTACCAGTACCATGCTGTTT\tGGGEEGGGGGGGEGBGGGGGGEGGGGGGFGFEGF
EEBFFFFEGGFGFGFGGFEEGGGE?EGGFGGEEFEEEFEEGB=?CB@B5BCEDE?EC\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:90\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_864/1\t83\tchr1\t49824562\t60\t91M\t=\t49824281\t-372\tGAGTATGCCTAGAGAATGACTAGCATTCCTCTTTGCTGCAAGTTTCTTTGCTCGGTAACAAATTTCATTTCAGTCTATCAAGACAACTTTT\tGGFGGFGGEEGGGGGGFEEEFGGGEGEEEGEGDGFGGGGGGF@GGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_864/2\t163\tchr1\t49824281\t60\t91M\t=\t49824562\t372\tTGCTTGTTCAAACCTACAAAGTGAATTGCTTATTTTTGCTTAGATTCAGAAGACACAAATATTTATGGAAAATGTATTGACTGTCAAATAC\tGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGEGGFFGGGFGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_866/1\t83\tchr1\t216412360\t60\t91M\t=\t216411978\t-473\tATATTCAAGGTGAAATGAAGCTACCAGAGGGGGAAGCTTTTGCATAATTTAAGTTCCCTAAAAGATTTGACCAAACAGTGTGAAAATAGTT\tBGDFGGGDGGGGFGGEFFDBEBEAGGGGGGFGGGGGDC?FGFAFFFFFGGGGGGGGGDFFGGGGGEFDFGGGGGFFGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_866/2\t163\tchr1\t216411978\t60\t91M\t=\t216412360\t473\tAATTCATAGAACCAAACTATTGATAAGTAATTACCCTCAACCCATCATACTATTTTGTATTAAAATAGTTCAAAAGAATTGTAGAGTTAAT\tGDGGGFDGFEGGGGGGGGGGGGEGFGGDGFGGGFGGGGGGGGGGDGEGBFGGFGGFGCGGGGGGGGGGGDFGGEAG?GGFE:B?CE=EEE?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_868/1\t83\tchr6\t27519370\t60\t91M\t=\t27518977\t-484\tGCACACCCCTCAAAACCTCAGTTTCAAGCATTCCACTTCCAAACTACCAGGCCCTATCTTTTCAACTTACTCTCCTCCCTTTAATACTTCA\t#########################@>=:@C=?-=CA?=A<>54/.5=B<==>>>58=1B:66@C;;708+;,A<3DDDDDDDD:DDD?DD\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:6T84\nfoo_868/2\t163\tchr6\t27518977\t60\t91M\t=\t27519370\t484\tATTCCACACCTGACTTAACTGGGCCTGGAGGGGGAGGTAGGTGCCCTTTTTGCTCCTCCTTGCCACATTCAGGCCATTGTTTCTCTCCCTA\tA>ACA:CCC?E?CEFBFFGGGBEFGE?=AD<A&9(.0'<A?,A5?03;9;A,C?A=B@###
##############################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:32T25A32\nfoo_870/1\t99\tchr12\t34222174\t60\t91M\t=\t34222564\t481\tAATAATTGGTGACTGGTTAATTTATAAAGAAAATAGGTTTAATTAGCTCATGTTCTACAGGCTGTACGGAAAGCATAGCACTGGCTTCTGC\tGFGGGGGGGGGGGGGGGGGGGGGGGGFGGFGFGGGFFGGGGGGGGFGEFGEGGGFGGGFAEDFFDGGFGCFFFD5BBDCDBFAFD=@####\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_870/2\t147\tchr12\t34222564\t60\t91M\t=\t34222174\t-481\tGCCATTCCAAATCTCATATCCTTCTCATATTTCAAAATACAATCATGACTTCTCAATAATCCCCAAAGTCTTAACTCATTTCAGCCTTAAC\t?A?<-AA:ECBDDEAFDAAFEFEGGGGDGGDGFGGDGGGGGGGFGGFFF?EGFFGGEEECBEEEEDAGFGGGGFGGEGGDAFGGGGFGGGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_872/1\t83\tchr20\t5083099\t60\t91M\t=\t5082717\t-473\tCAAAGTCTCTTCCACTCTGAGCATTCGTCCAAATGGAGGAGATAGACAAATGTGCAGTCATTTCAGGTGGTAGTGATGTTAGAAGAATAAA\tEBAAC=AACCCD=?DAEGEEDEFEFDEEGBGFGGGGGGEGGGGGGGGGGGGGGFGGGGGGGGGGGGGEGGGGGGGGGGGFGGGGGGGGEGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_872/2\t163\tchr20\t5082717\t60\t91M\t=\t5083099\t473\tACCCCGTTCCCACCTCATGTCTCCCTTAGGCTCCAGAATACCTATGTGGAGGGCCTGAGTGCTAAGCTGTCCTGTCATCTGCAAGCTTGTC\tGDGGGGGGGGGGGGGGGGGGGGGGGFGDGGFADF,C=A@AFDDFFFEGGGGEGCFCDCB?DEBEEDBE=:?BB=BEBBFEGEC5F######\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_874/1\t99\tchr1\t203987262\t60\t91M\t=\t203987636\t465\tAATGATCTGGGCGCATCTGGCAGAAGTCTCCAATGAGCAGTCAGTTAACAACCAATTGAAGATACACTTTCCAAACAACAACAACAAAAAA\tFFGFGGGGGGGGEGGFGFGGFGGFFGEGDGGGGGGDGFFFEGFAEFCGEEFBGGFEEGEEGEGEGGGEEGCGEADFFFFEGGDGGEAEEE=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_874/2\t147\tchr1\t203987636\t60\t91M\t=\t203987262\t-465\tTAGCTGGGATTACAGGCGTGCTCCACCATGCCCAGCTAATTTTGGTATTTTATTGTGGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTC\t:E:AFE@CCAAC?FFEAFBD?AFGBGEE5DE?GDGGGDGGGGGFGFGGGGGGGGFFFFFFFEEBDE:FGGGGGFFGGGGFGGG
GGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_876/1\t83\tchr4\t189625940\t60\t91M\t=\t189625553\t-478\tGACCCAGAAAAGAATTAAGCTGAGATATCTCTTAGTGATACTTTCACATCCAGATTTACGCTTAGATTCATGATGCATTACAACTAATAAA\tFCGEFDGGGGFGGGFGEEEGFEFGG=DAFEGGGGGEGGFEGFDFGGGGEEGGGGBGGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_876/2\t163\tchr4\t189625553\t60\t91M\t=\t189625940\t478\tATAAGATTAGTCCAAATTATCAAGTATATTCCATCATTTTGAATACTCATAAAACATATTTTGATTTTTATTATTTTAAGTGAATATTTGG\tGGFGGGGGGGGGGGGGGGGGGGGGGFGGGGGGDGGGGGGGGGGGGGEGGGEGGEGDDAGDGGGBGGGGGFGFGGEGGG5G?EDEEFGGFFA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_878/1\t99\tchr18\t33918482\t60\t91M\t=\t33918853\t462\tACACAGCTGATACATTTCCATATCCTTTTCACTTTGAAGATATGTTGCAGAATAATGACAAGGTGAGGTTCCACTGAGTCACATACATGTG\tGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGFGGGGGGGGAGGFFGGGGDFGGGEFDGBFDEECAECEC?CEEDEEEEEEE=EEEEGDAEFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_878/2\t147\tchr18\t33918853\t60\t91M\t=\t33918482\t-462\tGAATTGACCACAACCACAGAAGGAGGAGAGCAAGGTGGAAGGCCTCAGTAAATTACTGGACTGTGGAAATAAGATGATGAAGTGGTAACTG\t=<;7<=0>=9=CE?DEAFDFBAGGDGFFB=FFFEFFCC=C-ADEEEDD5DDDBDAD?BDAEEGGGEEEE=BGE?GGFFE?FE?GGGGGFGF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_880/1\t83\tchr7\t72008981\t23\t91M\t=\t72008612\t-460\tTAATTTTTATTTTTTGAGATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTCACTGCAACCTCCACCTCCTG\tED8>@8?-@@=?<AAA=@>@>BB@BBC?ADD;A=>?:DBC?EAEAEDEF?FDFDFFFFEFFCDD?:FFFFFB?EEEC?DCDAA8>AEDEEE\tXT:A:U\tNM:i:0\tSM:i:23\tAM:i:0\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr7,+74933732,91M,1;\nfoo_880/2\t163\tchr7\t72008612\t14\t91M\t=\t72008981\t460\tGGCATTTGGATAATCAGTCCTAAGGCCCCAGGCCCTGCTCTGTCATTGACTGTCATTTTGAGTGAGTCATTTGATCTCTGGGTCTTTTTCC\t5?DD=DC==:EDEEEF?CEBAA?C:B?BE:=C@@@>>B.;@B@BCDB?D=EAE=EEEE=:3<6?*7(@47@@@.>?AA?-=B
0BBEEEB>D\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\tXA:Z:chr7,+72008612,91M,0;\nfoo_882/1\t83\tchr6\t23795237\t60\t91M\t=\t23794855\t-473\tCAAAATTCCCACATCGTTTGCATGTCTTTCTGCATTTCCATTTTCTATTAAAAAAATCTATCTCCATATTTTATAGTTCTTTGATTTGCTA\tEEADEE4D?EBAAAAC:==>=CB4AD=DB?BEE=BEAEE;E=EE=EEDCAE=EEEDEEDEEBCEDEEEEEDDDDDDD=EEEEE?EEE:EDD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_882/2\t163\tchr6\t23794855\t60\t91M\t=\t23795237\t473\tCATGCACAAACCAGCAAGCCTCTAAAAAAAGTCTTGAGGTTACAGTGACTAGAGATTGTTGTTTAGGCACACTGCTCCAATACCCTCATAC\tD?DEGGFFGGFGEBGGG=A?EEEEEEGF?EBCDEDEBEBBDDDDD:@@@CEED?=:AAAA=EED-=?.>12?9:A:;<:A###########\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:67A23\nfoo_884/1\t65\tchr16\t63049877\t25\t91M\t*\t0\t0\tCAAGGAATCTAATTATATATATGCCAAACACTGGAATCATGAATAACACTAAAATAATCCATTTTAATGACATCAATCTGGGGAAATAGAG\t2756,22772,2,777227,1:;:;6,2272=7@62>,+20,7@7;A@;;AA><@*1-):BBA?B##########################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:4A25T6G42T10\nfoo_885/1\t65\tchr4\t24040080\t37\t91M\t*\t0\t0\tCAAAGATGAGTTGGTCTCCTGCGTCACCCCCTACAAGGCCTAATGAGGGAGACAAAGGCATATTAGCAGGTGAATCACATTACAAAAAGGC\tGGFGGGGGGGFFGGFGAGGFEFGGGFGGGGGGGGG5EEEEFGEGFGFEFEEADF=FFDDDAAE?ECEECE;B0-=??AA?DEEABEDA<B:\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_886/1\t65\tchr4\t129172075\t37\t91M\t*\t0\t0\tAAAAAAAACTTTTTTTATGCTAAAACTATAATTTTCTGGTATTCTGCATTATTGAAACATATTCCTCATGTTATTCTTAATGATTAAGTAA\tB2BBA?A####################################################################################\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:8A66A15\nfoo_887/1\t83\tchr9\t3543323\t60\t91M\t=\t3542970\t-444\tGCTTGAATAGAATTAGCTGATCATTTTAAAGCTTAAACCATATCCATATGGCTTTTAAAAAGACACTAGTAGCAGAATCAACTTAGAAAAG\tAACA5AA=FDGEEEEACBDC:=CC??EEEDEAEBDDAGFFGFFFDFGGFEFFFGGGEFFFEGGGEGGGGGGGGGFDGFGGGGGGGGGGGGF\tXT:A:U\tNM
:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_887/2\t163\tchr9\t3542970\t60\t91M\t=\t3543323\t444\tAAATGCAGTAAGATCAAAACTTTGTGATTACAGAAGAAAGCATGTACAAGCAGAATCATAGAAAAATTACCTCCTGAAATATCAGAACAGG\tGGFGGGGGGGGEGGGGGGGGGDGFBDDFFFGGGFGGGGFGGGGGGGGGGGGGGBGGGGGGGGDGGAGGFGBGGGGDD=EFFGFGDFFFBGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_889/2\t129\tchr16\t83932448\t37\t91M\t*\t0\t0\tTACCATGCTGAGCTAATTTTAAAAATTTTTGTAGAGGCCAGGCGTAGTGGCTTATGACTATAATCCCAGCACTTTGGGAGGCCAAAGCGGG\t:ACCC:EEEEEEGGAFBFF?FBBDEAFFFFGFBGBFDBBEEEEE?DD5DD<6;@;5-<<>AAC::B#########################\tXT:A:U\tNM:i:3\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tMD:Z:69T15G0T4\nfoo_890/1\t83\tchr5\t58979821\t60\t91M\t=\t58979447\t-465\tGGAACAAAGTGCTCAGGAGATATATTCACTGACCAATTCCCCAAGTGCTCATGTGTGAGCAATTCAAAAATTCTAAAAATACTATTCAGTG\tEGGEEGEGGEGGEBEGEEFFEFEBEEGEFEGCFEFBGEFGGGGGGGGGEGGFGFGGGGGGGGGGFGGGGGGGGGGGGGGGEGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_890/2\t163\tchr5\t58979447\t60\t91M\t=\t58979821\t465\tTTATATATCTTTGCAGGAAAAGGTATATGTAAAATGAGGTAGAACTAAATCAGTGATTCTTAACCCAGAATCACTACACTGTAATAGAAAT\tGGGGGGGGGGGGGGGGGGGGGFGEFFFFFFFFGDGGGFGEGGGGFGGGGGFGFGGGGGGGGGGAFFFEFFAFFGGFBFGFGFFFCEFFFFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_892/1\t65\tchr4\t187482106\t37\t91M\t*\t0\t0\tAGTGATTACTTCTGCCTCAGTTAGGAAAGAATTAACAGGAGTAATTATAGTTGAGCCAGATAGGAGTTTTCCAGGAGGAGCACAACATCAA\tGGGGGGGGGGGGGGFGFGGGEGGGGGGGGGGGGGGGFGGBFCFFFFEFFFEGGEEGGFF@FGFGEE=ACEFFBDFEFFDFFGGGF=FFFFF\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:47C43\nfoo_893/1\t83\tchr4\t166802072\t60\t91M\t=\t166801716\t-447\tCCAATTACATTTTTCCTTAACATGTACTAGCATACTTACTGAAAAGATATTTTTCTTATACTATAACCTGTTAAGCATATAAATAATTTGT\t#A:DDCACA>>E?5EEEEEBBACA5;BCEDEB>=B=@5ACCD=DD?CB@@BDF=DFDEEEE?FDFDDFD=FBFFFFFDDFDFFF?E?FFFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tX
M:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_893/2\t163\tchr4\t166801716\t60\t91M\t=\t166802072\t447\tGTTACCAAAACTTGAAGAACGGGAAATGCCAACATTTTGCAGGTTTTACTATGATTGCATCTATTCAGCATTAGAAAAATCCTAAATTTCA\tDDBGFFEAE?FFGFGG?FGGEDFD?EFFFFGGGDFGGG?A?DEEEDEEEBD5CADFFGDDFGFGGGGEGFAEDAADDB@2EEDECB?A?DD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_895/1\t83\tchr1\t192349686\t60\t91M\t=\t192349312\t-465\tAAATGCTTTCCTTCAACAGGCCTGCTGTGTTTGGTTAAAATATCTTAAACATTTTATGTATTGTTAATTTTTTTACACTTTGAAAGCTTCC\tGGGGDGGDGGGDGFGEGFBGGGDGGGGGGGGGGGGGGGGGFGGFGGGFGGFDGFGGGGFGGGGGGGGFGGGGGGGGGGGEGGGGFGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_895/2\t163\tchr1\t192349312\t60\t91M\t=\t192349686\t465\tAAAATTCTCCTTCTGCGTGCTTTTAGCTGTGATTTTTGAAGCAGCTTATAAAAATAAAAATAAAAAGTCACAAATATTTTCGATAAGCTGT\tFGGGGGDGGGGFGGDGGGFGGGGGGGGGGGFFGGFGFGDFGGGGGGFGFGGGGGGGFGAGEGAGGGGEGGGGEDGGGGGGFGGG??EEEC#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_897/2\t145\tchr13\t46459041\t37\t91M\t*\t0\t0\tAATTCACATACATTGTTTTTCCTTCTTTGTGAGGTTATTTTGTCAATTAAATGATTTCTTAGTGCCGTATGAGTTATAATAGGGTGGGTAG\tGEEEEADDDABDGGGFDGFGGGGFGEFDGGFGGGGEGGFGGGFGGGGGGGFGGGAGFGDGFGGGGGGGGGFGGFGGFGGGGGGGGFFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_898/1\t99\tchr6\t150659863\t60\t91M\t=\t150660223\t451\tTTTGCAAAGATTATGACAGTAAGAAAAGTCTAGGTGGCTGACTCCATCTTGCTTCTAGCCTCACAGGCTAGCTGTCCCTGCTCATTCCTGG\tGGGGGGGGGGGGGGGGGGGEGGGEFGGGBGFFFFCGGGGGGGFGGADGEEGAEEFEEGGGGGGEECFEDD6:5:77?##############\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:77T13\nfoo_898/2\t147\tchr6\t150660223\t60\t91M\t=\t150659863\t-451\tTGTGGAGCCTAAGATTGGTCTTTTGAAATGTTTTTCAGACTTTTGCATTCTGGTGATCAACAGACTCCACCCAGACCCATGACTCATGACT\t?E5DBEGGBGEFFF=FDFFF?GGGGGFD=GFFFF?FEE=E?FFGGGEGGGEGGAGGFFFFAEFCEAFFDFFGGGEFGGEGGGGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:
91\nfoo_900/1\t83\tchr18\t27108044\t60\t91M\t=\t27107673\t-462\tACAAAGTTCACAGATATAAAATTTTGAAAAAATTTTGAGCCAGTAAAATTTCTTCCAGAAGGCAAAATTCCATGAGGTTTTGTTTTTAATA\tEGGGFFGEFEGGGGGGDGGGEEGGGDGGGEGGGGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_900/2\t163\tchr18\t27107673\t60\t91M\t=\t27108044\t462\tAAGGCAAATTAGTATTACATTTGCACCATATATTCCAAGAAATTCAAAATTGCTCTAAATTATAAACACAGTATTTCTTTCAAATAAGTGG\tGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGFGGGEGFGFGGGGDGGGFFFAFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_902/1\t83\tchr18\t1844801\t29\t91M\t=\t1844371\t-521\tAAAGGGGAGTTTATACACACACATACATATCCACATATACATATACACGTATATATATAAAGGGGAGTATATATATATACATATATATATA\t<>C=0>=92;:4=?=B041593>00:510:(+)89:=389@3@/589>?+B+?;==:7=1269>?:5C?CC?B=:>==?ACD=?D:CAC5C\tXT:A:U\tNM:i:2\tSM:i:0\tAM:i:0\tX0:i:1\tX1:i:1\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:30A17A42\tXA:Z:chr18,-1844801,91M,2;\nfoo_902/2\t163\tchr18\t1844371\t37\t91M\t=\t1844801\t521\tATCCAAATAGGTGCTGGATATTATGCTGGAGAAGGCAACTGGGCATGCTAATGTATGACTTTTACTAGTCTGAGCCACGGACAGTCAGCAC\t--;9-:;7996>6=>>;=66682,7>@;;6B==:.;9;:?:-555@@>@=;5?DA=>?#################################\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:84G6\nfoo_904/1\t83\tchr1\t147465983\t60\t91M\t=\t147465581\t-493\tGAGAGGCCTCAGTTAAGTCTATAGCTGTTGTCACCTGTTGAATCATCTCTAGTCTTCAGAATACCATGAAATTAGTTTTCTCAGAAGTAAA\t;@@CCB??:?=?B?DBB:56*DA?EEBBCCCE<EEE?@?@CBBBAADFFAFEFEEFEFDFFFBFFDFFFFFFFFFEEFDDEFEFFFFEFFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_904/2\t163\tchr1\t147465581\t60\t91M\t=\t147465983\t493\tACAAGTCCAATTGAAAGGGCATTACAGCCAGACACGGTGACTCACACCTGTAATCCCAGAACTTCAGGAGGCCAAGGTGGGTGGATTGCTT\tGEGGDGGDFAGDGGGFEBGAGFGGEF=FFF?EA=EGGAFEBBEDDDEEEAED5BED??E:>9@@>DEEEB=B?=5*5)371)640?A??##\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\t
MD:Z:91\nfoo_906/1\t99\tchr2\t107216341\t60\t91M\t=\t107216698\t448\tAGAAAAAAAATCACATGAGAATTAAAGAATATCTTAAGACAAGTGAAAACAAAAACACAGCAAATCAAAACTTACAGAAGCAAGCAAAAGC\tGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGFGFGGGGGGGDGGGGFGGGGGG:FEEBEEEEEFFGGGEGD?AFFE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_906/2\t147\tchr2\t107216698\t60\t91M\t=\t107216341\t-448\tTAAAAGAGAAGACTCAAGTAAGTACAATAAAAAATGAAAGAGAGGACATTATAACTGATACCACATAAATAGATAAGACTATAAGAGAATA\t#FFFCFGBGGGDCEGGFGGGGGGGGGGGFGGGGGGGGDGGGGGGGGGGFFGEGFGGFFFEFGBFGGGGGGGGGGGGGGGGGGGGGGGFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_908/1\t83\tchr2\t6850828\t60\t91M\t=\t6850442\t-477\tTGGATCACATATTTTTAAAGAATGGACTCCTTTTTCATTTTCCACATGGCAAGTATGTTATCCTCAGTAACAGACTAAGGTCTTTGCATGA\t#######A37A@C==C??>>>=,>?;8:667+>:@>?>A@=5DBDCAB?:EDDDADCBD=DEEBED?AAAC5EFCFEEEEADEADDECEEE\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:4G26A59\nfoo_908/2\t163\tchr2\t6850442\t60\t91M\t=\t6850828\t477\tTCCCCATAAGATTTGTCCGAATAATTGTGGGAGATGAAAACAAGGATATACAGGGGAGGCCGCCATCAGTCGTCAGTGCTGTGCCAAGTCT\tDFEDAFE:?DFE?DEE;DDAEEDDEEB=BD@6@>@EE=BEDB>>?6C??##########################################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:53T7T7C16T4\nfoo_910/1\t83\tchr9\t35126274\t60\t91M\t=\t35125895\t-470\tTTAGGGCTATGACATGAACCCTAAAATTCCTGTTCCCTGAAGGTGGAAACCAAAAGAAAGTATCGCCACGTGGTTAAAAGTTCAAGCTCCC\tFCDEEACCEBB>@@@?C8CCCBEEEFE@BBECF?EEEGEFEEGEEEGGDGGEGGGGFGGGGCEGGGGFGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_910/2\t163\tchr9\t35125895\t60\t91M\t=\t35126274\t470\tCGGGTGTAATGGCACAGGCCTGTAACCCCAGCTACTCAGGGTACTGAGGCAGTAGAATCACTTGAACCCAGGAGGCAGAGGATGCAGGGAG\tFFFFDGGDGGGFEAGGFGFGGGGGFCGGGEDFEFGFG=GGFAFFEFDFFFGG?FGFC?F=BEEDEEFDG:BBA@B=ABAAA*<B#######\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:87T3\nfoo_912
/2\t129\tchr5\t74740998\t37\t91M\t*\t0\t0\tAAGAAAAAATTCTTCTGTTTTCATAAATATAACTACAACTATTTTATTCTTCTGTTCCACCTCAGTGGTACTCCTTGATACCCCATTTTTA\tGGGGEGGGGGFBFDGFGBGGGGEEFAGEGGGGGGEGGGGGFGGEFAEEEFFFDGGGFGGGGGBFD?CCABFFEEDFGEEEEEDEEED-FDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_913/1\t99\tchr1\t41165207\t60\t91M\t=\t41165587\t471\tTTAACCCTGGCGATGTAATCATCAATCATTCCTATCCTTGACCTTCCCCTGCACTCACTCAATGAGCAGCTCCCAAAAGTTGGGTCTCCAC\tGGGGGGGGGFGGFGGEGGGGGFGGGGGGGGGGGGGGGGDGEGFGFGGDGGFGEGFGGEGEGBGFEEGGDEEEEGEEGGFDFF=FCDFBFEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_913/2\t147\tchr1\t41165587\t60\t91M\t=\t41165207\t-471\tTATTTAAAAATTAGCCAGGCATGGTGGCATGTGCCTGTAGTCCCAGCTACTCAGAAGGCTGAGGTGGGAGGACTGCTTGAGGCCGGGTGGT\t3CC@9CEE:ECCBEFE@A@A@:BCBEAE:=FGFGBGGFGFGEEEEEF5DEFEEE:EF?EBGEFFDGFF=EFEEE?EFGGFGGGGGGGDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_915/1\t99\tchr1\t77221403\t60\t91M\t=\t77221775\t463\tCATAATGACAGGATCAAATTCACACATAACAATAATAACTTTAAATGTAAATGGGCTAAATACCCCAATTAAAAGACACAGACTGGCAAAT\tFFDFDFFEDFEFBFFFFDFFDB?EDEFD?FFFEEFBFFFEFFDDEDFFFFFF==FFBF?DFFF=BFFEFE=ECD?DDADCDDD???AAACA\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_915/2\t147\tchr1\t77221775\t60\t91M\t=\t77221403\t-463\tAGGTTCATAAAACAAGTCCTTAGAGATCTACAAAGAGACTTAGACTCCCACACAATAATAATGGGAGATTTTAACACCCCACTGTCAATAT\tB?D5?@A>8C;A=?5CEGGBGEDGEBFFF?DDAEDED=EEE@C;@>C@C6@CDEA=FDFDFGGEGGFDFEDEEDEDD=DDACCCCCBD5BD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_917/1\t99\tchr5\t32777132\t60\t91M\t=\t32777504\t463\tTTAGCCAGGCATGGTGGCATGCACTTGTAGTCCCAGCTACTTGAGAAGCTGAGGTAGGAGGATCACCTGAACCCAGGGAGCTCAAGGCTGC\tGGGGGGFGGGGGGGFGGFGGGEFGGGGFGGGGGGFGGGGGFGGFGAGGGGFFFFDEDEBEF?=FFBEE:?FFFFB=EEBEBCE5ECE5CE#\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_917/2\t147\tchr5\t32777504\t60\t91M\t=\t327771
32\t-463\tTTAAAACTGACTTTATAAATAAATAAAAATTAGAATTTTAAGTAGAAGTTAGAGCTAACTATAATAACTTATTTTAGTTCTGAAGATGTGT\tGFFAF=EFFGGGGFGGGGGFGEFGGGGGFAGGGGGFGGGGGGDGGGGGGGGGGFGGGFGGFEGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_919/1\t83\tchr6\t105189052\t60\t91M\t=\t105188677\t-466\tTTCAGAAAAGTAATCAAGGAGAGACGCAACACAATGACGAACTGGGTTTAGATTTGCGTGGCTAAGATTCCTAATTTTAAATAATTCCATT\tAEEAEGGAGFBFGGEGDGAFEFGEGFGFDGDFFFFF=FFFAGGGFGGGFGGGGGFGFGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_919/2\t163\tchr6\t105188677\t60\t91M\t=\t105189052\t466\tTTCACTGTGTCATCCAGCTGGAGTGCAATGGCACAATCACAGCTCACTGCAGCCTCGAACTCCCAGGCTCAAGTGATCCTCCCACTTCAGC\tGGGGGGGGGGGDGGGGGGDEFFFDFFFFFFGGGGGFGGGGGGGGGGFFGGGGGGGGGFGGGGGGFFGGGGE:FEFF:FEFEEED=BAEECG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_921/1\t99\tchr7\t66655187\t60\t91M\t=\t66655576\t480\tATGTGTGATTTTCATGATTACTTAGAAAGAGGTGGATTTATCCAAAGAGAGCGAATGGACCCGTTACAGTCAATATTTCTTGCTTCCTTTA\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFEFFFFFEFGGGGGGGGGGGGGGGGGGFFGGEAGBCCECC:ECCFFFEBEAEFEDGGGGC\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:22G68\nfoo_921/2\t147\tchr7\t66655576\t60\t91M\t=\t66655187\t-480\tGTGCAAAAGTAACTGAAGTTTTTTTGCTGTTACTTTTAATGGCAAAAACAGCAATTACTTTTGCACCAATTTAATAAAAGGATCCAAGATC\tF?EGGGEDFFFD?:FFD>GGGEGGEGGDGFFEGGFGGGGGGFEEDFGGGFGGGGGGEFFFFGFFDGGGGGFGGGGGGFGGGFGGGGGGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_923/1\t99\tchr11\t51224961\t60\t91M\t=\t51225337\t467\tTTGCATTTCTGACATTGGCCTCAAACAGCTCCCAATGTCCATTCTCAGAATGGACAAAAACAGTGTTTCCAAACTGTTGAATCAAAAGAAA\tGFEGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGEDGGGGGGGGGGGGEGGGGGGGGEGGEEEEFCAFEEDECEEGEGGGGFGFGFEGEE=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_923/2\t147\tchr11\t51225337\t60\t91M\t=\t51224961\t-467\tAATGTCCATTCAC
AGAATGGAAAAAAACATTGTTTCCAAGCTGCTGAATCAAAAGTAAGGTTTAACTCTGTGAGATGAATGCACACATCAC\tEEAEAEEEBEEFEEGGGGEEGGFGGF=FFFFGGGEG?GGGGGFGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_925/1\t99\tchr8\t24087137\t60\t91M\t=\t24087502\t456\tTGAAGTCCTGGGGGAGATGTTATGAGGAGAGGAACCTGGGCAGGAGGATTTGTGCTCTCTAGCACAGTGTCACCAGGACTGGGACTGAGAT\tGGGGFGFGGGFGFGGE?FFFGEGGGFFEFFGFGGGGEGGGDDGFEFFAFFGFEGGGFGEAGGGFGFGEDDCCBBBEEDEBGFFFFGGGEDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_925/2\t147\tchr8\t24087502\t60\t91M\t=\t24087137\t-456\tGTATTCCTATTAAAGGATCCTGTAAAATTAGTGCAGAAGGTAGGAATGACCCACATATGAAGCTCTGGTCTTGGTCCACAGATGCCTTGAT\tDFFABFD?EABDDD?:?FFDBGDDEGEGBDDEEBEDFDBBFFGGGGEEEDCGEGGGFDGGDGGEGGGGGEGGFGBGGGGGGFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_927/2\t129\tchrX\t9092306\t37\t91M\t*\t0\t0\tCCACAATGTGATACCACCATACACCCACCAGATGACAAAAATTGAAGACCGACAATACCAAATGACGTTGAGGAATTGGGGCAACTCGAAT\tGGFGGFGGGGGGGGGFGFGGGGGGGGGGGGGGDGGGGGDGGGGEGFGGFGGGGG?GFGGGEFFGGGGFFGAFFBDFE5FFGGFGGD5E:B=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_928/1\t81\tchr4\t90683007\t37\t91M\t*\t0\t0\tGGGGTTGGTCCCTAGGGATACAAGCTCTAGGCCTGCCCTCATAGGCCTAATCAACAAGTCTGTCCCAGTGGATTCAGGTTCCAGACTCAAC\t#CA=@C:3%(7,40<?):05623/(5:5C@@55C=@:18:714:78=>A:CACA:AAC5C?=>@@=?C?AA?FFFFEEEEEAEFEFFDFFA\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:0T90\nfoo_929/1\t99\tchr6\t136121859\t60\t91M\t=\t136122250\t482\tCTTTCTATATCTTTTTGCCAAATCTATATCTGTCAAAATCTGACTTTAGGCATATACCATTAACATAAATTGATATTTTTGGACAAAGCTT\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGFGGFGGGEBGBGGGGGDGGGEFGGGGEFEGGGFDGFDGFFDDEDDGAEGE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_929/2\t147\tchr6\t136122250\t60\t91M\t=\t136121859\t-482\tCTTTTCCATTAAAACAGCACCCTACTTCAGTGCCTAGTATATGCTAGGTGTTATTGGGGTT
ACCTTCAAATTTGCTCATTTGGTCTTCATA\tFDABAFF?FEFDAAD?EEED=FCDFAGGGGGDGGGGGGGAGFBFFDFFFDFAGGGGGGEEGCGGGGGDGGGDGGGFBGGFGGFGFGGBGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_931/1\t83\tchr2\t91684006\t9\t91M\t=\t91683626\t-471\tCAGTTTTGAAACTCTCTTTTTGTAAAATCTGCAAGAGGATATTTGGATAGCTTTGAGGATTTCGTTGGAAACGGGATTGTCTTCATATAAA\t#####################A5?AA5=-??=B??B>;?5?0231'96?*13254264.@?;3-649:;:8;;7:8DBAADDDB=DDDD=D\tXT:A:R\tNM:i:2\tSM:i:0\tAM:i:0\tX0:i:6\tX1:i:5\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:12A50A27\nfoo_931/2\t163\tchr2\t91683626\t9\t91M\t=\t91684006\t471\tTATCTACTCAGCTAACAGAGTTGAACCTTTCTTTTGAGAGAGCAGTTTTGAAACACTGTTTTTGTGGAATCTGCAAGTGGATATTTGTCTA\tCC>@@AB:>>CCAC=:896A?==9;C=CC?EEE>>44=*;;22@<C5AA5?@@>6DDDB:<A:4:2B7<B#####################\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:3\tX1:i:8\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:57C33\nfoo_933/1\t99\tchr14\t25507675\t60\t91M\t=\t25508053\t469\tCTGTTGCTTTTAGCCAAATGGATACTATTGCTAAAATTGACCAGAGCATATTATGTAGCGTGAGAGACTTATTGGTGTGGATCCAATACAA\tGGGGGGGGGGGGGGGGGGGGGGGGDGCGGFEFFDFFGGFFFGGGBGGFFDFFGEGEE?FGFBEGDBBEEEDFFAD?CBCE?CDEB@A@==E\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_933/2\t147\tchr14\t25508053\t60\t91M\t=\t25507675\t-469\tCAGTATGGACACTGCATCCATAGTATCAGGCAGATAGAAGAGGTGTATGAATAAAGATGCAGGTAAGCCAGACTTGTGGTAGTGAGAAATT\t######?;186?EEFDFBEGFEGGGFGGEGGGGGGGGGGGFGGGGEGFGGGDGEGGGGDGGGGGEGGGGGGGGGGGGGGFGGDGGGGGGDG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_935/1\t99\tchr2\t236820250\t60\t91M\t=\t236820621\t462\tATTTCTGCAGGACTTACTCATGCACAGCTATAATCTGATCCAACTGTGTTTCACTTTAACGAATTTCCTGATGCCTGATATAGCTTGTCGT\tGGGGGGGGGGFGGGGGGGGGFGGGFFGGGGGGGGGGGEGGGGGGGGEGFFGGFGGGGGCDGGGGFFGEGGEEEBEGFGEGGGGGEE=BDEC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_935/2\t147\tchr2\t236820621\t60\t91M\t=\t236820250\t-462\tTTTTTAAAGAACAAGAATAGCCACATGTTGATGATTACATTAAAACAAACAAACCTGAAAACCAACTCATTCATTCATTCATTC
ATTCAAT\tE>AA+AA?;A:9@>7?-EEDE:FFFDDFGGGEE?AE?GFFGCEFDDGFGBBGGGGDGGGGGFGGGGGFGGDGGGDGGGGFGGEGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_937/2\t129\tchr9\t34190929\t37\t91M\t*\t0\t0\tTGGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGTGATGGCGTGATCTTGGCTCACCGCAATCTCCGCCTCCCGGGTTCAAGTGATTCTCCT\tGFEGGGGGGGGGGGGGGDGGGGGGGG?GGGEBEBEEEDEEFDF=FEEDEEGAGFDFGDE?GGDBGGGGGFEEADE4:A?BFBD?EEBDC==\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_938/1\t83\tchr14\t98494687\t60\t91M\t=\t98494297\t-481\tAGCTTGGTAAGTCAGAACGGTCAAAGGCCAGGACTGTGTGCGTCCACACTTGACCTGCCCACACTGCCCCTGAACACCCTTCAGCCCCGGA\t:5@A==@B=CB:CABB66;)4;:>5>>>?->B@=EEC@->C?@DD?EBEEEEDGGFEEEDBE=FEFFFFAEFGGGEGGGFDGFGGEDGBGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_938/2\t163\tchr14\t98494297\t60\t91M\t=\t98494687\t481\tCACATTAGAGTCATTAAAAATCATTAGGATGGTTGAGTTCTGAGGTCAGATAAGCAGGATAAACACACTCCGGAGAAACTAGTTCTCCCTT\tFGGEGGGAGFGDGGGFFGDDGFEFGFGFFBBDBDDEEBEBFFDFBFDDFFAEEFFFCEAGEB=DBDDEBB?=CAC?CBBDEA:DE:BA?BF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_940/2\t145\tchr4\t25985810\t37\t91M\t*\t0\t0\tCTCAGGAGTTTGAGACTAGCCTGGGCAATATGGTGAAACCCTGTTTCTGCAAAAAAATACAAAAATTAGGTGGCCATGGTGGCTCGCACCT\tGD?FFFDDFBGGDGGGGG?GGFGGGGGGGGFFGGGGGGGGGGGGGGGEGEGGGGGGGCCGGGGGGGDGGGGGGGGGGGGGGGGGGGGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_941/1\t99\tchr9\t24609070\t60\t91M\t=\t24609419\t440\tGGTGATTCTCAGCGGAATGGACGGGGAGCTGCAAAGGGGATGGAGTGGGAAGATGACCTTCGCCTGGCGTTCACCAATCCTGCAGCCAATC\tFFBFFFEFEDFEEBFFBFFBDBDDEEB?EDABA-A:=BB:.?>.7*BB==AA=5:>>?9=?@BD5:70A,?=>?>1:B=BCA:@AD5:?D>\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:67A7C15\nfoo_941/2\t147\tchr9\t24609419\t60\t91M\t=\t24609070\t-440\tCTCTCTTCTGCCCAGTATTTTTTCCTGCCTCCTGTCTGTATCAGTGTAAACAATTTTTTTTTTAAGTTACAAATAATTTTTTATAAAAAAT\t1?AA?B?AD=D5FG:GFFEFFEADBEC>C;=CACA:
E?EE:C?C:CA;A>:F@5EEGGGBGEFFFFDDBDDFD?EEGGGGECEBFBFEFBF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_943/1\t99\tchr1\t121185968\t60\t91M\t=\t121186347\t470\tAACTGACAGAGTTGAACTTTCATTTAGAGAGAGCAGATTTGAAACACTGTTTTTGTGGAATTTGCAAGTGGAGATTTCAAGCGCTTTGGGG\tGGGGGGGGGGGGGGGGGGGGGGGGGFGEGFGGGGFGGGGGGGGGGGGGGGGGGFGGGGEBEFFFFADGFGFFFDFFGGGFDGFFD=DDEEG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:58T32\nfoo_943/2\t147\tchr1\t121186347\t60\t91M\t=\t121185968\t-470\tTGAAACACTCTATTTGTGCAATTTGCAAGTGTAGATTTCAAGCGCTTTAAGGTCAACGGCAGAAAAGGAAATATCTTCGTTTCAAAACTAG\tEBEFEGCEE?E??EEGEEEGGBFDGGGGFFGDGGGFGGGEGGGGGGGGGGEFEFGGGGGFGGGGGGGGGGGGGGGGGGGGGGFGGGGGFGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:56T34\nfoo_945/1\t83\tchr4\t184591595\t37\t91M\t=\t184591221\t-465\tGCGGCCCAAGCCTCCCCAACGAGCACCGCCCCCTGCTCCACGGTGCCCAGTCCCATCAACAGCCCAAGGGCTGAGGAGTGCGAGCGCATGG\t###########C@CBB::119BAB->;CE=EEEDDE5EEDEGGGGFGGGGGGAGGGFEFFBEGGGFGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:7G32T50\nfoo_945/2\t163\tchr4\t184591221\t36\t91M\t=\t184591595\t465\tGCACTTGAGGAGCCCTTCAGCCCACCACTGCACTGTGGGAGCCCCTTTCTGGGCTGGCCAAGGCCAGAGCCCTCTCCCTCAGCTTGCAGGG\tGGGGFGGGDGGGGEGGGGGFFGEFGGDFGGEFFGFEFFFFB?FFFFDFFDFFGFD=DEEEFA::EC?=:ABD=DAEEE=EB5:@A4;.8<B\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:11\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:26G64\nfoo_947/1\t83\tchr4\t154941369\t60\t91M\t=\t154940992\t-468\tTATTAATTATTTTGTTTTTTTACAGCTTGATCCTCTTATGAAAAGGAAAGAATTTAAAGATGAACAATTACATTTTTGTTTAAACGGCAAT\tEEBEBA=GGGEC;CCFBCBBBF:DEFEFF=DDEB=EEEEEBF=FFFCEE-CGDGGGGFGGEGDG=GGFGEFGGEGFGGBGGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_947/2\t163\tchr4\t154940992\t60\t91M\t=\t154941369\t468\tTGCAGCCTCAAACTTCTGGGCTCAGGGGATCTGCTTGCCTCAGCCTTCCGAAGTGCTGGGATTACAGGCATAAGCCATTGTGCCTGGCCCC\tGFDGGFFGGGEDGGFEGGFGGGGGFGFGFDGGGGGGEFGFGFEF
GEDFFFEEAADFE?FDFGBGGFGEGGGEFGGGGGG?EFFGBEGGFDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_949/1\t83\tchrX\t6957455\t60\t91M\t=\t6957115\t-431\tAAATAACGACATATGGAAAAGATACAATTTAAGTAATAGAGACTTCCCAGGCAAACCCCATGTAGGGGGAGGAATCGAACATATCAAGGGC\tEGGGGEDCDD:DD5ADE,EEEE-DE=EE:?EEEGEGEFDEEBBB-DDEFFFBDGGGDGGGGGFGFFGGBGGBFBGGDGGGGGGGGGGGGGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_949/2\t163\tchrX\t6957115\t60\t91M\t=\t6957455\t431\tCAGGACAGCTTTGAATGTTGCCCAACACAAATTCGTACTTTTTTAAAACATGAGATTTAAACACGGACCTGTTTTTTCACTCATCAGCTAT\tFGGE?F=FFFE5DEEFFDDFEFFFDGFGGFGGGFGFGGEGGGFGFEGBGGGD?GGDGGFDGGGGG:EEEEDBBDEEEEEFDFE5D=DEEAG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_951/1\t99\tchr9\t79693596\t60\t91M\t=\t79693973\t468\tGCCATTTGCTCCCTCAGAGTAGGGGAGAGGACACGGAAGAGATTCCATTTTCAGCATGACATTTAGAATCAAATTAGAAAATCAAAAAATT\tGGGGGGGGGGGGGGGGGFGDFFFFFEGGGGDGGFGGFGGEGEDGGGEGGGFEGDEEAAEEEEGGEEEDGEEDDCEDEEAAFEFEFGEEGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_951/2\t147\tchr9\t79693973\t60\t91M\t=\t79693596\t-468\tATCCCATCTTTTAAAAAAAAAATTAGCCGGGTGTGGTGGTGCGTGCCTGTAGTCCCAGCTACTCATCAGGAGGTTGAGGTGGGAGGACTGC\t@:BC@3BEEAF?GGAGFDDFAE5EEBDDFGDDFGAGDGGDFFEGDGGGGGGGDGGGFFEGBGGGGFGGGGGGGGGGFGGGGGGFGGFFGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_953/1\t83\tchr13\t49630598\t60\t91M\t=\t49630225\t-464\tAACAAAAGTTACTCATGCAGTTAAAGCTTTTTTTTGTACCTAGGTTTAGAAACTACAAGTCCACAGAGAATAGGGCTCGTGAAATAAATGA\t@BCBABAE==BCDDCB?BEEDFBEEFC?=C=GGFADFBFFDFEEBFBBE=EDFFFDEDDEED:DEDFFF?FEDGFGGGFDGEGFGGFGGFG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_953/2\t163\tchr13\t49630225\t60\t91M\t=\t49630598\t464\tTTGCCTCTTAGAAATACATTATAATGTTATCCTAATATATGTACTAGGACAGACAATGCTACGTTGAACTAACTTGAATAGCATATTTTAT\tEEECAEEBEEA=DDDDFGGEGFDGGGGGDGGGGFAEEFCFEAEEE?EEBEBE=DEGGDGGBGDDBF?DEAFGEEG:@=
=??A:5@A@A<76\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_955/1\t99\tchr6\t120758198\t60\t91M\t=\t120758584\t477\tGCCTGTCACCATTTCATATACATTTTGATTAGTTTTTTTCTGAATATAGAAAGTTAATATGAATAATCTATTAAATAGTCTTCGAGAACTT\tGGGGGGGGFGGGFGGGGFGGGGGGGGEFGDGGGGGGGGGGFGEFGDFGGCFGGCGGFGGGGGDGGGFGGEGGFDGGGGBGGGGEE?:?CCE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_955/2\t147\tchr6\t120758584\t60\t91M\t=\t120758198\t-477\tAATAGAGTTGTGAGTGCTTGAGAAAGACTGGTTACATATTTGATCAAAATGATAATATAATAAACCATTCAAAATAAATAGAGAAGATAAC\tGFFFF:GADEGG?GEFFGDGGGFFEEGGGGGGGDGGGG?GFGGGGGGGGFEGGFGGGBGEGGGEGDGFGA>GGGGGEEGDGFGGGGGEGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_957/1\t83\tchr3\t167410024\t60\t91M\t=\t167409640\t-475\tAATAACAACCATTGCTCAAATTTGCTGGAAATCAAACTTACAGCCCACAGATACCCACAGAAGAATAGCAGGATCAATGTGTACACTAAAT\tDCCC5CCEEDED=BDA?BEEEF:FDFEBEEC:AD=A@@@>?DDD5ADDD=DDADADFEDFFEDEE?DFBFFDFFDFEEDDEEEEBEDDFDE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_957/2\t163\tchr3\t167409640\t60\t91M\t=\t167410024\t475\tTTTGCTAAAAAGAAATATGAAAATTATGTAGAGGAGTAACATCAGAAAAAGATGATGGAGTAGGCAGCTACAAACTCCTTTCCCACATGAA\tGGGGDFGGGDDAEEDGGGG=FDFFFGGDE?DEEECEGGGEGGG?GFDFACEECDF=DDADAD=DCEDE=EBAADABCBDEA5AA:77CA?F\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_959/1\t83\tchr1\t206918225\t60\t91M\t=\t206917853\t-463\tTAGAAACATATACAGAAGAACAAAACTATTAGGATTTCACACCAAAACCCTTTAAATAGGAATTTATTAAACATTAATTGTTCCCTCTTTC\tEFEFDEFGFCFEEGGFC??CEABA@?EDAEGFEEDEEFDGCCBC=CCED:EGGGGGEGFGGGGGFGGGGGFGGGFGGGGGGGGGGGGGDGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_959/2\t163\tchr1\t206917853\t60\t91M\t=\t206918225\t463\tTCTCAAAAGATTAAGAACAGATTATATTCTTAATCTTTTGATTACAAAGAGACAAAGAGCATTCAAACAGCTGAACTCTTATTGTATTGCC\tGGGGGGGGGGFEGGEGGGGGGGGGGGDGGGGEGGGGGFFFEGGGGGGGGGGGEGGGGGGGFGFGGGEGBDFAFFFGGGGGGFEDDEBE=A=\tXT:A:
U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_961/1\t83\tchr3\t36806989\t60\t91M\t=\t36806613\t-467\tTCTCATGCCCTTACCTGCATCCCATCCCTTCCCCCAAGCTGGATACTCATCCTTCAGCTTATCACCCATAGGGTCTCCATCTAGTTGTCAC\t@EEEA=5B@?@=;C@>>A:5ABB::CEEE7EE5EEEEEEEDCCADDEEEEEDCDD=BEEEEBE:EEEEAEEAB:A:EEECECEEEEEEE?D\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_961/2\t163\tchr3\t36806613\t60\t91M\t=\t36806989\t467\tAGCTGTTGCCAGCTTTTCATCAGTCCTTTGAAAGGAGTTGTTGACACTGCTCAGGGCCACCTTCTCTACTCCAAGTTACTCTTCAACCCAA\tC>A:??AAAC?DABD:DCA?CCDDEE:ED:CCC??BBCC:B?=D:?AC?>:CA=@:,@@2;.:8:AA;>>/;8:4?C5:A0(;*=??=A5>\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_963/1\t83\tchr7\t142308461\t37\t91M\t=\t142308054\t-498\tAATGAGACACTTTAAAATATCCTAAGTGACAAAAGCAACAACTCCTTTTCTAATCTACTAAGCTATTATATGTATTTTAAGATAAACATAT\tDDDFBDDFCDEFEFEFBCCCDGDEBFEECAEEF@FFGEGGEGGGEGGGGGGGEGGGEGGGGGGGGGGGGGGGFGGGGEGGGGFGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_963/2\t163\tchr7\t142308054\t29\t91M\t=\t142308461\t498\tGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGTGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCGTCTCT\tGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEE2EE?A@@B?=CC=DBCD@ECCEBCBBCEB:BEB@A==??####################\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:47\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_965/1\t81\tchr2\t85059521\t37\t91M\tchr11\t133740820\t0\tGTGTTTTTATAGAGACAGGGTTTGTCCATGTTGGTCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCCACCTCAGCCTCCCAAAG\t7<CCBFFDFBBGBGEGEEEEEGEGBFEDEBDBEFEGEGEGGGGGGFGGF@GEGGGGEGGGGGGGGGFGFFDGGGEGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_965/2\t161\tchr11\t133740820\t37\t91M\tchr2\t85059521\t0\tTGAAGGGCTTTATGTGCGCAGTCCAGAGATGTCTGTTTTCCTGAAGGAAGGGGACTTGGACACCTCCCTGTTTAAATCAAGGGCTCTCTGA\tGGGGGFGGGGGGGGDGGGFGGGGFGGBFEFGFFFGGFGGGGGGGGGGFGGGGGFDDEFFDGGGGFFGGGDEFBF:FFFFFGGEG:EFF??C\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i
:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_967/1\t83\tchr16\t30410610\t60\t91M\t=\t30410251\t-450\tGTCCAGCCTCTTCCCTTTTTGTAGATGAGAAAACTGAGGCACAGAGGCATGAAGCAATTTACTTAACTACTACACAGCTAGGAAGCAGTAG\t:AFBAFGGDGGEGGGGCC=:EFGDGFFFFAFBDFDFDFEGEGGGGFGGGGGGGGDGGGGGGDFFFFGFEGFGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_967/2\t163\tchr16\t30410251\t60\t91M\t=\t30410610\t450\tAATCTCTAGTTAGACACCCACCCCACTATACACACACATAAATATATACATATATGTGGATAGATAAATAAACAGAATCCTGAGTGTCTAC\tFGDGFGGGGGFGGGGGFFGGEGGG>GGGFGEEEEECEEEEFFFFFGGGGGGGDGGFFGGGBD-BBCEDCDFC==?@C,AA??>5;B:@@@?\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_969/1\t99\tchr8\t75728528\t60\t91M\t=\t75728901\t464\tTAAATACAATGGCAAGTATAAAATAAAGTATTTTTTTGATTCCCCATTCAAGCAGTAATTAAATTACATGTACCCATAATGTACATTCCCA\tGGGGGGGGGGGGGGGGFGGFGGGGDGGGDGGGGGGGGGFGGGGGGFGGGGGGGFGGGFGGGGFGGGGGFGEGGEGGGGGGGFFGGGGGEGD\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_969/2\t147\tchr8\t75728901\t60\t91M\t=\t75728528\t-464\tGAGAGAATCGTATTTGGGGTCTACAGAAGAGCAGTAGAGATATGCTTTTTTCTTCATTTTCAGTTAGAGGAGTTTCAAGGAAAAGGTTTGG\tGEGGDGGFGGGGGDFGGFGFGGFGGGFFGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_971/1\t83\tchr19\t42476835\t29\t91M\t=\t42476452\t-474\tTTTTCTTCCTCCTGGGGTTTCTTCCTGCTGGTGGACCCTCCGCGAATCCCGGCCTCCGGAGACCGTCCTGGTAACTGCCCTGGCCAGGACT\t#######?<;>;>CAA,9208?,@?@>>&>>:CB6BC?:EBDFDEE?GGGFBGFBFFDFEEBEEE?GGBGGGGFGGDGGEGGGFGGGGFGG\tXT:A:U\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:1\tX1:i:3\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:74A16\tXA:Z:chr19,-42471784,91M,1;chr19,-42476835,91M,1;chr19,-42241437,91M,1;\nfoo_971/2\t163\tchr19\t42476452\t37\t25M2D66M\t=\t42476835\t474\tCGAAAGAGCGAGAAGGGAGAGAGACAGAGAGAGAGAGAGAGAGAGAGACGTGAGAGAGAGACAGAAGTCGGCACACAGACACGCACTGCGC\tGEBGGGDG?GGGGGFEDCAEBEDEEGGEF=EAEADDE=FCEEECEAA5CCADBDDECEAA?5A??>9.6;6>>@??=?B=
(3(.=B?C-B@\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:2\tMD:Z:25^AG66\nfoo_973/1\t65\tchr9\t81954188\t25\t91M\t*\t0\t0\tTCCTTTGGGTATATATACAGTAAAGGGATTGCTGGGTTGAATGGTAGTTCTGCTTTTAGTTTGTTGAGATCGTTCTCTTTTTCGTTATTTG\t=BEEEEEEC:DDDDDADDDD=DDDD?A?CC=DD:=CC=C:6>@@@CA?A=@;-.6:>,=*<<99:6,>.;)8*67>=BB?A=(AAB-B>>C\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:52A17A5A5A8\nfoo_974/1\t83\tchr10\t42475636\t60\t91M\t=\t42475248\t-479\tATAAGCAACCTACATTTTCCTCCCTGTATCTCATGTTCTGTGTCAACAGATGTGTGCTCTGGAGGACAGAGCAGTTCTCCTATGTTTCTTT\tGFGAGGG@GGGEGGGFECFFEG?GGGGEGDGGGGFGDGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_974/2\t163\tchr10\t42475248\t60\t91M\t=\t42475636\t479\tCCACTCAGGGCATTCTGTTCTTCTTACCAAGCTCTGCCATCATTAAACTATTTTAGCAAAGCCTGACTTACTGGGGTTTTGTCTGAGTTTA\tGG?GGFAFFDGGGGGEGGGFGGFGGGGGGDGGGGGGGFGGGGGGGGGGGGGGGGGDGGGFGFGGGDGDGGGGFFFGEGGGGFGGDFF=FFF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_976/1\t99\tchr12\t95784026\t60\t91M\t=\t95784378\t443\tATTTTGATGTTAAAAAATATATTAAGAAAGAAGTTTCTCTTTACTAACACTCATTTTCTATTTCTACTTTCCACTTCAGTAGAGCCTTATG\tGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGFGEGGGGFGGGGFGGGGGGGGGGGFGGEEGGGGGGGGGGDGGCGGGEDGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_976/2\t147\tchr12\t95784378\t60\t91M\t=\t95784026\t-443\tATATTTAAGATAACTTTCATACATTTTGTATTTAACAGCTTAGAATGGAATAGGTCTAAAACCAGATTCTGCCCAAGTACAAACCACCCTG\tGDGGFGGEGEGGCGCGGEEGFGFGGGFGGGFGEDGGGGEGGGGGFFDGGGGGFEFGFGEGEGGGGGGGGGGGGDFGFFFEFFEFFCEEEEE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_978/1\t99\tchr4\t26714158\t60\t91M\t=\t26714521\t454\tAAGAAATTTTTTCTATCCTATCTATTATTTTACCCTAGTGCCTATAAGAGCACTTGGCACATCGAAGGCACCCAGTAAATATTTCTCAATG\tEEEDEFEFFFEFFC>FFFDFFDDEFEFDEF:AEEEEACACE?EDDFFFEDDDEFAFDFDBDDDDD5?@:=;347=?A5>?8?5?>@D?:?@\tXT:A:U\tNM:i:0
\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_978/2\t147\tchr4\t26714521\t60\t91M\t=\t26714158\t-454\tAGCACTTTGGGAGGCCGAGGCAGGCAGATCACCTGAGGTCTGGAATTTGAGACTAGGCTGGCTAACATGGTGAAACCCTGTTTCTACTAAA\t@:>6?>-EDDEFE:FFE=AEADG:GGDDDDB?:DEEEE5EEDDDDAF5DFDEEEED==BEEGGDGGCAAA>DDDADD=DC=EBEFBEEEDE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_980/1\t83\tchr6\t70510288\t60\t91M\t=\t70509922\t-457\tAGGATTTTATAATCTAAAAATCAGACCTAAAATATTTTCACATAAACTAAGAAAATATCCATAAGTAGAACTCAAAATATGGCAAAGGCAC\tEGFGGFGFGGGDGGCGFEFF=GEG@GGFGGGGGFGGGEGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_980/2\t163\tchr6\t70509922\t60\t91M\t=\t70510288\t457\tTATCTAAAAATAGCATTCATCTCATAGGGTTTAATACGCAATATTCTAAAACTCAGTATGTCAGTTTTAGGTAATCAAAGATATTGTACAG\tGGGGFGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGGFFGGGGFGGGGGGGEGGGDGGEFFGGFGGGFE:DGDEEEAF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_982/1\t99\tchr14\t96016952\t60\t91M\t=\t96017322\t461\tGGATGGGAGATGGGGAGGGCGTCTCACAGCCAGGGATGCAGCCCGGCCTCTGCAATGTCTGGAATGGGGCCCTGGAAAGATGCTGGGAATC\tGFFGGGGGGGGGGGEGGGGGGEGGGGGGGGGGGGGG=FGGEFGFDDDFAEEGFEEED=E?CB-CCDDDEADDCCDCEBA==@?>=A?5>5=\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_982/2\t147\tchr14\t96017322\t60\t91M\t=\t96016952\t-461\tTATTGCTAGGGGGAATCATTCCCAAGTACCTTCTCAAAGGTGTCCAGTTAGTGATATCAAGGTTGTAACTACTCAGAGTAATCAGGGAGAA\tC=F=EDFEFEDEADEFEGDFGGGDEFGEGGGEGGGGGGGGGGGGGGGGFFEGFGGGGGEFGGGGGFGEGGEFGGGGFGGGFFGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_984/1\t65\tchr4\t12945395\t25\t91M\tchr2\t58343178\t0\tGCTTCCAGTTTTTGCCCATTCAGTATGATATTGGCTGTGGGGCTGTCATAGATAGCTCTTAAAATTTTTAGATACATCCCATCCATACCTA\tDF?FDDBFFBFFEF=EEEEBEEDEEBD?D=CA5??>A<?####################################################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:25\tX0:i:1\tX
1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:41T19T0T5G22\nfoo_984/2\t129\tchr2\t58343178\t37\t91M\tchr4\t12945395\t0\tAGAAAGAATGTAAAGAATTTAGGAGATAAGCCTAACTCATTGTATGAGGCCAGCATCATCCTGATACCAAAGCCGGGCAGAGACACAGCCG\t?DF=GE?DEEEEEE:BDDDDFD5FAAEEEEAFFFAEFEDEF?FFFEDD?DDA=DDDFDD=>.@C;EEE:BA>5A=DBBDD=A5DA;+=6A#\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:25\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:90A0\nfoo_986/1\t83\tchr2\t114554725\t60\t91M\t=\t114554339\t-477\tTATATCACAGATAGATCTAATTTTATAGTGGATCATAATATATTCTGTAACTGATCTTTATCACTTACGCAGCAAATATGTCCTAGATATT\tEGGGGFEGFGEGGGEGFEEEGGDDGEGGGGEFGGGGGFGGGGGEGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:55T35\nfoo_986/2\t163\tchr2\t114554339\t60\t91M\t=\t114554725\t477\tCGCTCATGAACAACTAACTTATTTCTCACAGTTCTGGAGGTTGCTATGTCCAAGATGAAGGCACCAGCAGATTTGGCATCTGGTGAGGGCC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGEGGGGEGGGGGGGGGGFGGGGGGGGGGGGG=EDFFFFGGEGFEEEBDEEAC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_988/1\t99\tchr7\t130338986\t60\t91M\t=\t130339344\t449\tTTCCTATTTAAACAGTGTAAGAATTGGGGGTGGCATTTGAAGGTGGGAGAAGAAGGGCAAACTTGTTAGTTTACTAGTGTGAGCGGTAGAT\tF=FFFFBFEFFFFDFDEDEEFFEFFEEEEEBB@@DDBDDDFFFBFDAA?ADECBC:C=EEBEDE:;>A=>FFFEE5A@CCFF:EDEBEEBE\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_988/2\t147\tchr7\t130339344\t60\t91M\t=\t130338986\t-449\tGCCCAGGCTGGAGTACAAAACATTGCACATATTTTTTTCAATGGTTTACTCATACTTGATGAGAGAGCTTCTATTATATTTAATTTTTTGT\tFFBDFABBC5EEFFBFE?BGGFFGGDFFGGGEFFFDDFFGGFDDDBFFEFEFGGGGFBDGGFFGGFDFGGGGFFGGGFGGGD>FFGGGGEG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_990/1\t83\tchr3\t61494141\t60\t91M\t=\t61493746\t-486\tAATCGCACATATCATCACGGCCACATTCATTTACAGTGAGTGATTAAGTTTGTCCCACACTCAAGGGGAGGAGAATTAGTCTCCACCTTTT\tB==?>=???@?=C@:C:EECEFCFEBDFEBBDDDFGEGEGFBGDGGFGGBFGEFGGEGEGFFGFGGGGGGGGFGGGGGGEGGGGEGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tX
M:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_990/2\t163\tchr3\t61493746\t60\t91M\t=\t61494141\t486\tTGAAACTGGCTTTCTCCCAGAGTGACTGATCCAAACACAGTTAGGTGAAGGTGTCAATATGCTTTACAATACAGTCGGGGGCAGTGGCTCC\tGFGGGGGDGGGGGGGGGGFGFFEFFFFFFFGGGGAGGFGFEEEEE5DDDDA5AAAEEEEEGD=GGFGDGGFA?GBCCEC############\tXT:A:U\tNM:i:2\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:79T10A0\nfoo_992/1\t99\tchr8\t63749236\t60\t91M\t=\t63749604\t459\tTCTTAGCAAACACACACAAATGCCTGCAATCTGAACCTCCTCCACCTATCGCTCTGGCCACATCCAGACATATGGAGGAACTCAAAATTCC\tGGFGGGGGGGGGGGFGGGGFGDGFDFDFFFGGGCFGGGGEGGGEGGGD?GEFFFEDAEFEEFBDFFEGGGGBEEEFFD?EFFFAFBFDDDF\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_992/2\t147\tchr8\t63749604\t60\t91M\t=\t63749236\t-459\tATGAATGATTGCAGGTATCCTGCTTGGGGATGGGGGCGAGAGAATGGTATCACTGTTGTTAGAAACACTAAACGTAATAAATGGCAAGGAA\tE:CEB?FFAFFC5ECE?5CBEGGGCGGGFDEGEGGFGGDGFGFGFGEEDEDEGGGGGGFGDGGDFGD?FFDDGGGGGGGGGGGGGGGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_994/1\t99\tchr3\t13832430\t60\t91M\t=\t13832790\t451\tATGAAGAGGCCCACATTCCCATGTGGCAAAAATGCCCTTGTGTTGAGCAGCTTACTTTCTGAGACTGGGTTCCTGCCAAGACCCCAGGCAT\tGGGGGGGGGGGGGGGGGGGGGGGGGGBGGFGGBFFGGGFGGGDGGGGEGGGGDGGGGEGGGGGGGGFCFCGGBE=EGDGEE5EEEFFFFFB\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_994/2\t147\tchr3\t13832790\t60\t91M\t=\t13832430\t-451\tAGTCATTACTGGGAGGATCCTCACAACAGCCCTGTGAGGTAGGTGTTATTCTGATTTCACAGGTGGAGAGACTGGGGCTCAGAGCACAGCT\tFEBCB?FEGGGGGGGECCE?BGDFGBFFFDBGGGGEGGGFFGGFGFGGGGGEGGGGGFFAGGGGGGGGGGEGDGGGGGFGGFFFF?EEEEE\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:69A21\nfoo_996/1\t65\tchr3\t13832430\t25\t91M\t*\t0\t0\tATGAAGAGGCCCACATTCCCATGTGGCAAAAAAGCCCTTGGGTTGAGCAGCTTACTTTCTGAGACTGGGTTAGTGCCAAGACCCCAGGCAT\t?##########################################################################################\tXT:A:U\tNM:i:4\tSM:i:25\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tMD:Z:32T7T30
C0C18\nfoo_997/1\t99\tchr15\t67761720\t60\t91M\t=\t67762092\t463\tATAAGACTTGAAAGTTGAAATTACTCTTTGAGCTAAGGGCTGAGATTGGATGTTGTGTTAGCAGGCATGAAAACAACATCAATTGTACATC\tGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGDEGGGGFGCGGDDFFBECEEFEEECBCDC=:@=??CEE=EDEBEEEC\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\nfoo_997/2\t147\tchr15\t67762092\t60\t91M\t=\t67761720\t-463\tCCCCTGATAAGAGAGTCAACCTGTCCTTTGCCATGGAAGTCAGGCATTGACTTTTCTTTTCTTTTCTTTTCTTTTTTTTTTTTTTAATTAT\t?ABA@CAA?:A?AAA?=@:A@=5?CAAE:=EEBBEC:FFFF-DDCB@@@:27:;B'>@AD8>AAA0;A=:1EGGGGGFCDGGGGFGGGGGG\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:46C44\nfoo_999/1\t81\tchr22\t31739572\t37\t91M\t*\t0\t0\tAATTTTGAAATATACAATGCATTATTTATAATGCATTATAGTGACTGTAAAGTCACTATTCTGTGCAAAAGATCACAAGGGCTTATCTCTC\tFEEEDDEGGFGEGEGGEAEDEEEEGEEGFGEEFBEGEFEGGGGEFEGFGGGGEGEGGGGGGGGGGGGGGGGGGGFGGGGGGGGFGEGGGGG\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:91\n"
  },
  {
    "path": "examples/pydoop_script/data/stop_words.txt",
    "content": "one\ntwo\nthree\n"
  },
  {
    "path": "examples/pydoop_script/data/transpose_input/matrix.txt",
    "content": "a00\ta01\ta02\na10\ta11\ta12\na20\ta21\ta22\na30\ta31\ta32\na40\ta41\ta42\n"
  },
  {
    "path": "examples/pydoop_script/run",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nfor s in base_histogram caseswitch grep grep_compiled lowercase transpose wc_combiner wordcount wordcount_sw; do\n    bash \"${this_dir}\"/run_script.sh ${s}\ndone\n"
  },
  {
    "path": "examples/pydoop_script/run_script.sh",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\n# Use NLineInputFormat to force multiple mappers with a single input file\nNL_INPUT_FORMAT=\"org.apache.hadoop.mapreduce.lib.input.NLineInputFormat\"\n\nnargs=1\nif [ $# -ne ${nargs} ]; then\n    die \"Usage: $0 prog\"\nfi\nprog=$1\n\nOPTS=(\n    \"-D\" \"mapreduce.job.name=${prog}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n)\n[ -n \"${DEBUG:-}\" ] && OPTS+=( \"--log-level\" \"DEBUG\" )\ncase ${prog} in\n    base_histogram )\n\tDATA=\"${this_dir}/data/base_histogram_input\"\n\t;;\n    transpose )\n\tDATA=\"${this_dir}/data/transpose_input\"\n\tOPTS+=( \"--num-reducers\" \"4\" \"--input-format\" \"${NL_INPUT_FORMAT}\")\n\t;;\n    wordcount )\n\tDATA=\"${this_dir}/../input\"\n\tOPTS+=( \"--num-reducers\" \"2\" )\n\t;;\n    wordcount_sw )\n\tDATA=\"${this_dir}/../input\"\n\tOPTS+=( \"--num-reducers\" \"2\" )\n\tOPTS+=( \"--upload-file-to-cache\" \"${this_dir}/data/stop_words.txt\" )\n\t;;\n    wc_combiner )\n\tDATA=\"${this_dir}/../input\"\n\tOPTS+=( \"--num-reducers\" \"2\" \"-c\" \"combiner\" )\n\t;;\n    *)\n\tDATA=\"${this_dir}/../input\"\n\tOPTS+=( \"--num-reducers\" \"0\" \"-t\" \"\" )\n\tcase ${prog} in\n\t    caseswitch 
)\n\t\tOPTS+=( \"-D\" \"caseswitch.case=upper\" )\n\t\t;;\n\t    grep | grep_compiled )\n\t\tOPTS+=( \"-D\" \"grep-expression=March\" )\n\t\t;;\n\tesac\nesac\n\nWD=$(mktemp -d)\nif [ ${prog} == grep_compiled ]; then\n    src=\"${this_dir}\"/scripts/grep.py\n    script=\"${WD}\"/grep.pyc\n    ${PYTHON} -c \"from py_compile import compile; compile('${src}', cfile='${script}')\"\nelse\n    script=\"${this_dir}\"/scripts/${prog}.py\nfi\n\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    INPUT=\"input\"\n    OUTPUT=\"output\"\n    ${HDFS} dfs -rm -r -f \"${INPUT}\" \"${OUTPUT}\"\n    ${HDFS} dfs -put \"${DATA}\" \"${INPUT}\"\nelse\n    INPUT=\"${DATA}\"\n    OUTPUT=\"${WD}/output\"\nfi\n${PYDOOP} script \"${OPTS[@]}\" \"${script}\" \"${INPUT}\" \"${OUTPUT}\"\n${PYTHON} \"${this_dir}\"/check.py ${prog} \"${OUTPUT}\"\n\nrm -rf \"${WD}\"\n"
  },
  {
    "path": "examples/pydoop_script/scripts/base_histogram.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nCount the base frequency in sequencing data (in SAM format).\n\n  input: file in SAM format\n  output: tab-separated (base, count) pairs\n\"\"\"\n\n\ndef mapper(_, samrecord, writer):\n    seq = samrecord.split(\"\\t\", 10)[9]\n    for c in seq:\n        writer.emit(c, 1)\n    writer.count(\"bases\", len(seq))\n\n\ndef reducer(key, ivalue, writer):\n    writer.emit(key, sum(ivalue))\n"
  },
  {
    "path": "examples/pydoop_script/scripts/caseswitch.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nConvert text to upper or lower case.  By default, the program will\nswitch text to upper case.  Set the config property\n'caseswitch.case=lower' if you prefer to switch to lower case.\n\nSet --kv-separator to the empty string when running this example.\n\"\"\"\n\n\ndef mapper(_, record, writer, conf):\n    if conf['caseswitch.case'] == 'upper':\n        value = record.upper()\n    elif conf['caseswitch.case'] == 'lower':\n        value = record.lower()\n    else:\n        raise RuntimeError(\n            \"Invalid caseswitch value %s\" % conf['caseswitch.case']\n        )\n    writer.emit(\"\", value)\n"
  },
  {
    "path": "examples/pydoop_script/scripts/grep.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nEmit strings that contain the substring provided by the property\n'grep-expression' (script raises an exception if the property is\nmissing).  We use the fourth 'conf' argument to retrieve the custom\n'grep-expression' parameter.\n\nWhen running this example, set --kv-separator to the empty string and\n--num-reducers 0.\n\"\"\"\n\n\n# DOCS_INCLUDE_START\ndef mapper(_, text, writer, conf):\n    if text.find(conf['grep-expression']) >= 0:\n        writer.emit(\"\", text)\n"
  },
  {
    "path": "examples/pydoop_script/scripts/lowercase.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nConvert text to lowercase.\n\nSet --kv-separator to the empty string when running this example.\n\"\"\"\n\n\n# DOCS_INCLUDE_START\ndef mapper(_, record, writer):\n    writer.emit(\"\", record.lower())\n"
  },
  {
    "path": "examples/pydoop_script/scripts/transpose.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nTranspose a tab-separated text matrix.\n\n  pydoop script transpose.py matrix.txt t_matrix\n  hadoop fs -get t_matrix{,}\n  sort -mn -k1,1 -o t_matrix.txt t_matrix/part-0000*\n\nt_matrix.txt contains an additional first column with row indexes --\nthis might not be a problem if it acts as input for another job.\n\nHow does it work? Suppose you want to transpose the following matrix:\n\n  a00 a01 a02\n  a10 a11 a12\n\nWe can set the intermediate key to the column index to have the\nframework automatically regroup elements by column. We also have to\nsend the row index: the reducer will need it to sort each output\nrow. Although we don't know the global row index for a given input\nrecord, we can use the input key, which is equal to the global byte\ncount (with the default TextInputFormat). 
The key/value stream emitted\nby the mappers looks like:\n\n  0, (0, 'a00')\n  1, (0, 'a01')\n  2, (0, 'a02')\n  0, (12, 'a10')\n  1, (12, 'a11')\n  2, (12, 'a12')\n\nAnd reducers will get:\n\n  0, [(0, 'a00'), (12, 'a10')]\n  2, [(0, 'a02'), (12, 'a12')]\n  1, [(12, 'a11'), (0, 'a01')]\n\nWriting out the key (i.e., the output row index) together with the\nvalue allows to put the output rows in the correct order.\n\"\"\"\n\n\ndef mapper(key, value, writer):\n    # work around pipes' current limitation with explicit input formats\n    try:\n        value = value.decode(\"ascii\")\n    except AttributeError:\n        pass\n    for i, a in enumerate(value.split()):\n        writer.emit(i, (key, a))\n\n\ndef reducer(key, ivalue, writer):\n    row = [_[1] for _ in sorted(ivalue)]\n    writer.emit(key, \"\\t\".join(row))\n"
  },
  {
    "path": "examples/pydoop_script/scripts/wc_combiner.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nWord count with combiner.\n\"\"\"\n\n\ndef mapper(_, text, writer):\n    for word in text.split():\n        writer.emit(word, 1)\n\n\ndef reducer(word, icounts, writer):\n    writer.emit(word, sum(icounts))\n\n\n# DOCS_INCLUDE_START\ndef combiner(word, icounts, writer):\n    writer.count('combiner calls', 1)\n    reducer(word, icounts, writer)\n"
  },
  {
    "path": "examples/pydoop_script/scripts/wordcount.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nPydoop script version of the word count example.\n\"\"\"\n\n\n# DOCS_INCLUDE_START\ndef mapper(_, text, writer):\n    for word in text.split():\n        writer.emit(word, 1)\n\n\ndef reducer(word, icounts, writer):\n    writer.emit(word, sum(icounts))\n"
  },
  {
    "path": "examples/pydoop_script/scripts/wordcount_sw.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nWord count with stop words (i.e., words that should be ignored).\n\"\"\"\n\n# DOCS_INCLUDE_START\nSTOP_WORDS_FN = 'stop_words.txt'\n\ntry:\n    with open(STOP_WORDS_FN) as f:\n        STOP_WORDS = frozenset(l.strip() for l in f if not l.isspace())\nexcept OSError:\n    STOP_WORDS = frozenset()\n\n\ndef mapper(_, value, writer):\n    for word in value.split():\n        if word in STOP_WORDS:\n            writer.count(\"STOP_WORDS\", 1)\n        else:\n            writer.emit(word, 1)\n\n\ndef reducer(word, icounts, writer):\n    writer.emit(word, sum(icounts))\n"
  },
  {
    "path": "examples/pydoop_submit/check.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport os\nimport argparse\nfrom ast import literal_eval\n\nimport pydoop.test_support as pts\nimport pydoop.hadut as hadut\nimport pydoop.hdfs as hdfs\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nDEFAULT_INPUT_DIR = os.path.join(THIS_DIR, os.pardir, \"input\")\nCHECKS = [\n    \"nosep\",\n    \"wordcount_minimal\",\n    \"wordcount_full\",\n    \"map_only_java_writer\",\n    \"map_only_python_writer\",\n]\n\n\ndef check_wordcount_minimal(mr_out_dir):\n    output = hadut.collect_output(mr_out_dir)\n    local_wc = pts.LocalWordCount(DEFAULT_INPUT_DIR)\n    res = local_wc.check(output)\n    return res.startswith(\"OK\")  # FIXME: change local_wc to raise an exception\n\n\ncheck_wordcount_full = check_wordcount_minimal\n\n\ndef check_nosep(mr_out_dir):\n    output = []\n    for fn in hadut.iter_mr_out_files(mr_out_dir):\n        with hdfs.open(fn, \"rt\") as f:\n            for line in f:\n                output.append(line.rstrip())\n    exp_output = []\n    in_dir = os.path.join(THIS_DIR, \"data\")\n    for name in os.listdir(in_dir):\n        with open(os.path.join(in_dir, name)) as f:\n            exp_output.extend([\"\".join(_.rstrip().split()) for _ in f])\n    return sorted(exp_output) == sorted(output)\n\n\ndef check_map_only_python_writer(mr_out_dir):\n    output = []\n    
for fn in hadut.iter_mr_out_files(mr_out_dir):\n        with hdfs.open(fn, \"rt\") as f:\n            for line in f:\n                try:\n                    t, rec = line.rstrip().split(\"\\t\", 1)\n                except ValueError:\n                    t, rec = line.rstrip(), \"\"\n                output.append((literal_eval(t), rec))\n    output = [_[1] for _ in sorted(output)]\n    exp_output = []\n    for name in sorted(os.listdir(DEFAULT_INPUT_DIR)):\n        with open(os.path.join(DEFAULT_INPUT_DIR, name)) as f:\n            exp_output.extend([_.rstrip().upper() for _ in f])\n    return exp_output == output\n\n\ncheck_map_only_java_writer = check_map_only_python_writer\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"name\", metavar=\"NAME\", choices=CHECKS,\n                        help=\"one of: %s\" % \"; \".join(CHECKS))\n    parser.add_argument(\"mr_out\", metavar=\"DIR\", help=\"MapReduce out dir\")\n    args = parser.parse_args(sys.argv[1:])\n    check = globals()[\"check_%s\" % args.name]\n    if check(args.mr_out):\n        print(\"OK.\")\n    else:\n        sys.exit(\"ERROR: output differs from the expected one\")\n"
  },
  {
    "path": "examples/pydoop_submit/data/cols_1.txt",
    "content": "foo1\tbar1\nfoo2\tbar2\n"
  },
  {
    "path": "examples/pydoop_submit/data/cols_2.txt",
    "content": "foo3\tbar3\nfoo4\tbar4\n"
  },
  {
    "path": "examples/pydoop_submit/mr/map_only_java_writer.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def __init__(self, context):\n        self.name = os.path.basename(context.input_split.filename)\n\n    def map(self, context):\n        context.emit((self.name, context.key), context.value.upper())\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "examples/pydoop_submit/mr/map_only_python_writer.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport logging\n\nlogging.basicConfig()\nLOGGER = logging.getLogger(\"MapOnly\")\nLOGGER.setLevel(logging.INFO)\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\nimport pydoop.hdfs as hdfs\n\n\nclass Mapper(api.Mapper):\n\n    def __init__(self, context):\n        self.name = hdfs.path.basename(context.input_split.filename)\n\n    def map(self, context):\n        context.emit((self.name, context.key), context.value.upper())\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        self.logger = LOGGER.getChild(\"Writer\")\n        jc = context.job_conf\n        outfn = context.get_default_work_file()\n        self.logger.info(\"writing to %s\", outfn)\n        hdfs_user = jc.get(\"pydoop.hdfs.user\", None)\n        self.sep = jc.get(\"mapreduce.output.textoutputformat.separator\", \"\\t\")\n        self.file = hdfs.open(outfn, \"wt\", user=hdfs_user)\n\n    def close(self):\n        self.file.close()\n        self.file.fs.close()\n\n    def emit(self, key, value):\n        self.file.write(\"%r%s%s%s\" % (key, self.sep, value, \"\\n\"))\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper, record_writer_class=Writer))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "examples/pydoop_submit/mr/nosep.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pp\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, ctx):\n        p = ctx.value.strip().split('\\t')\n        ctx.emit(p[0], p[1])\n\n\ndef __main__():\n    pp.run_task(pp.Factory(Mapper, None))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "examples/pydoop_submit/mr/wordcount_full.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport logging\n\nlogging.basicConfig()\nLOGGER = logging.getLogger(\"WordCount\")\nLOGGER.setLevel(logging.INFO)\n\nfrom hashlib import md5\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\nimport pydoop.hdfs as hdfs\n\n\nclass Mapper(api.Mapper):\n\n    def __init__(self, context):\n        super(Mapper, self).__init__(context)\n        context.set_status(\"initializing mapper\")\n        self.input_words = context.get_counter(\"WORDCOUNT\", \"INPUT_WORDS\")\n\n    def map(self, context):\n        words = context.value.split()\n        for w in words:\n            context.emit(w, 1)\n        context.increment_counter(self.input_words, len(words))\n\n\nclass Reducer(api.Reducer):\n\n    def __init__(self, context):\n        super(Reducer, self).__init__(context)\n        context.set_status(\"initializing reducer\")\n        self.output_words = context.get_counter(\"WORDCOUNT\", \"OUTPUT_WORDS\")\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n        context.increment_counter(self.output_words, 1)\n\n\nclass Reader(api.RecordReader):\n    \"\"\"\n    Mimics Hadoop's default LineRecordReader (keys are byte offsets with\n    respect to the whole file; values are text lines).\n    \"\"\"\n    def __init__(self, context):\n        
super(Reader, self).__init__(context)\n        self.logger = LOGGER.getChild(\"Reader\")\n        self.logger.debug('started')\n        self.isplit = context.input_split\n        for a in \"filename\", \"offset\", \"length\":\n            self.logger.debug(\n                \"isplit.{} = {}\".format(a, getattr(self.isplit, a))\n            )\n        self.file = hdfs.open(self.isplit.filename)\n        self.file.seek(self.isplit.offset)\n        self.bytes_read = 0\n        if self.isplit.offset > 0:\n            discarded = self.file.readline()\n            self.bytes_read += len(discarded)\n\n    def close(self):\n        self.logger.debug(\"closing open handles\")\n        self.file.close()\n        self.file.fs.close()\n\n    def next(self):\n        if self.bytes_read > self.isplit.length:\n            raise StopIteration\n        key = self.isplit.offset + self.bytes_read\n        record = self.file.readline()\n        if not record:  # end of file\n            raise StopIteration\n        self.bytes_read += len(record)\n        return (key, record.decode(\"utf-8\"))\n\n    def get_progress(self):\n        return min(float(self.bytes_read) / self.isplit.length, 1.0)\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        self.logger = LOGGER.getChild(\"Writer\")\n        jc = context.job_conf\n        outfn = context.get_default_work_file()\n        self.logger.info(\"writing to %s\", outfn)\n        hdfs_user = jc.get(\"pydoop.hdfs.user\", None)\n        self.file = hdfs.open(outfn, \"wt\", user=hdfs_user)\n        self.sep = jc.get(\"mapreduce.output.textoutputformat.separator\", \"\\t\")\n\n    def close(self):\n        self.logger.debug(\"closing open handles\")\n        self.file.close()\n        self.file.fs.close()\n\n    def emit(self, key, value):\n        self.file.write(key + self.sep + str(value) + \"\\n\")\n\n\nclass Partitioner(api.Partitioner):\n\n    def __init__(self, 
context):\n        super(Partitioner, self).__init__(context)\n        self.logger = LOGGER.getChild(\"Partitioner\")\n\n    def partition(self, key, n_reduces):\n        reducer_id = int(md5(key).hexdigest(), 16) % n_reduces\n        self.logger.debug(\"reducer_id: %r\" % reducer_id)\n        return reducer_id\n\n\n# DOCS_INCLUDE_START\nFACTORY = pipes.Factory(\n    Mapper,\n    reducer_class=Reducer,\n    record_reader_class=Reader,\n    record_writer_class=Writer,\n    partitioner_class=Partitioner,\n    combiner_class=Reducer\n)\n# DOCS_INCLUDE_END\n\n\ndef main():\n    pipes.run_task(FACTORY)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "examples/pydoop_submit/mr/wordcount_minimal.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nIncludes only the bare minimum required to run wordcount. See\nwordcount-full.py for an example that uses counters, RecordReader, etc.\n\"\"\"\n# DOCS_INCLUDE_START\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\nFACTORY = pipes.Factory(Mapper, reducer_class=Reducer)\n\n\ndef main():\n    pipes.run_task(FACTORY)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "examples/pydoop_submit/run",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nfor s in map_only_java_writer map_only_python_writer nosep wordcount_full wordcount_minimal; do\n    bash \"${this_dir}\"/run_submit.sh ${s}\ndone\n\nbash \"${this_dir}\"/run_submit.sh -p wordcount_minimal_pstats wordcount_minimal\n"
  },
  {
    "path": "examples/pydoop_submit/run_submit.sh",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nOPTS=(\n    \"-D\" \"mapreduce.task.timeout=10000\"\n    \"-D\" \"mapreduce.job.maps=2\"\n    \"--python-program\" \"${PYTHON}\"\n)\n\nwhile getopts \":p:\" opt; do\n    case ${opt} in\n    p )\n\tOPTS+=( \"--pstats-dir\" \"${OPTARG}\" )\n\tOPTS+=( \"--pstats-fmt\" \"_test_%s_%05d_%s\" )\n\t;;\n    \\? 
)\n\techo \"Invalid option: -${OPTARG}\" >&2\n\texit 1\n\t;;\n    : )\n\techo \"Option -${OPTARG} requires an argument\" >&2\n\texit 1\n\t;;\n    esac\ndone\nshift $((${OPTIND} - 1))\n\nnargs=1\nif [ $# -ne ${nargs} ]; then\n    die \"Usage: $0 [-p PSTATS_DIR] MODULE_NAME\"\nfi\nMODULE=$1\n\nAPP_DIR=\"${this_dir}/mr\"\nJOBNAME=${MODULE}\nRESULTS=results.txt\n\nOPTS+=( \"--job-name\" \"${JOBNAME}\" )\ncase ${MODULE} in\n    wordcount_minimal )\n\tDATA=\"${this_dir}\"/../input\n\tOPTS+=(\"--entry-point\" \"main\")\n\t;;\n    wordcount_full )\n\tDATA=\"${this_dir}\"/../input\n\tOPTS+=(\"--entry-point\" \"main\")\n\tOPTS+=( \"--do-not-use-java-record-reader\" )\n\tOPTS+=( \"--do-not-use-java-record-writer\" )\n\tOPTS+=( \"-D\" \"pydoop.hdfs.user=${USER}\" )\n\t;;\n    nosep )\n\tDATA=\"${this_dir}\"/data\n\tOPTS+=( \"--num-reducers\" \"0\" )\n\tOPTS+=( \"--output-format\" \"it.crs4.pydoop.NoSeparatorTextOutputFormat\" )\n\t;;\n    map_only_java_writer )\n\tDATA=\"${this_dir}\"/../input\n\tOPTS+=( \"--num-reducers\" \"0\" )\n\t;;\n    map_only_python_writer )\n\tDATA=\"${this_dir}\"/../input\n\tOPTS+=( \"--num-reducers\" \"0\" )\n\tOPTS+=( \"--do-not-use-java-record-writer\" )\n\t;;\nesac\nOPTS+=( \"--upload-file-to-cache\" \"${APP_DIR}/${MODULE}.py\" )\n[ -n \"${DEBUG:-}\" ] && OPTS+=( \"--log-level\" \"DEBUG\" )\n\nWD=$(mktemp -d)\n\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    INPUT=\"input\"\n    OUTPUT=\"output\"\n    ${HDFS} dfs -rm -r -f \"${INPUT}\" \"${OUTPUT}\"\n    ${HDFS} dfs -put \"${DATA}\" \"${INPUT}\"\nelse\n    INPUT=\"${DATA}\"\n    OUTPUT=\"${WD}/output\"\nfi\n${PYDOOP} submit \"${OPTS[@]}\" ${MODULE} \"${INPUT}\" \"${OUTPUT}\"\n${PYTHON} \"${this_dir}\"/check.py ${MODULE} \"${OUTPUT}\"\n\nrm -rf \"${WD}\"\n"
  },
  {
    "path": "examples/run_all",
    "content": "#!/bin/bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/config.sh\"\n\ntrap exit ERR\n\nexamples=(\n    hdfs\n    input_format\n    pydoop_script\n    pydoop_submit\n    self_contained\n    sequence_file\n)\nsome_failed=0\n\nfor e in ${examples[@]}; do\n    pushd ${e}\n    echo -ne \"\\n\\n *** RUNNING ${e} EXAMPLE(S) ***\\n\\n\"\n    ./run\n    exit_code=$?\n    if [ ${exit_code} -ne 0 ]; then\n      echo -ne \"\\n\\n #### Error!!  Example ${e} finished with code ${exit_code} ###\\n\\n\" >&2\n      some_failed=1\n    fi\n    popd\ndone\n\nif [ ${some_failed} -ne 0 ]; then\n  echo \"##############################################\" >&2\n  echo \"Some examples failed to run correctly.  Please\" >&2\n  echo \"verify your installation\"                       >&2\n  echo \"##############################################\" >&2\nfi\n"
  },
  {
    "path": "examples/self_contained/check_results.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport re\nimport logging\nfrom collections import Counter\n\nlogging.basicConfig(level=logging.INFO)\n\nimport pydoop.hdfs as hdfs\nimport pydoop.test_support as pts\nimport pydoop.hadut as hadut\n\n\ndef compute_vc(input_dir):\n    data = []\n    for path in hdfs.ls(input_dir):\n        with hdfs.open(path, 'rt') as f:\n            data.append(f.read())\n    all_data = ''.join(data)\n    vowels = re.findall('[AEIOUY]', all_data.upper())\n    return Counter(vowels)\n\n\ndef get_res(output_dir):\n    return pts.parse_mr_output(hadut.collect_output(output_dir), vtype=int)\n\n\ndef check(measured_res, expected_res):\n    res = pts.compare_counts(measured_res, expected_res)\n    if res:\n        return \"ERROR: %s\" % res\n    else:\n        return \"OK.\"\n\n\ndef main(argv):\n    logger = logging.getLogger(\"main\")\n    logger.setLevel(logging.INFO)\n    input_dir = argv[1]\n    output_dir = argv[2]\n    logger.info(\"checking results\")\n    measured_res = get_res(output_dir)\n    expected_res = compute_vc(input_dir)\n    logger.info(check(measured_res, expected_res))\n\n\nif __name__ == \"__main__\":\n    main(sys.argv)\n"
  },
  {
    "path": "examples/self_contained/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nold_pwd=$(pwd)\n\nwd=$(mktemp -d)\nvc_tar=\"${wd}\"/vowelcount.tgz\npydoop_tar=\"${wd}\"/pydoop.tgz\n\ncd \"${this_dir}/../..\"\n${PYTHON} -m pip install --pre -t \"${wd}\" .\ncd \"${wd}/pydoop\"\ntar cfz \"${pydoop_tar}\" .\ncd \"${this_dir}/vowelcount\"\ntar czf \"${vc_tar}\" .\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    input=\"input\"\n    output=\"output\"\n    ${HDFS} dfs -rm -r -f \"${input}\" \"${output}\"\n    ${HDFS} dfs -put \"${this_dir}/../input\" \"${input}\"\nelse\n    input=\"${this_dir}/../input\"\n    output=\"${wd}/output\"\nfi\n\nopts=(\n    \"--python-zip\" \"${vc_tar}\"\n    \"--upload-archive-to-cache\" \"${pydoop_tar}\"\n    \"--job-name\" \"self_contained\"\n    \"--entry-point\" \"main\"\n    \"--no-override-home\"\n    \"--no-override-env\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n)\n[ -n \"${DEBUG:-}\" ] && opts+=( \"--log-level\" \"DEBUG\" )\n${PYDOOP} submit \"${opts[@]}\" vowelcount.mr.main \"${input}\" \"${output}\"\n${PYTHON} \"${this_dir}/check_results.py\" \"${input}\" \"${output}\"\n\ncd ${old_pwd}\nrm -rf \"${wd}\"\n"
  },
  {
    "path": "examples/self_contained/vowelcount/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nA trivial MapReduce application that counts the occurrence of each\nvowel in a text input stream. It is more structured than would be\nnecessary because we want to test automatic distribution of a package\nrather than a single module.\n\"\"\"\n"
  },
  {
    "path": "examples/self_contained/vowelcount/lib/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# DOCS_INCLUDE_START\n_VOWELS = set(\"AEIOUYaeiouy\")\n\n\ndef is_vowel(c):\n    return c in _VOWELS\n"
  },
  {
    "path": "examples/self_contained/vowelcount/mr/__init__.py",
    "content": ""
  },
  {
    "path": "examples/self_contained/vowelcount/mr/main.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom pydoop.mapreduce.pipes import run_task, Factory\nfrom .mapper import Mapper\nfrom .reducer import Reducer\n\n\ndef main():\n    return run_task(Factory(Mapper, Reducer, combiner_class=Reducer))\n"
  },
  {
    "path": "examples/self_contained/vowelcount/mr/mapper.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nfrom vowelcount.lib import is_vowel\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for c in context.value:\n            if is_vowel(c):\n                context.emit(c.upper(), 1)\n"
  },
  {
    "path": "examples/self_contained/vowelcount/mr/reducer.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        s = sum(context.values)\n        context.emit(context.key, s)\n"
  },
  {
    "path": "examples/sequence_file/bin/filter.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nFilter out words whose occurrence falls below a specified value.\n\"\"\"\n\nimport struct\n\nfrom pydoop.mapreduce.pipes import run_task, Factory\nfrom pydoop.mapreduce.api import Mapper\n\n\nclass FilterMapper(Mapper):\n    \"\"\"\n    Process a wordcount output stream, emitting only records relative to\n    words whose count is equal to or above the configured threshold.\n    \"\"\"\n    def __init__(self, context):\n        super(FilterMapper, self).__init__(context)\n        jc = context.job_conf\n        self.threshold = jc.get_int(\"filter.occurrence.threshold\")\n\n    def map(self, context):\n        word, occurrence = context.key, context.value\n        occurrence = struct.unpack(\">i\", occurrence)[0]\n        if occurrence >= self.threshold:\n            context.emit(word, str(occurrence))\n\n\ndef __main__():\n    factory = Factory(FilterMapper)\n    run_task(factory, raw_values=True)\n"
  },
  {
    "path": "examples/sequence_file/bin/wordcount.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport struct\n\nfrom pydoop.mapreduce.pipes import run_task, Factory\nfrom pydoop.mapreduce.api import Mapper, Reducer\n\n\nclass WordCountMapper(Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass WordCountReducer(Reducer):\n\n    def reduce(self, context):\n        s = sum(context.values)\n        context.emit(context.key.encode(\"utf-8\"), struct.pack(\">i\", s))\n\n\ndef __main__():\n    factory = Factory(WordCountMapper, WordCountReducer)\n    run_task(factory, auto_serialize=False)\n"
  },
  {
    "path": "examples/sequence_file/check.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\n\nimport pydoop.test_support as pts\nimport pydoop.hadut as hadut\n\n\ndef main(args):\n    output = hadut.collect_output(args.output)\n    local_wc = pts.LocalWordCount(args.input, min_occurrence=args.threshold)\n    res = local_wc.check(output)\n    if res.startswith(\"OK\"):  # FIXME: change local_wc to raise an exception\n        print(\"OK.\")\n    else:\n        raise RuntimeError(\"output differs from the expected one\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"input\", metavar=\"INPUT_DIR\")\n    parser.add_argument(\"output\", metavar=\"OUTPUT_DIR\")\n    parser.add_argument(\"-t\", \"--threshold\", type=int, metavar=\"INT\",\n                        help=\"min word occurrence\", default=10)\n    main(parser.parse_args())\n"
  },
  {
    "path": "examples/sequence_file/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# This example shows how to use Hadoop's SequenceFile input and output formats\n# with Pydoop. First we run a word count on the input, storing counts as\n# 32-bit integers in Hadoop SequenceFiles; next we run a MapReduce application\n# that filters out those words whose count falls below a specified threshold.\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. 
\"${this_dir}/../config.sh\"\n\nlocal_input=\"${this_dir}/../input\"\noccurrence_threshold=10\nopts=(\n    \"--python-program\" \"${PYTHON}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n)\n[ -n \"${DEBUG:-}\" ] && OPTS+=( \"--log-level\" \"DEBUG\" )\n\nrun_wc() {\n    local input=$1\n    local output=$2\n    local opts=( \"${opts[@]}\" )\n    opts+=(\n\t\"--job-name\" \"wordcount\"\n\t\"--num-reducers\" \"2\"\n\t\"--output-format\" \"org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat\"\n\t\"--upload-file-to-cache\" \"${this_dir}/bin/wordcount.py\"\n\t\"-D\" \"mapreduce.output.fileoutputformat.compress.type=NONE\"\n    )\n    ${PYDOOP} submit \"${opts[@]}\" wordcount \"${input}\" \"${output}\"\n}\n\nrun_filter() {\n    local input=$1\n    local output=$2\n    local opts=( \"${opts[@]}\" )\n    opts+=(\n\t\"--job-name\" \"filter\"\n\t\"--num-reducers\" \"0\"\n\t\"--input-format\" \"org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat\"\n\t\"--upload-file-to-cache\" \"${this_dir}/bin/filter.py\"\n\t\"-D\" \"filter.occurrence.threshold=${occurrence_threshold}\"\n    )\n    ${PYDOOP} submit \"${opts[@]}\" filter \"${input}\" \"${output}\"\n}\n\nwd=$(mktemp -d)\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    input=\"input\"\n    wc_output=\"wc_output\"\n    filter_output=\"filter_output\"\n    ${HDFS} dfs -rm -r -f \"${input}\" \"${wc_output}\" \"${filter_output}\"\n    ${HDFS} dfs -put \"${local_input}\" \"${input}\"\nelse\n    input=\"${local_input}\"\n    wc_output=\"${wd}/wc_output\"\n    filter_output=\"${wd}/filter_output\"\nfi\n\nrun_wc \"${input}\" \"${wc_output}\"\nrun_filter \"${wc_output}\" \"${filter_output}\"\n${PYTHON} \"${this_dir}/check.py\" \"${local_input}\" \"${filter_output}\" -t ${occurrence_threshold}\n\nrm -rf \"${wd}\"\n"
  },
  {
    "path": "int_test/config.sh",
    "content": "[ -n \"${PYDOOP_INT_TESTS:-}\" ] && return || readonly PYDOOP_INT_TESTS=1\n\ndie() {\n    echo $1 1>&2\n    exit 1\n}\n\nexport USER=\"${USER:-$(whoami)}\"\nexport HADOOP=\"${HADOOP:-hadoop}\"\nexport HDFS=\"${HDFS:-hdfs}\"\nexport MAPRED=\"${MAPRED:-mapred}\"\nexport YARN=\"${YARN:-yarn}\"\nexport PYTHON=\"${PYTHON:-python}\"\nexport PY_VER=$(\"${PYTHON}\" -c 'import sys; print(sys.version_info[0])')\nexport PYDOOP=\"pydoop${PY_VER}\"\n\nensure_dfs_home() {\n    ${HDFS} dfs -mkdir -p /user/${USER}\n}\n\nhadoop_fs() {\n    ${HDFS} getconf -confKey fs.defaultFS | cut -d : -f 1\n}\n\nexport -f die ensure_dfs_home hadoop_fs\n"
  },
  {
    "path": "int_test/mapred_submitter/check.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\nimport io\nimport os\nimport pstats\nimport sys\nfrom collections import Counter\nfrom itertools import chain\n\n\ndef get_lines(dir_path):\n    rval = []\n    for name in sorted(os.listdir(dir_path)):\n        path = os.path.join(dir_path, name)\n        if not os.path.isdir(path):\n            with io.open(path, \"rt\") as f:\n                for line in f:\n                    rval.append(line.rstrip())\n    return rval\n\n\ndef check_output(items, exp_items):\n    if len(items) != len(exp_items):\n        raise RuntimeError(\"n. 
output items = %d (expected: %d)\" % (\n            len(items), len(exp_items)\n        ))\n    for i, (it, exp_it) in enumerate(zip(items, exp_items)):\n        if it != exp_it:\n            raise RuntimeError(\"wrong output item #%d: %r (expected: %r)\" % (\n                i, it, exp_it\n            ))\n\n\ndef check_counters(counter, exp_counter):\n    return check_output(sorted(counter.items()), sorted(exp_counter.items()))\n\n\ndef word_count(lines):\n    return Counter(chain(*(_.split() for _ in lines)))\n\n\ndef check_map_only(in_dir, out_dir):\n    uc_lines = [_.upper() for _ in get_lines(in_dir)]\n    out_values = [_.split(\"\\t\", 1)[1] for _ in get_lines(out_dir)]\n    check_output(out_values, uc_lines)\n\n\ndef check_map_reduce(in_dir, out_dir):\n    wc = word_count(get_lines(in_dir))\n    out_pairs = (_.split(\"\\t\", 1) for _ in get_lines(out_dir))\n    out_wc = {k: int(v) for k, v in out_pairs}\n    check_counters(out_wc, wc)\n\n\ndef check_pstats(pstats_dir):\n    pstats_names = os.listdir(pstats_dir)\n    try:\n        bn = pstats_names[0]\n    except IndexError:\n        raise RuntimeError(\"%r is empty\" % (pstats_dir,))\n    pstats.Stats(os.path.join(pstats_dir, bn))\n\n\nCHECKS = {\n    \"map_only_java_writer\": check_map_only,\n    \"map_only_python_writer\": check_map_only,\n    \"map_reduce_combiner\": check_map_reduce,\n    \"map_reduce_java_rw\": check_map_reduce,\n    \"map_reduce_java_rw_pstats\": check_map_reduce,\n    \"map_reduce_python_partitioner\": check_map_reduce,\n    \"map_reduce_python_reader\": check_map_reduce,\n    \"map_reduce_python_writer\": check_map_reduce,\n    \"map_reduce_raw_io\": check_map_reduce,\n    \"map_reduce_slow_java_rw\": check_map_reduce,\n    \"map_reduce_slow_python_rw\": check_map_reduce,\n}\n\n\nif __name__ == \"__main__\":\n    choices = sorted(CHECKS)\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"name\", metavar=\"NAME\", choices=choices,\n                        help=\"one 
of: %s\" % \"; \".join(choices))\n    parser.add_argument(\"mr_in\", metavar=\"IN_DIR\", help=\"MapReduce in dir\")\n    parser.add_argument(\"mr_out\", metavar=\"OUT_DIR\", help=\"MapReduce out dir\")\n    args = parser.parse_args(sys.argv[1:])\n    check = CHECKS[args.name]\n    check(args.mr_in, args.mr_out)\n    if \"pstats\" in args.name:\n        check_pstats(\"%s.stats\" % args.mr_out)\n    sys.stdout.write(\"OK\\n\")\n"
  },
  {
    "path": "int_test/mapred_submitter/genwords.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\nimport io\nimport os\nimport sys\nfrom random import choice\n\n\nPOOL = b\"\"\"\\\nlorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod\ntempor incididunt ut labore et dolore magna aliqua ut enim ad minim veniam\nquis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\nconsequat duis aute irure dolor in reprehenderit in voluptate velit esse\ncillum dolore eu fugiat nulla pariatur excepteur sint occaecat cupidatat non\nproident sunt in culpa qui officia deserunt mollit anim id est laborum\n\"\"\".splitlines(True)\n\n\ndef genfile(path, size):\n    current_size = 0\n    with io.open(path, \"wb\") as f:\n        while current_size < size:\n            line = choice(POOL)\n            f.write(line)\n            current_size += len(line)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"out_dir\", metavar=\"OUT_DIR\")\n    parser.add_argument(\"--n-files\", metavar=\"INT\", type=int, default=2)\n    parser.add_argument(\"--file-size\", metavar=\"BYTES\", type=int, default=1000)\n    args = parser.parse_args(sys.argv[1:])\n    os.makedirs(args.out_dir)\n    for i in range(args.n_files):\n        path = os.path.join(args.out_dir, \"f%d.txt\" % i)\n        genfile(path, args.file_size)\n"
  },
  {
    "path": "int_test/mapred_submitter/input/map_only/f1.txt",
    "content": "line1\nline2\n"
  },
  {
    "path": "int_test/mapred_submitter/input/map_only/f2.txt",
    "content": "line3\nline4\n"
  },
  {
    "path": "int_test/mapred_submitter/input/map_reduce/f1.txt",
    "content": "the quick brown fox\nhad a meeting with\nthe lazy red FӦX\n"
  },
  {
    "path": "int_test/mapred_submitter/input/map_reduce/f2.txt",
    "content": "the young black FӦX\nhad breakfast with\nthe old pink fox\n"
  },
  {
    "path": "int_test/mapred_submitter/input/map_reduce_long/f.txt",
    "content": "we need more\nthan ten\nlines\nbecause\nwe are\nsetting the\ntimeout to\nten seconds\nand the map\nand reduce\nfunctions\nsleep for one\nseconds before\neach emit\nif things are\nworking the\ntimeout will\nreset at each\nemit and the\njob will\ncomplete\nif not the\njob will\ncrash\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_only_java_writer.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        context.emit(context.key, context.value.upper())\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_only_python_writer.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.hdfs as hdfs\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\nSEP_KEY = \"mapreduce.output.textoutputformat.separator\"\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        context.emit(context.key, context.value.upper())\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        outfn = context.get_default_work_file()\n        self.file = hdfs.open(outfn, \"wt\")\n        self.sep = context.job_conf.get(SEP_KEY, \"\\t\")\n\n    def close(self):\n        self.file.close()\n\n    def emit(self, key, value):\n        self.file.write(str(key) + self.sep + value + \"\\n\")\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper, record_writer_class=Writer))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_combiner.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        combiner_class=Reducer,\n        reducer_class=Reducer,\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_java_rw.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper, reducer_class=Reducer))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_java_rw_pstats.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\ndef __main__():\n    factory = pipes.Factory(Mapper, reducer_class=Reducer)\n    pipes.run_task(factory, pstats_dir=\"pstats\")\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_python_partitioner.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom hashlib import md5\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\nclass Partitioner(api.Partitioner):\n\n    def partition(self, key, n_reduces):\n        return int(md5(key).hexdigest(), 16) % n_reduces\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        reducer_class=Reducer,\n        partitioner_class=Partitioner\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_python_reader.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom __future__ import division\n\nimport pydoop.hdfs as hdfs\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\nclass Reader(api.RecordReader):\n\n    def __init__(self, context):\n        super(Reader, self).__init__(context)\n        self.split = context.input_split\n        self.file = hdfs.open(self.split.filename)\n        self.bytes_read = 0\n        if self.split.offset > 0:\n            self.file.seek(self.split.offset)\n            discarded = self.file.readline()  # handled in previous split\n            self.bytes_read += len(discarded)\n\n    def close(self):\n        self.file.close()\n\n    def next(self):\n        if self.bytes_read > self.split.length:\n            raise StopIteration\n        key = self.split.offset + self.bytes_read\n        value = self.file.readline()\n        if not value:  # end of file\n            raise StopIteration\n        self.bytes_read += len(value)\n        return key, value.decode(\"utf-8\")\n\n    def get_progress(self):\n        return min(self.bytes_read / 
self.split.length, 1.0)\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        reducer_class=Reducer,\n        record_reader_class=Reader\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_python_writer.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.hdfs as hdfs\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\nSEP_KEY = \"mapreduce.output.textoutputformat.separator\"\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        for w in context.value.split():\n            context.emit(w, 1)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        outfn = context.get_default_work_file()\n        self.file = hdfs.open(outfn, \"wt\")\n        self.sep = context.job_conf.get(SEP_KEY, \"\\t\")\n\n    def close(self):\n        self.file.close()\n\n    def emit(self, key, value):\n        self.file.write(key + self.sep + str(value) + \"\\n\")\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        reducer_class=Reducer,\n        record_writer_class=Writer\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_raw_io.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    # in this case there's no need to serialize/deserialize\n    # key is not used, and bytes objects can be split just like strings\n    def map(self, context):\n        # key = struct.unpack(\">q\", context.key)[0]\n        # value = context.value.decode(\"utf-8\")\n        for word in context.value.split():\n            context.emit(word, b\"1\")\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        s = sum(int(_) for _ in context.values)\n        context.emit(context.key, b\"%d\" % s)\n\n\ndef __main__():\n    factory = pipes.Factory(Mapper, reducer_class=Reducer)\n    pipes.run_task(\n        factory,\n        raw_keys=True,\n        raw_values=True,\n        private_encoding=False,\n        auto_serialize=False,\n    )\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_slow_java_rw.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport time\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n\nclass Mapper(api.Mapper):\n\n    def __init__(self, context):\n        super(Mapper, self).__init__(context)\n        self.t0 = time.time()\n\n    def map(self, context):\n        sys.stderr.write(\"in: %r, %r\\n\" % (context.key, context.value))\n        time.sleep(1)\n        for w in context.value.split():\n            context.emit(w, 1)\n\n    def close(self):\n        sys.stderr.write(\"total time: %.3f s\\n\" % (time.time() - self.t0))\n\n\nclass Reducer(api.Reducer):\n\n    def __init__(self, context):\n        super(Reducer, self).__init__(context)\n        self.t0 = time.time()\n\n    def reduce(self, context):\n        sys.stderr.write(\"input key: %r\\n\" % (context.key,))\n        time.sleep(1)\n        context.emit(context.key, sum(context.values))\n\n    def close(self):\n        sys.stderr.write(\"total time: %.3f s\\n\" % (time.time() - self.t0))\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        combiner_class=Reducer,\n        reducer_class=Reducer,\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/mr/map_reduce_slow_python_rw.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom __future__ import division\n\nimport sys\nimport time\n\nimport pydoop.hdfs as hdfs\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\nSEP_KEY = \"mapreduce.output.textoutputformat.separator\"\n\n\nclass Mapper(api.Mapper):\n\n    def __init__(self, context):\n        super(Mapper, self).__init__(context)\n        self.t0 = time.time()\n\n    def map(self, context):\n        sys.stderr.write(\"in: %r, %r\\n\" % (context.key, context.value))\n        time.sleep(1)\n        for w in context.value.split():\n            context.emit(w, 1)\n\n    def close(self):\n        sys.stderr.write(\"total time: %.3f s\\n\" % (time.time() - self.t0))\n\n\nclass Reducer(api.Reducer):\n\n    def __init__(self, context):\n        super(Reducer, self).__init__(context)\n        self.t0 = time.time()\n\n    def reduce(self, context):\n        sys.stderr.write(\"input key: %r\\n\" % (context.key,))\n        time.sleep(1)\n        context.emit(context.key, sum(context.values))\n\n    def close(self):\n        sys.stderr.write(\"total time: %.3f s\\n\" % (time.time() - self.t0))\n\n\nclass Reader(api.RecordReader):\n\n    def __init__(self, context):\n        super(Reader, self).__init__(context)\n        self.split = context.input_split\n        self.file = hdfs.open(self.split.filename)\n       
 self.bytes_read = 0\n        if self.split.offset > 0:\n            self.file.seek(self.split.offset)\n            discarded = self.file.readline()  # handled in previous split\n            self.bytes_read += len(discarded)\n\n    def close(self):\n        self.file.close()\n\n    def next(self):\n        if self.bytes_read > self.split.length:\n            raise StopIteration\n        key = self.split.offset + self.bytes_read\n        value = self.file.readline()\n        if not value:  # end of file\n            raise StopIteration\n        self.bytes_read += len(value)\n        return key, value.decode(\"utf-8\")\n\n    def get_progress(self):\n        return min(self.bytes_read / self.split.length, 1.0)\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        outfn = context.get_default_work_file()\n        self.file = hdfs.open(outfn, \"wt\")\n        self.sep = context.job_conf.get(SEP_KEY, \"\\t\")\n\n    def close(self):\n        self.file.close()\n\n    def emit(self, key, value):\n        self.file.write(key + self.sep + str(value) + \"\\n\")\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(\n        Mapper,\n        reducer_class=Reducer,\n        record_reader_class=Reader,\n        record_writer_class=Writer\n    ))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/mapred_submitter/run",
    "content": "#!/usr/bin/env bash\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\napp_names=(\n    map_only_java_writer\n    map_only_python_writer\n    map_reduce_combiner\n    map_reduce_java_rw\n    map_reduce_java_rw_pstats\n    map_reduce_python_partitioner\n    map_reduce_python_reader\n    map_reduce_python_writer\n    map_reduce_raw_io\n    map_reduce_slow_java_rw\n    map_reduce_slow_python_rw\n)\n\nfor name in \"${app_names[@]}\"; do\n    bash \"${this_dir}\"/run_app.sh ${name}\ndone\n"
  },
  {
    "path": "int_test/mapred_submitter/run_app.sh",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\npushd \"${this_dir}\"\n\n[ $# -ge 1 ] || die \"Usage: $0 APP_NAME\"\nname=$1\n\n${PYTHON} -c \"import pydoop; pydoop.check_local_mode()\"\n\nopts=(\n    \"-D\" \"mapreduce.job.name=${name}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n)\n\nwd=$(mktemp -d)\n\ncase ${name} in\n    map_only_java_writer )\n\tinput=\"input/map_only\"\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t    \"-reduces\" \"0\"\n\t)\n\t;;\n    map_only_python_writer )\n\tinput=\"input/map_only\"\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=false\"\n\t    \"-reduces\" \"0\"\n\t)\n\t;;\n    map_reduce_combiner )\n\tio_sort_mb=1\n\tfile_size=$((2 * io_sort_mb * 1024 * 1024))\n\tinput=\"${wd}/map_reduce_very_long\"\n\t${PYTHON} genwords.py \"${input}\" --file-size ${file_size}\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t    \"-D\" \"mapreduce.task.io.sort.mb=${io_sort_mb}\"\n\t    \"-reduces\" \"2\"\n\t)\n\t;;\n    
map_reduce_python_reader )\n\tinput=\"input/map_reduce\"\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=false\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t    \"-reduces\" \"2\"\n\t)\n\t;;\n    map_reduce_python_writer )\n\tinput=\"input/map_reduce\"\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=false\"\n\t    \"-reduces\" \"2\"\n\t)\n\t;;\n    map_reduce_slow_java_rw )\n\tinput=\"input/map_reduce_long\"\n\topts+=(\n\t    \"-D\" \"mapreduce.job.maps=1\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t    \"-reduces\" \"1\"\n\t)\n\t;;\n    map_reduce_slow_python_rw )\n\tinput=\"input/map_reduce_long\"\n\topts+=(\n\t    \"-D\" \"mapreduce.job.maps=1\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=false\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=false\"\n\t    \"-reduces\" \"1\"\n\t)\n\t;;\n    map_reduce_* )\n\tinput=\"input/map_reduce\"\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t    \"-reduces\" \"2\"\n\t)\n\t;;\n    * )\n\trm -rf \"${wd}\"\n\tdie \"unknown app name: \\\"${name}\\\"\"\nesac\n\nmrapp=\"mr/${name}.py\"\n[ -e \"${mrapp}\" ] || die \"\\\"${mrapp}\\\" not found\"\n\n# wrap the python app with a bash layer that sets PATH\ncat >\"${wd}/mrapp\" <<EOF\n#!${BASH}\n\"\"\":\"\nexport PATH=\"${PATH}\"\nexec \"${PYTHON}\" -u \"\\$0\" \"\\$@\"\n\":\"\"\"\nEOF\ncat \"${mrapp}\" >>\"${wd}/mrapp\"\nmrapp=\"${wd}/mrapp\"\n\nensure_dfs_home\n${HDFS} dfs -rm -r -f \"input\" \"output\" \"mrapp.py\" \"pstats\"\n${HDFS} dfs -put \"${input}\" \"input\"\n${HDFS} dfs -put \"${mrapp}\" \"mrapp.py\"\n${MAPRED} pipes \"${opts[@]}\" -program \"mrapp.py\" -input \"input\" -output \"output\"\n\necho \"checking results\"\n${HDFS} dfs -get output \"${wd}/output\"\ncase \"${name}\" in\n    *pstats )\n\t${HDFS} 
dfs -get pstats \"${wd}/output.stats\"\n\t;;\nesac\n${PYTHON} check.py \"${name}\" \"${input}\" \"${wd}/output\"\n\nrm -rf \"${wd}\"\npopd\n"
  },
  {
    "path": "int_test/mapred_submitter/run_perf.sh",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# E.g., rm -f log; bash run_perf.sh map_reduce_java_rw > >(tee -a log) 2>&1\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\npushd \"${this_dir}\"\n\n[ $# -ge 1 ] || die \"Usage: $0 APP_NAME\"\nname=$1\n\nopts=(\n    \"-D\" \"mapreduce.job.name=${name}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n    \"-D\" \"mapreduce.task.io.sort.mb=10\"\n)\n\ncase ${name} in\n    map_reduce_python_reader )\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=false\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t)\n\t;;\n    map_reduce_python_writer )\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=false\"\n\t)\n\t;;\n    map_reduce_* )\n\topts+=(\n\t    \"-D\" \"mapreduce.pipes.isjavarecordreader=true\"\n\t    \"-D\" \"mapreduce.pipes.isjavarecordwriter=true\"\n\t)\n\t;;\n    * )\n\tdie \"unknown app name: \\\"${name}\\\"\"\nesac\nopts+=( \"-reduces\" \"2\" )\n\nwd=$(mktemp -d)\nmrapp=\"mr/${name}.py\"\n[ -e \"${mrapp}\" ] || die \"\\\"${mrapp}\\\" not found\"\n\ncp \"${mrapp}\" \"${wd}/mrapp.py\"\nmrapp=\"${wd}/mrapp.py\"\npy_exe=$(${PYTHON} -c \"import sys; print(sys.executable)\")\nsed -i 
\"1c#!${py_exe}\" \"${mrapp}\"\n\ninput=\"${wd}/input\"\n${PYTHON} genwords.py --n-files 2 --file-size $((50 * 1024 * 1024)) \"${input}\"\n\nensure_dfs_home\n${HDFS} dfs -rm -r -f \"input\" \"output\" \"mrapp.py\"\n${HDFS} dfs -put \"${input}\" \"input\"\n${HDFS} dfs -put \"${mrapp}\" \"mrapp.py\"\n${MAPRED} pipes \"${opts[@]}\" -program \"mrapp.py\" -input \"input\" -output \"output\"\n\necho \"checking results\"\n${HDFS} dfs -get output \"${wd}/output\"\n${PYTHON} check.py \"${name}\" \"${input}\" \"${wd}/output\"\n\nrm -rf \"${wd}\"\npopd\n"
  },
  {
    "path": "int_test/opaque_split/check.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\nimport io\nimport os\nimport sys\n\nfrom gen_splits import N_TASKS, ITEMS_PER_TASK\n\n\ndef check_output(mr_out_dir):\n    names = [_ for _ in os.listdir(mr_out_dir) if not _.startswith(\"_\")]\n    if len(names) != N_TASKS:\n        raise RuntimeError(\"found %d output files (expected: %d)\" %\n                           (len(names), N_TASKS))\n    idx = []\n    for n in names:\n        path = os.path.join(mr_out_dir, n)\n        with io.open(path, \"rt\") as f:\n            lines = [_.rstrip() for _ in f]\n        if len(lines) != ITEMS_PER_TASK:\n            raise RuntimeError(\"%s has %d lines (expected: %d)\" %\n                               (n, len(lines), ITEMS_PER_TASK))\n        idx.extend(int(_.split(\"\\t\")[0]) for _ in lines)\n    idx.sort()  # not sure order is guaranteed in a map-only job\n    nitems = N_TASKS * ITEMS_PER_TASK\n    if idx != list(range(nitems)):\n        raise RuntimeError(\"overall indices != range(%d)\" % nitems)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"mr_out\", metavar=\"OUT_DIR\", help=\"MapReduce out dir\")\n    args = parser.parse_args(sys.argv[1:])\n    check_output(args.mr_out)\n    sys.stdout.write(\"OK\\n\")\n"
  },
  {
    "path": "int_test/opaque_split/gen_splits.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\nimport sys\n\nimport pydoop.hdfs as hdfs\nfrom pydoop.mapreduce.pipes import OpaqueSplit, write_opaque_splits\n\n\nN_TASKS = 2\nITEMS_PER_TASK = 5\n\n\ndef gen_ranges():\n    for i in range(N_TASKS):\n        start = ITEMS_PER_TASK * i\n        yield start, start + ITEMS_PER_TASK\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"splits_path\", metavar=\"HDFS_PATH\")\n    args = parser.parse_args(sys.argv[1:])\n    splits = [OpaqueSplit(_) for _ in gen_ranges()]\n    with hdfs.open(args.splits_path, \"wb\") as f:\n        write_opaque_splits(splits, f)\n"
  },
  {
    "path": "int_test/opaque_split/mrapp.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nStub of an application where each task is assigned an int range as a (start,\nstop) tuple. The record reader feeds numbers from the specified range to the\nmapper, which in this case does nothing but generate a random string. Besides\nrandom data generation (e.g., for terasort), this could be used to assign a\nsubset of files from an HDFS directory to each mapper (e.g., for image\nrecognition).\n\"\"\"\n\nfrom __future__ import division\n\nimport uuid\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\n\n# py2 compat\ntry:\n    range = xrange\nexcept NameError:\n    pass\n\n\nclass Reader(api.RecordReader):\n\n    def __init__(self, context):\n        super(Reader, self).__init__(context)\n        start, stop = context.input_split.payload\n        self.gen = iter(range(start, stop))\n        self.nitems = max(stop - start, 0)\n        self.key = self.start = start\n\n    def next(self):\n        self.key = next(self.gen)\n        return self.key, None\n\n    def get_progress(self):\n        done = self.key - self.start + 1\n        return min(done / self.nitems, 1.0)\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        context.emit(context.key, uuid.uuid4().hex)\n\n\ndef __main__():\n    pipes.run_task(pipes.Factory(Mapper, 
record_reader_class=Reader))\n\n\nif __name__ == \"__main__\":\n    __main__()\n"
  },
  {
    "path": "int_test/opaque_split/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nmodule=\"mrapp\"\ninput=\"input\"\noutput=\"output\"\nsplits_path=\"${input}.splits\"\n\nopts=(\n    \"-D\" \"pydoop.mapreduce.pipes.externalsplits.uri=${splits_path}\"\n    \"-D\" \"mapreduce.task.timeout=10000\"\n    \"-D\" \"mapreduce.job.maps=2\"\n    \"--python-program\" \"${PYTHON}\"\n    \"--job-name\" \"${module}\"\n    \"--num-reducers\" \"0\"\n    \"--upload-file-to-cache\" \"${this_dir}/${module}.py\"\n    \"--do-not-use-java-record-reader\"\n)\n[ -n \"${DEBUG:-}\" ] && opts+=( \"--log-level\" \"DEBUG\" )\n\npushd \"${this_dir}\"\n${PYTHON} gen_splits.py \"${splits_path}\"\nensure_dfs_home\n${HDFS} dfs -rm -r -f \"${input}\" \"${output}\"\n${HDFS} dfs -mkdir -p \"${input}\"  # TODO: can we remove this constraint?\n${PYDOOP} submit \"${opts[@]}\" ${module} \"${input}\" \"${output}\"\nwd=$(mktemp -d)\n${HDFS} dfs -get \"${output}\" \"${wd}/output\"\n${PYTHON} check.py \"${wd}/output\"\nrm -rf \"${wd}\"\npopd\n"
  },
  {
    "path": "int_test/progress/mrapp.py",
    "content": "#!/usr/bin/env python\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\nimport sys\nimport time\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.pipes as pipes\nimport pydoop.hdfs as hdfs\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        time.sleep(1)\n        sys.stderr.write(\"processing: %r\\n\" % (context.value,))\n        context.emit(context.key, len(context.value))\n\n\nclass Writer(api.RecordWriter):\n\n    def __init__(self, context):\n        super(Writer, self).__init__(context)\n        jc = context.job_conf\n        outfn = context.get_default_work_file()\n        hdfs_user = jc.get(\"pydoop.hdfs.user\", None)\n        self.file = hdfs.open(outfn, \"wt\", user=hdfs_user)\n        self.sep = jc.get(\"mapreduce.output.textoutputformat.separator\", \"\\t\")\n\n    def close(self):\n        self.file.close()\n\n    def emit(self, key, value):\n        self.file.write(str(key) + self.sep + str(value) + \"\\n\")\n\n\nFACTORY = pipes.Factory(Mapper, record_writer_class=Writer)\n\n\ndef __main__():\n    pipes.run_task(FACTORY)\n"
  },
  {
    "path": "int_test/progress/run",
    "content": "#!/usr/bin/env bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n[ -n \"${DEBUG:-}\" ] && set -x\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/../config.sh\"\n\nMODULE=\"mrapp\"\nTIMEOUT_SECS=10\nN_LINES=$((5 * TIMEOUT_SECS))\nOPTS=(\n    \"-D\" \"mapreduce.task.timeout=$((1000 * TIMEOUT_SECS))\"\n    \"-D\" \"mapreduce.job.maps=1\"\n    \"--python-program\" \"${PYTHON}\"\n    \"--job-name\" \"${MODULE}\"\n    \"--num-reducers\" \"0\"\n    \"--upload-file-to-cache\" \"${this_dir}/${MODULE}.py\"\n    \"--do-not-use-java-record-writer\"\n)\n[ -n \"${DEBUG:-}\" ] && OPTS+=( \"--log-level\" \"DEBUG\" )\n\nWD=$(mktemp -d)\nDATA=\"${WD}\"/${RANDOM}\nfor i in $(seq 1 ${N_LINES}); do\n    echo \"foobar_${i}\" >> \"${DATA}\"\ndone\n\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    ensure_dfs_home\n    INPUT=$(basename ${DATA})_in\n    OUTPUT=$(basename ${DATA})_out\n    ${HDFS} dfs -rm -r -f \"${INPUT}\" \"${OUTPUT}\"\n    ${HDFS} dfs -put \"${DATA}\" \"${INPUT}\"\nelse\n    INPUT=\"${DATA}\"\n    OUTPUT=\"${WD}\"/$(basename ${DATA})_out\nfi\n${PYDOOP} submit \"${OPTS[@]}\" ${MODULE} \"${INPUT}\" \"${OUTPUT}\"\n\n${HDFS} dfs -test -e \"${OUTPUT}\"/part-m-00000\nrm -rf \"${WD}\"\n"
  },
  {
    "path": "int_test/run_all",
    "content": "#!/bin/bash\n\n# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nset -euo pipefail\n\nthis=\"${BASH_SOURCE-$0}\"\nthis_dir=$(cd -P -- \"$(dirname -- \"${this}\")\" && pwd -P)\n. \"${this_dir}/config.sh\"\n\ntests=(\n    progress\n)\n\n# https://issues.apache.org/jira/browse/MAPREDUCE-4000\nif [ \"$(hadoop_fs)\" != \"file\" ]; then\n    tests+=( mapred_submitter )\nfi\n\nfor e in ${tests[@]}; do\n    pushd \"${this_dir}/${e}\"\n    ./run\n    popd\ndone\n"
  },
  {
    "path": "notice_template.txt",
    "content": "Copyright %(year) %(owner).\n\nLicensed under the Apache License, Version 2.0 (the \"License\"); you\nmay not use this file except in compliance with the License. You may\nobtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\nimplied. See the License for the specific language governing\npermissions and limitations under the License.\n"
  },
  {
    "path": "pydoop/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# DEV NOTE: some of the variables defined here (docstring included)\n# are parsed by setup.py, check it before modifying them.\n\n\"\"\"\nPydoop: a Python MapReduce and HDFS API for Hadoop\n--------------------------------------------------\n\nPydoop is a Python interface to Hadoop that allows you to write\nMapReduce applications and interact with HDFS in pure Python.\n\"\"\"\n\nimport os\nimport errno\nfrom importlib import import_module\nimport pydoop.hadoop_utils as hu\nfrom pydoop.utils.py3compat import configparser, parser_read\n\ntry:\n    from pydoop.version import version as __version__\nexcept ImportError:  # should only happen at compile time\n    __version__ = None\n_PATH_FINDER = hu.PathFinder()\n\n__author__ = \", \".join((\n    \"Simone Leo\",\n    \"Gianluigi Zanetti\",\n    \"Luca Pireddu\",\n    \"Francesco Cabras\",\n    \"Mauro Del Rio\",\n    \"Marco Enrico Piras\",\n))\n__author_email__ = \", \".join((\n    \"<simone.leo@crs4.it>\",\n    \"<gianluigi.zanetti@crs4.it>\",\n    \"<luca.pireddu@crs4.it>\",\n    \"<francesco.cabras@crs4.it>\",\n    \"<mauro@crs4.it>\",\n    \"<kikkomep@crs4.it>\",\n))\n__url__ = \"http://crs4.github.io/pydoop\"\n__propfile_basename__ = \"pydoop.properties\"\n\n\ndef reset():\n    _PATH_FINDER.reset()\n\n\ndef hadoop_home():\n    return _PATH_FINDER.hadoop_home()\n\n\ndef 
hadoop_conf():\n    return _PATH_FINDER.hadoop_conf()\n\n\ndef hadoop_params():\n    return _PATH_FINDER.hadoop_params()\n\n\ndef hadoop_classpath():\n    return _PATH_FINDER.hadoop_classpath()\n\n\ndef package_dir():\n    return os.path.dirname(os.path.abspath(__file__))\n\n\n##############################\n# Since Pydoop 1.0, we've stopped supporting installations for multiple\n# Hadoop versions, so we only have a single module, so the following\n# functions now return the same value regardless of the Hadoop version.\n##############################\n\ndef jar_name(hadoop_vinfo=None):\n    return \"pydoop.jar\"\n\n\ndef jar_path(hadoop_vinfo=None):\n    path = os.path.join(package_dir(), jar_name())\n    if os.path.exists(path):\n        return path\n    else:\n        return None\n\n\ndef complete_mod_name(module, hadoop_vinfo=None):\n    return \"%s.%s\" % (__package__, module)\n\n\ndef import_version_specific_module(name):\n    return import_module(name)\n\n\n# --- get properties ---\nPROP_FN = os.path.join(\n    os.path.dirname(os.path.abspath(__file__)), __propfile_basename__\n)\n\n\n# http://stackoverflow.com/questions/2819696\nclass AddSectionWrapper(object):\n\n    SEC_NAME = 'dummy'\n\n    def __init__(self, f):\n        self.f = f\n        self.sechead = '[dummy]' + os.linesep\n\n    def __iter__(self):\n        return self\n\n    def __next__(self):\n        line = self.readline()\n        if not line:\n            raise StopIteration\n        return line\n\n    def readline(self):\n        if self.sechead:\n            try:\n                return self.sechead\n            finally:\n                self.sechead = None\n        else:\n            return self.f.readline()\n\n\ndef read_properties(fname):\n    parser = configparser.SafeConfigParser()\n    parser.optionxform = str  # preserve key case\n    try:\n        with open(fname) as f:\n            parser_read(parser, AddSectionWrapper(f))\n    except IOError as e:\n        if e.errno != 
errno.ENOENT:\n            raise\n        return None  # compile time, prop file is not there\n    return dict(parser.items(AddSectionWrapper.SEC_NAME))\n\n\nclass LocalModeNotSupported(RuntimeError):\n    def __init__(self):\n        msg = 'ERROR: Hadoop is configured to run in local mode'\n        super(LocalModeNotSupported, self).__init__(msg)\n\n\ndef check_local_mode():\n    if _PATH_FINDER.is_local():\n        raise LocalModeNotSupported()\n"
  },
  {
    "path": "pydoop/app/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n"
  },
  {
    "path": "pydoop/app/argparse_types.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport argparse\nimport pydoop.hdfs as hdfs\n\n\ndef kv_pair(s):\n    try:\n        k, v = s.split(\"=\", 1)\n    except ValueError:\n        raise argparse.ArgumentTypeError(\"arg must be in the k=v form\")\n    return k, v\n\n\nclass UpdateMap(argparse.Action):\n    \"\"\"\\\n    Update the destination map with a K=V pair.\n\n    >>> parser = argparse.ArgumentParser()\n    >>> _ = parser.add_argument(\"-D\", metavar=\"K=V\", action=UpdateMap)\n    >>> args = parser.parse_args([\"-D\", \"k1=v1\", \"-D\", \"k2=v2\", \"-D\", \"k2=v3\"])\n    >>> args.D == {'k1': 'v1', 'k2': 'v3'}\n    True\n    \"\"\"\n\n    def __init__(self, option_strings, dest, **kwargs):\n        kwargs = {k: v for k, v in kwargs.items() if k in {\"help\", \"metavar\"}}\n        kwargs[\"type\"] = kv_pair\n        super(UpdateMap, self).__init__(option_strings, dest, **kwargs)\n\n    def __call__(self, parser, namespace, values, option_string=None):\n        if getattr(namespace, self.dest, None) is None:\n            setattr(namespace, self.dest, {})\n        getattr(namespace, self.dest).update([values])\n\n\ndef a_file_that_can_be_read(x):\n    with open(x, 'r'):\n        pass\n    return x\n\n\ndef a_hdfs_file(x):\n    _, _, _ = hdfs.path.split(x)\n    return x\n\n\ndef a_comma_separated_list(x):\n    # FIXME unclear how does one check for bad lists...\n 
   return x\n"
  },
  {
    "path": "pydoop/app/main.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nPydoop command line tool.\n\"\"\"\n\nimport os\nimport argparse\nimport importlib\nimport sys\n\n\nfrom pydoop.version import version\n\nSUBMOD_NAMES = [\n    \"script\",\n    \"submit\",\n]\n\nPYDOOP_CONF_FILE = \"~/.pydoop/pydoop.conf\"\n\n\nclass PatchedArgumentParser(argparse.ArgumentParser):\n    \"\"\"\n    This is a work-around for a bug in ArgumentParser that is triggered\n    when there is a zero length argument and fromfile_prefix_chars is\n    not None.\n    \"\"\"\n    def _read_args_from_files(self, arg_strings):\n        place_holder = \"abcjdkje-32333a290\"\n        assert not (place_holder in arg_strings)\n        args = [x if len(x) > 0 else place_holder for x in arg_strings]\n        new_args = super(PatchedArgumentParser,\n                         self)._read_args_from_files(args)\n        return [x if x != place_holder else '' for x in new_args]\n\n\ndef make_parser():\n    parser = PatchedArgumentParser(\n        description=\"Pydoop command line tool\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n        epilog=(\"Supports argparse @confile syntax \"),\n        fromfile_prefix_chars='@'\n    )\n    parser._pydoop_docs_helper = {}\n    parser.add_argument('-V', '--version', action='version', version=version,\n                        help='print version number and exit')\n    
subparsers = parser.add_subparsers(help=\"sub-commands\")\n    for n in SUBMOD_NAMES:\n        mod = importlib.import_module(\"%s.%s\" % (__package__, n))\n        subp = mod.add_parser(subparsers)\n        parser._pydoop_docs_helper[n] = subp\n    return parser\n\n\ndef main(argv=None):\n    parser = make_parser()\n    if os.path.exists(PYDOOP_CONF_FILE):\n        argv = argv + ['@' + PYDOOP_CONF_FILE]\n    args, unknown = parser.parse_known_args(argv)\n    try:\n        if args.combiner_fn and not args.combine_fn:\n            args.combine_fn = args.combiner_fn  # backwards compatibility\n    except AttributeError:  # not the script app\n        pass\n    try:\n        func = args.func\n    except AttributeError:\n        parser.error(\"too few arguments\")\n    try:\n        func(args, unknown)\n    except RuntimeError as e:\n        sys.exit(\"ERROR - {}:  {}\".format(type(e).__name__, e))\n"
  },
  {
    "path": "pydoop/app/script.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nPydoop Script\n=============\n\nA quick and easy to use interface for running simple MapReduce jobs.\n\nPydoop script is a front-end to pydoop submit that automatically builds\na map-reduce program using functions contained in a user provided\npython module.\n\n\"\"\"\n\nimport os\nimport pydoop.utils as utils\nimport argparse\nfrom tempfile import NamedTemporaryFile\nfrom zipfile import ZipFile\nfrom .submit import PydoopSubmitter, add_parser_common_arguments\nfrom .script_template import DRIVER_TEMPLATE\n\nOUT_SEP_KEY = 'mapreduce.output.textoutputformat.separator'\nNOSEP_OUTPUT_FORMAT = 'it.crs4.pydoop.NoSeparatorTextOutputFormat'\n\nDESCRIPTION = \"Simplified interface for running simple MapReduce jobs\"\n\n\nclass PydoopScript(object):\n\n    def __init__(self, args, unknown_args):\n        self.script_archive = None\n        self.args = None\n        self.convert_args(args, unknown_args)\n\n    @staticmethod\n    def generate_driver(mr_module, args):\n        combine_fn = args.combine_fn or args.reduce_fn\n        combiner_wp = 'PydoopScriptCombiner' if args.combine_fn else 'None'\n        return DRIVER_TEMPLATE.substitute(\n            module=mr_module,\n            map_fn=args.map_fn,\n            reduce_fn=args.reduce_fn,\n            combine_fn=combine_fn,\n            combiner_wp=combiner_wp,\n        )\n\n    
def convert_args(self, args, unknown_args):\n        # Create a zip archive containing all we need to run the\n        # script (including the script itself.  We use\n        # NamedTemporaryFile which will take care of deleting the temp\n        # archive once we're done\n        self.script_archive = NamedTemporaryFile(\n            prefix=\"pydoop_script_\",\n            suffix='.zip'\n        )\n        zip_filename = self.script_archive.name\n        # Create a one-off temporary file name to avoid name clashes\n        # in the distcache.  Keep the same module extension -- it may\n        # be a source file or a byte-compiled file\n        mr_module = utils.make_random_str(\n            prefix=\"pydoop_script_module_\",\n            postfix=os.path.basename(args.module)\n        )\n        mr_driver = utils.make_random_str(prefix=\"pydoop_script_driver_\")\n        with ZipFile(zip_filename, 'w') as zipf:\n            zipf.write(args.module, arcname=mr_module)\n            zipf.writestr(\n                mr_driver + '.py',\n                self.generate_driver(os.path.splitext(mr_module)[0], args)\n            )\n        if args.python_zip is None:\n            args.python_zip = [zip_filename]\n        else:\n            args.python_zip.append(zip_filename)\n        args.module = mr_driver\n        args.entry_point = 'main'\n        args.program = mr_driver\n        args.do_not_use_java_record_reader = False\n        args.do_not_use_java_record_writer = False\n        args.output_format = None\n        args.cache_file = None\n        args.cache_archive = None\n        args.upload_to_cache = None\n        args.libjars = None\n        args.conf = None\n        args.disable_property_name_conversion = True\n        args.avro_input = None\n        args.avro_output = None\n        args.keep_wd = False\n        args.pstats_dir = None\n        args.pstats_fmt = None\n\n        self.args, self.unknown_args = args, unknown_args\n\n    def run(self):\n        submitter = 
PydoopSubmitter()\n        if self.args.kv_separator is not None:\n            submitter.properties[OUT_SEP_KEY] = self.args.kv_separator\n        if submitter.properties.get(OUT_SEP_KEY) == '':\n            self.args.output_format = NOSEP_OUTPUT_FORMAT\n        submitter.set_args(self.args, self.unknown_args)\n        submitter.run()\n        return 0\n\n    def clean(self):\n        self.script_archive.close()\n\n\ndef run(args, unknown_args=None):\n    if unknown_args is None:\n        unknown_args = []\n    scripter = PydoopScript(args, unknown_args)\n    scripter.run()\n    scripter.clean()\n    return 0\n\n\ndef add_parser_arguments(parser):\n    parser.add_argument('module', metavar='MODULE', help='python module file')\n    parser.add_argument('input', metavar='INPUT', help='hdfs input path')\n    parser.add_argument('output', metavar='OUTPUT', help='hdfs output path')\n    parser.add_argument('-m', '--map-fn', metavar='MAP', default='mapper',\n                        help=\"name of map function within module\")\n    parser.add_argument('-r', '--reduce-fn', metavar='RED', default='reducer',\n                        help=\"name of reduce function within module\")\n    parser.add_argument('-c', '--combine-fn', metavar='COM', default=None,\n                        help=\"name of combine function within module\")\n    parser.add_argument('--combiner-fn', metavar='COM', default=None,\n                        help=\"--combine-fn alias for backwards compatibility\")\n    parser.add_argument('-t', '--kv-separator', metavar='SEP',\n                        help=\"output key-value separator\")\n\n\ndef add_parser(subparsers):\n    parser = subparsers.add_parser(\n        \"script\",\n        description=DESCRIPTION,\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n        epilog=(\"Hadoop pipes generic options are supported too.  
\"\n                \"Run `hadoop pipes` for more information\")\n    )\n    add_parser_common_arguments(parser)\n    add_parser_arguments(parser)\n    parser.set_defaults(func=run)\n    return parser\n"
  },
  {
    "path": "pydoop/app/script_template.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport string\n\nDRIVER_TEMPLATE = string.Template(\"\"\"\\\nimport sys\nimport os\nimport inspect\n\nsys.path.insert(0, os.getcwd())\n\nimport pydoop.mapreduce.api as api  # noqa: E402\nimport pydoop.mapreduce.pipes as pipes  # noqa: E402\nimport ${module}  # noqa: E402\n\n\nclass ContextWriter(object):\n\n    def __init__(self, context):\n        self.context = context\n        self.counters = {}\n\n    def emit(self, k, v):\n        self.context.emit(k, v)\n\n    def count(self, what, howmany):\n        counter = self.counters.setdefault(\n            what, self.context.get_counter('${module}', what)\n        )\n        self.context.increment_counter(counter, howmany)\n\n    def status(self, msg):\n        self.context.set_status(msg)\n\n    def progress(self):\n        self.context.progress()\n\n\ndef setup_script_object(obj, fn_attr_name, user_fn, ctx):\n    # Generic constructor for both map and reduce objects.\n    #\n    # Sets the 'writer' and 'conf' attributes.  
Then, based on the arity\n    # of the given user function (user_fn), sets the object attribute\n    # (fn_attr_name, which should be either 'map' or 'reduce') to point\n    # to either:\n    #\n    #   * obj.with_conf (when arity == 4)\n    #   * obj.without_conf (when arity == 3)\n    #\n    # This way, when pipes calls the map/reduce function of the object\n    # it actually gets either of the with_conf/without_conf functions\n    # (which must be defined by the PydoopScriptMapper or\n    # PydoopScriptReducer object passed into this function).\n    #\n    # Why all this?  The idea is to raise any decision about which\n    # function to call out of the map/reduce functions, which get called\n    # a number of times proportional to the amount of data to process.\n    # On the other hand, the constructor only gets called once per task.\n    if fn_attr_name not in ('map', 'reduce'):\n        raise RuntimeError('Unexpected function attribute ' + fn_attr_name)\n    obj.writer = ContextWriter(ctx)\n    obj.conf = ctx.get_job_conf()\n    spec = inspect.getargspec(user_fn)\n    if spec.varargs or len(spec.args) not in (3, 4):\n        raise ValueError(\n            user_fn +\n            ' must take parameters key, value, writer, and optionally config'\n        )\n    if len(spec.args) == 3:\n        setattr(obj, fn_attr_name, obj.without_conf)\n    elif len(spec.args) == 4:\n        setattr(obj, fn_attr_name, obj.with_conf)\n    else:\n        raise RuntimeError(\n            'Unexpected number of ${map_fn} arguments ' + len(spec.args)\n        )\n\n\nclass PydoopScriptMapper(api.Mapper):\n\n    def __init__(self, ctx):\n        super(PydoopScriptMapper, self).__init__(ctx)\n        setup_script_object(self, 'map', ${module}.${map_fn}, ctx)\n\n    def without_conf(self, ctx):\n        # old style map function, without the conf parameter\n        writer = ContextWriter(ctx)\n        ${module}.${map_fn}(ctx.key, ctx.value, writer)\n\n    def with_conf(self, ctx):\n       
 # new style map function, with the conf parameter\n        writer = ContextWriter(ctx)\n        ${module}.${map_fn}(ctx.key, ctx.value, writer, self.conf)\n\n    def map(self, ctx):\n        pass\n\n\nclass PydoopScriptReducer(api.Reducer):\n\n    def __init__(self, ctx):\n        super(PydoopScriptReducer, self).__init__(ctx)\n        setup_script_object(self, 'reduce', ${module}.${reduce_fn}, ctx)\n\n    def without_conf(self, ctx):\n        writer = ContextWriter(ctx)\n        ${module}.${reduce_fn}(ctx.key, ctx.values, writer)\n\n    def with_conf(self, ctx):\n        writer = ContextWriter(ctx)\n        ${module}.${reduce_fn}(ctx.key, ctx.values, writer, self.conf)\n\n    def reduce(self, ctx):\n        pass\n\n\nclass PydoopScriptCombiner(api.Reducer):\n\n    def __init__(self, ctx):\n        super(PydoopScriptCombiner, self).__init__(ctx)\n        setup_script_object(self, 'reduce', ${module}.${combine_fn}, ctx)\n\n    def without_conf(self, ctx):\n        writer = ContextWriter(ctx)\n        ${module}.${combine_fn}(ctx.key, ctx.values, writer)\n\n    def with_conf(self, ctx):\n        writer = ContextWriter(ctx)\n        ${module}.${combine_fn}(ctx.key, ctx.values, writer, self.conf)\n\n    def reduce(self, ctx):\n        pass\n\n\ndef main():\n    pipes.run_task(pipes.Factory(\n        PydoopScriptMapper, PydoopScriptReducer,\n        record_reader_class=None,\n        record_writer_class=None,\n        combiner_class=${combiner_wp},\n        partitioner_class=None))\n\"\"\")\n"
  },
  {
    "path": "pydoop/app/submit.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nAn interface to simplify pydoop jobs submission.\n\"\"\"\n\nimport os\nimport sys\nimport glob\nimport argparse\nimport logging\nimport uuid\nlogging.basicConfig(level=logging.INFO)\n\nimport pydoop\nimport pydoop.hdfs as hdfs\nimport pydoop.hadut as hadut\nimport pydoop.utils as utils\nimport pydoop.utils.conversion_tables as conv_tables\nfrom pydoop.mapreduce.api import AVRO_IO_MODES\nfrom pydoop.mapreduce.pipes import PSTATS_DIR, PSTATS_FMT\n\nfrom .argparse_types import a_file_that_can_be_read, UpdateMap\nfrom .argparse_types import a_comma_separated_list, a_hdfs_file\n\n\nDEFAULT_ENTRY_POINT = '__main__'\nIS_JAVA_RR = \"mapreduce.pipes.isjavarecordreader\"\nIS_JAVA_RW = \"mapreduce.pipes.isjavarecordwriter\"\nCACHE_FILES = \"mapreduce.job.cache.files\"\nCACHE_ARCHIVES = \"mapreduce.job.cache.archives\"\nUSER_HOME = \"mapreduce.admin.user.home.dir\"\nJOB_REDUCES = \"mapreduce.job.reduces\"\nJOB_NAME = \"mapreduce.job.name\"\nCOMPRESS_MAP_OUTPUT = \"mapreduce.map.output.compress\"\n\n\nclass PydoopSubmitter(object):\n    \"\"\"\n    Builds and launches pydoop jobs.\n    \"\"\"\n    DESCRIPTION = \"Simplified pydoop jobs submission\"\n\n    def __init__(self):\n        pydoop.check_local_mode()\n        self.logger = logging.getLogger(\"PydoopSubmitter\")\n        self.properties = {\n            CACHE_FILES: '',\n     
       CACHE_ARCHIVES: '',\n            'mapred.create.symlink': 'yes',  # backward compatibility\n            COMPRESS_MAP_OUTPUT: 'true',\n        }\n        self.args = None\n        self.requested_env = dict()\n        self.remote_wd = None\n        self.remote_module = None\n        self.remote_module_bn = None\n        self.remote_exe = None\n        self.pipes_code = None\n        self.files_to_upload = []\n        self.unknown_args = None\n\n    @staticmethod\n    def __cache_archive_link(archive_name):\n        # XXX: should we really be dropping the extension from the link name?\n        return os.path.splitext(os.path.basename(archive_name))[0]\n\n    def __set_files_to_cache_helper(self, prop, upload_and_cache, cache):\n        cfiles = self.properties[prop] if self.properties[prop] else []\n        cfiles += cache if cache else []\n        if upload_and_cache:\n            upf_to_cache = [\n                ('file://' + os.path.realpath(e),\n                 hdfs.path.join(self.remote_wd, bn),\n                 bn if prop == CACHE_FILES else self.__cache_archive_link(e))\n                for (e, bn) in ((e, os.path.basename(e))\n                                for e in upload_and_cache)\n            ]\n            self.files_to_upload += upf_to_cache\n            for t in self.files_to_upload:\n                if not hdfs.path.isfile(t[0]):\n                    raise RuntimeError(\"not a file: %r\" % (t[0]))\n            cached_files = [\"%s#%s\" % (h, b) for (_, h, b) in upf_to_cache]\n            cfiles += cached_files\n        self.properties[prop] = ','.join(cfiles)\n\n    def __set_files_to_cache(self, args):\n        if args.upload_file_to_cache is None:\n            args.upload_file_to_cache = []\n        self.__set_files_to_cache_helper(CACHE_FILES,\n                                         args.upload_file_to_cache,\n                                         args.cache_file)\n\n    def __set_archives_to_cache(self, args):\n        if 
args.upload_archive_to_cache is None:\n            args.upload_archive_to_cache = []\n        if args.python_zip:\n            args.upload_archive_to_cache += args.python_zip\n        self.__set_files_to_cache_helper(CACHE_ARCHIVES,\n                                         args.upload_archive_to_cache,\n                                         args.cache_archive)\n\n    @staticmethod\n    def _env_arg_to_dict(set_env_list):\n        retval = dict()\n        for item in set_env_list:\n            try:\n                name, value = item.split('=', 1)\n                retval[name.strip()] = value.strip()\n            except ValueError:\n                raise RuntimeError(\n                    \"Bad syntax in env variable argument '%s'\" % item\n                )\n        return retval\n\n    def set_args(self, args, unknown_args=None):\n        \"\"\"\n        Configure job, based on the arguments provided.\n        \"\"\"\n        if unknown_args is None:\n            unknown_args = []\n        self.logger.setLevel(getattr(logging, args.log_level))\n\n        parent = hdfs.path.dirname(hdfs.path.abspath(args.output.rstrip(\"/\")))\n        self.remote_wd = hdfs.path.join(\n            parent, utils.make_random_str(prefix=\"pydoop_submit_\")\n        )\n        self.remote_exe = hdfs.path.join(self.remote_wd, str(uuid.uuid4()))\n        self.properties[JOB_NAME] = args.job_name or 'pydoop'\n        self.properties[IS_JAVA_RR] = (\n            'false' if args.do_not_use_java_record_reader else 'true'\n        )\n        self.properties[IS_JAVA_RW] = (\n            'false' if args.do_not_use_java_record_writer else 'true'\n        )\n        if args.num_reducers is not None:\n            self.properties[JOB_REDUCES] = args.num_reducers\n        if args.job_name:\n            self.properties[JOB_NAME] = args.job_name\n        self.properties.update(args.job_conf or {})\n        self.__set_files_to_cache(args)\n        self.__set_archives_to_cache(args)\n        
self.requested_env = self._env_arg_to_dict(args.set_env or [])\n        self.args = args\n        self.unknown_args = unknown_args\n\n    def __warn_user_if_wd_maybe_unreadable(self, abs_remote_path):\n        \"\"\"\n        Check directories above the remote module and issue a warning if\n        they are not traversable by all users.\n\n        The reasoning behind this is mainly aimed at set-ups with a\n        centralized Hadoop cluster, accessed by all users, and where\n        the Hadoop task tracker user is not a superuser; an example\n        may be if you're running a shared Hadoop without HDFS (using\n        only a POSIX shared file system).  The task tracker correctly\n        changes user to the job requester's user for most operations,\n        but not when initializing the distributed cache, so jobs who\n        want to place files not accessible by the Hadoop user into\n        dist cache fail.\n        \"\"\"\n        host, port, path = hdfs.path.split(abs_remote_path)\n        if host == '' and port == 0:  # local file system\n            host_port = \"file:///\"\n        else:\n            # FIXME: this won't work with any scheme other than\n            # hdfs:// (e.g., s3)\n            host_port = \"hdfs://%s:%s/\" % (host, port)\n        path_pieces = path.strip('/').split(os.path.sep)\n        fs = hdfs.hdfs(host, port)\n        for i in range(0, len(path_pieces)):\n            part = os.path.join(\n                host_port, os.path.sep.join(path_pieces[0: i + 1])\n            )\n            permissions = fs.get_path_info(part)['permissions']\n            if permissions & 0o111 != 0o111:\n                self.logger.warning(\n                    (\"remote module %s may not be readable by the task \"\n                     \"tracker when initializing the distributed cache.  
\"\n                     \"Permissions on %s: %s\"),\n                    abs_remote_path, part, oct(permissions)\n                )\n                break\n\n    def _generate_pipes_code(self):\n        env = dict()\n        for e in ('LD_LIBRARY_PATH', 'PATH', 'PYTHONPATH'):\n            env[e] = ''\n        lines = []\n        if not self.args.no_override_env and not self.args.no_override_ld_path:\n            env['LD_LIBRARY_PATH'] = os.environ.get('LD_LIBRARY_PATH', '')\n        if not self.args.no_override_env and not self.args.no_override_path:\n            env['PATH'] = os.environ.get('PATH', '')\n\n        if not self.args.no_override_env and not self.args.no_override_pypath:\n            env['PYTHONPATH'] = os.environ.get('PYTHONPATH', '')\n        else:\n            env['PYTHONPATH'] = \"${PYTHONPATH}\"\n\n        # set user-requested env variables\n        for var, value in self.requested_env.items():\n            env[var] = value\n\n        if self.args.pstats_dir:\n            env[PSTATS_DIR] = self.args.pstats_dir\n            if self.args.pstats_fmt:\n                env[PSTATS_FMT] = self.args.pstats_fmt\n\n        executable = self.args.python_program\n        if self.args.python_zip:\n            env['PYTHONPATH'] = ':'.join([\n                self.__cache_archive_link(ar) for ar in self.args.python_zip\n            ] + [env['PYTHONPATH']])\n        # Note that we have to explicitly put the working directory\n        # in the python path otherwise it will miss cached modules and\n        # packages.\n        env['PYTHONPATH'] = \"${PWD}:\" + env['PYTHONPATH']\n\n        lines.append(\"#!/bin/bash\")\n        lines.append('\"\"\":\"')\n        if self.args.log_level == \"DEBUG\":\n            lines.append(\"printenv 1>&2\")\n            lines.append(\"echo PWD=${PWD} 1>&2\")\n            lines.append(\"echo ls -l; ls -l  1>&2\")\n        if (\n            USER_HOME not in self.properties and\n            \"HOME\" in os.environ and\n            not 
self.args.no_override_home\n        ):\n            lines.append('export HOME=\"%s\"' % os.environ['HOME'])\n        # set environment variables\n        for var, value in env.items():\n            if value:\n                self.logger.debug(\"Setting env variable %s=%s\", var, value)\n                lines.append('export %s=\"%s\"' % (var, value))\n        if self.args.log_level == \"DEBUG\":\n            lines.append(\"echo PATH=${PATH} 1>&2\")\n            lines.append(\"echo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} 1>&2\")\n            lines.append(\"echo PYTHONPATH=${PYTHONPATH} 1>&2\")\n            lines.append(\"echo HOME=${HOME} 1>&2\")\n            lines.append('echo \"executable is $(type -P %s)\" 1>&2' %\n                         executable)\n        cmd = 'exec \"%s\" -u \"$0\" \"$@\"' % executable\n        if self.args.log_level == 'DEBUG':\n            lines.append(\"echo cmd to execute: %s\" % cmd)\n        lines.append(cmd)\n        lines.append('\":\"\"\"')\n        if self.args.log_level == \"DEBUG\":\n            lines.append('import sys')\n            lines.append('sys.stderr.write(\"%r\\\\n\" % sys.path)')\n            lines.append('sys.stderr.write(\"%s\\\\n\" % sys.version)')\n        lines.append('import %s as module' % self.args.module)\n        lines.append('module.%s()' % self.args.entry_point)\n        return os.linesep.join(lines) + os.linesep\n\n    def __validate(self):\n        if not hdfs.path.exists(self.args.input):\n            raise RuntimeError(\n                \"Input path %r does not exist\" % (self.args.input,)\n            )\n        if hdfs.path.exists(self.args.output):\n            raise RuntimeError(\n                \"Output path %r already exists\" % (self.args.output,)\n            )\n\n    def __clean_wd(self):\n        if self.remote_wd:\n            try:\n                self.logger.debug(\n                    \"Removing temporary working directory %s\", self.remote_wd\n                )\n                
hdfs.rm(self.remote_wd)\n            except IOError:\n                pass\n\n    def __setup_remote_paths(self):\n        \"\"\"\n        Actually create the working directory and copy the module into it.\n\n        Note: the script has to be readable by Hadoop; though this may not\n        generally be a problem on HDFS, where the Hadoop user is usually\n        the superuser, things may be different if our working directory is\n        on a shared POSIX filesystem.  Therefore, we make the directory\n        and the script accessible by all.\n        \"\"\"\n        self.logger.debug(\"remote_wd: %s\", self.remote_wd)\n        self.logger.debug(\"remote_exe: %s\", self.remote_exe)\n        self.logger.debug(\"remotes: %s\", self.files_to_upload)\n        if self.args.module:\n            self.logger.debug(\n                'Generated pipes_code:\\n\\n %s', self._generate_pipes_code()\n            )\n        if not self.args.pretend:\n            hdfs.mkdir(self.remote_wd)\n            hdfs.chmod(self.remote_wd, \"a+rx\")\n            self.logger.debug(\"created and chmod-ed: %s\", self.remote_wd)\n            pipes_code = self._generate_pipes_code()\n            hdfs.dump(pipes_code, self.remote_exe)\n            self.logger.debug(\"dumped pipes_code to: %s\", self.remote_exe)\n            hdfs.chmod(self.remote_exe, \"a+rx\")\n            self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)\n            for (l, h, _) in self.files_to_upload:\n                self.logger.debug(\"uploading: %s to %s\", l, h)\n                hdfs.cp(l, h)\n        self.logger.debug(\"Created%sremote paths:\" %\n                          (' [simulation] ' if self.args.pretend else ' '))\n\n    def run(self):\n        if self.args is None:\n            raise RuntimeError(\"cannot run without args, please call set_args\")\n        if not self.args.pretend:\n            self.__validate()\n        pydoop_classpath = []\n        libjars = []\n        if self.args.libjars:\n           
 libjars.extend(self.args.libjars)\n        if self.args.avro_input or self.args.avro_output:\n            # append Pydoop's avro-mapred jar.  Don't put it at the front of\n            # the list or the user won't be able to override it.\n            avro_jars = glob.glob(os.path.join(\n                pydoop.package_dir(), \"avro*.jar\"\n            ))\n            pydoop_classpath.extend(avro_jars)\n            libjars.extend(avro_jars)\n        pydoop_jar = pydoop.jar_path()\n        if pydoop_jar is None:\n            raise RuntimeError(\"Can't find pydoop.jar\")\n        job_args = []\n        submitter_class = 'it.crs4.pydoop.mapreduce.pipes.Submitter'\n        pydoop_classpath.append(pydoop_jar)\n        libjars.append(pydoop_jar)\n        self.logger.debug(\"Submitter class: %s\", submitter_class)\n        if self.args.hadoop_conf:\n            job_args.extend(['-conf', self.args.hadoop_conf.name])\n        if self.args.input_format:\n            job_args.extend(['-inputformat', self.args.input_format])\n        if self.args.output_format:\n            job_args.extend(['-writer', self.args.output_format])\n        job_args.extend(['-input', self.args.input])\n        job_args.extend(['-output', self.args.output])\n        job_args.extend(['-program', self.remote_exe])\n        if libjars:\n            job_args.extend([\"-libjars\", ','.join(libjars)])\n        if self.args.avro_input:\n            job_args.extend(['-avroInput', self.args.avro_input])\n        if self.args.avro_output:\n            job_args.extend(['-avroOutput', self.args.avro_output])\n        if not self.args.disable_property_name_conversion:\n            ctable = conv_tables.mrv1_to_mrv2\n            props = [\n                (ctable.get(k, k), v) for (k, v) in self.properties.items()\n            ]\n            self.properties = dict(props)\n            self.logger.debug(\"properties after projection: %r\",\n                              self.properties)\n        try:\n            
self.__setup_remote_paths()\n            executor = (hadut.run_class if not self.args.pretend\n                        else self.fake_run_class)\n            executor(submitter_class, args=job_args,\n                     properties=self.properties, classpath=pydoop_classpath,\n                     logger=self.logger, keep_streams=False)\n            self.logger.info(\"Done\")\n        finally:\n            if not self.args.keep_wd:\n                self.__clean_wd()\n\n    def fake_run_class(self, *args, **kwargs):\n        kwargs['logger'].info(\"Fake run class\")\n        repr_list = [repr(_) for _ in args]\n        repr_list.extend('%s=%r' % (k, v) for k, v in kwargs.items())\n        sys.stdout.write(\"hadut.run_class(%s)\\n\" % ', '.join(repr_list))\n\n\ndef run(args, unknown_args=None):\n    if unknown_args is None:\n        unknown_args = []\n    script = PydoopSubmitter()\n    script.set_args(args, unknown_args)\n    script.run()\n    return 0\n\n\ndef add_parser_common_arguments(parser):\n    parser.add_argument(\n        '--num-reducers', metavar='INT', type=int,\n        help=\"Number of reduce tasks. Specify 0 to only perform map phase\"\n    )\n    parser.add_argument(\n        '--no-override-home', action='store_true',\n        help=(\"Don't set the script's HOME directory to the $HOME in your \"\n              \"environment.  
Hadoop will set it to the value of the \"\n              \"'mapreduce.admin.user.home.dir' property\")\n    )\n    parser.add_argument(\n        '--no-override-env', action='store_true',\n        help=(\"Use the default PATH, LD_LIBRARY_PATH and PYTHONPATH, instead \"\n              \"of copying them from the submitting client node\")\n    )\n    parser.add_argument(\n        '--no-override-ld-path', action='store_true',\n        help=(\"Use the default LD_LIBRARY_PATH instead of copying it from the \"\n              \"submitting client node\")\n    )\n    parser.add_argument(\n        '--no-override-pypath', action='store_true',\n        help=(\"Use the default PYTHONPATH instead of copying it from the \"\n              \"submitting client node\")\n    )\n    parser.add_argument(\n        '--no-override-path', action='store_true',\n        help=(\"Use the default PATH instead of copying it from the \"\n              \"submitting client node\")\n    )\n    parser.add_argument(\n        '--set-env', metavar=\"VAR=VALUE\", type=str, action=\"append\",\n        help=(\"Set environment variables for the tasks. 
If a variable \"\n              \"is set to '', it will not be overridden by Pydoop.\")\n    )\n    parser.add_argument(\n        '-D', '--job-conf', metavar='NAME=VALUE', action=UpdateMap,\n        help='Set a Hadoop property, e.g., -D mapreduce.job.priority=high'\n    )\n    parser.add_argument(\n        '--python-zip', metavar='ZIP_FILE', type=a_file_that_can_be_read,\n        action=\"append\", help=\"Additional python zip file\"\n    )\n    parser.add_argument(\n        '--upload-file-to-cache', metavar='FILE', type=a_file_that_can_be_read,\n        action=\"append\",\n        help=\"Upload and add this file to the distributed cache.\"\n    )\n    parser.add_argument(\n        '--upload-archive-to-cache', metavar='FILE',\n        type=a_file_that_can_be_read, action=\"append\",\n        help=\"Upload and add this archive file to the distributed cache.\"\n    )\n    parser.add_argument(\n        '--log-level', metavar=\"LEVEL\", default=\"INFO\", help=\"Logging level\",\n        choices=[\"DEBUG\", \"INFO\", \"WARNING\", \"ERROR\", \"CRITICAL\", \"FATAL\"]\n    )\n    parser.add_argument(\n        '--job-name', metavar='NAME', type=str, help=\"name of the job\"\n    )\n    parser.add_argument(\n        '--python-program', metavar='PYTHON', type=str, default=sys.executable,\n        help=\"python executable that should be used by the wrapper\"\n    )\n    parser.add_argument(\n        '--pretend', action='store_true',\n        help=(\"Do not actually submit a job, print the generated config \"\n              \"settings and the command line that would be invoked\")\n    )\n    parser.add_argument(\n        '--hadoop-conf', metavar='HADOOP_CONF_FILE',\n        type=a_file_that_can_be_read,\n        help=\"Hadoop configuration file\"\n    )\n    parser.add_argument(\n        '--input-format', metavar='CLASS', type=str,\n        help=\"java classname of InputFormat\"\n    )\n\n\ndef add_parser_arguments(parser):\n    parser.add_argument(\n        'module', 
 metavar='MODULE', type=str,\n        help=(\"The module containing the Python MapReduce program\")\n    )\n    parser.add_argument(\n        'input', metavar='INPUT', help='input path to the maps',\n    )\n    parser.add_argument(\n        'output', metavar='OUTPUT', help='output path from the reduces',\n    )\n    parser.add_argument(\n        '--disable-property-name-conversion', action='store_true',\n        help=\"Do not adapt property names to the hadoop version used.\"\n    )\n    parser.add_argument(\n        '--do-not-use-java-record-reader', action='store_true',\n        help=\"Disable java RecordReader\"\n    )\n    parser.add_argument(\n        '--do-not-use-java-record-writer', action='store_true',\n        help=\"Disable java RecordWriter\"\n    )\n    parser.add_argument(\n        '--output-format', metavar='CLASS', type=str,\n        help=\"java classname of OutputFormat\"\n    )\n    parser.add_argument(\n        '--libjars', metavar='JAR_FILE', type=a_comma_separated_list,\n        action=\"append\", help=\"Additional comma-separated list of jar files\"\n    )\n    parser.add_argument(\n        '--cache-file', metavar='HDFS_FILE', type=a_hdfs_file,\n        action=\"append\",\n        help=\"Add this HDFS file to the distributed cache as a file.\"\n    )\n    parser.add_argument(\n        '--cache-archive', metavar='HDFS_FILE', type=a_hdfs_file,\n        action=\"append\",\n        help=\"Add this HDFS archive file to the distributed cache \" +\n             \"as an archive.\"\n    )\n    parser.add_argument(\n        '--entry-point', metavar='ENTRY_POINT', type=str,\n        default=DEFAULT_ENTRY_POINT,\n        help=(\"Explicitly execute MODULE.ENTRY_POINT() \"\n              \"in the launcher script.\")\n    )\n    parser.add_argument(\n        '--avro-input', metavar='k|v|kv', choices=AVRO_IO_MODES,\n        help=\"Avro input mode (key, value or both)\",\n    )\n    parser.add_argument(\n        '--avro-output', metavar='k|v|kv', 
choices=AVRO_IO_MODES,\n        help=\"Avro output mode (key, value or both)\",\n    )\n    parser.add_argument(\n        '--pstats-dir', metavar=\"HDFS_DIR\", type=str,\n        help=\"Profile each task and store stats in this dir\"\n    )\n    parser.add_argument(\n        '--pstats-fmt', metavar=\"STRING\", type=str,\n        help=\"pstats filename pattern (expert use only)\"\n    )\n    parser.add_argument(\n        '--keep-wd', action='store_true', help=\"Don't remove the work dir\"\n    )\n\n\ndef add_parser(subparsers):\n    parser = subparsers.add_parser(\n        \"submit\",\n        description=PydoopSubmitter.DESCRIPTION,\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n    )\n    add_parser_common_arguments(parser)\n    add_parser_arguments(parser)\n    parser.set_defaults(func=run)\n    return parser\n"
  },
  {
    "path": "pydoop/avrolib.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nAvro tools.\n\"\"\"\n# DEV NOTE: since Avro is not a requirement, do *not* import this\n# module unconditionally anywhere in the main code (importing it in\n# the Avro examples is OK, ofc).\n\nimport sys\nimport avro.schema\nfrom avro.datafile import DataFileReader, DataFileWriter\nfrom avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder\n\nfrom pydoop.mapreduce.api import RecordWriter, RecordReader\nimport pydoop.hdfs as hdfs\nfrom pydoop.utils.py3compat import StringIO\n\n\nparse = avro.schema.Parse if sys.version_info[0] == 3 else avro.schema.parse\n\n\nclass Deserializer(object):\n\n    def __init__(self, schema_str):\n        schema = parse(schema_str)\n        self.reader = DatumReader(schema)\n\n    def deserialize(self, rec_bytes):\n        return self.reader.read(BinaryDecoder(StringIO(rec_bytes)))\n\n\nclass Serializer(object):\n\n    def __init__(self, schema_str):\n        schema = parse(schema_str)\n        self.writer = DatumWriter(schema)\n\n    def serialize(self, record):\n        f = StringIO()\n        encoder = BinaryEncoder(f)\n        self.writer.write(record, encoder)\n        return f.getvalue()\n\n\ntry:\n    from pyavroc import AvroDeserializer\nexcept ImportError:\n    AvroDeserializer = Deserializer\n\ntry:\n    from pyavroc import AvroSerializer\nexcept ImportError:\n    
AvroSerializer = Serializer\n\n\nclass SeekableDataFileReader(DataFileReader):\n\n    FORWARD_WINDOW_SIZE = 8192\n\n    def align_after(self, offset):\n        \"\"\"\n        Search for a sync point after offset and align just after that.\n        \"\"\"\n        f = self.reader\n        if offset <= 0:  # FIXME what is a negative offset??\n            f.seek(0)\n            self._block_count = 0\n            self._read_header()  # FIXME we can't estimate how big it is...\n            return\n        sm = self.sync_marker\n        sml = len(sm)\n        pos = offset\n        while pos < self.file_length - sml:\n            f.seek(pos)\n            data = f.read(self.FORWARD_WINDOW_SIZE)\n            sync_offset = data.find(sm)\n            if sync_offset > -1:\n                f.seek(pos + sync_offset)\n                self._block_count = 0\n                return\n            pos += len(data)\n\n\n# FIXME this is just an example with no error checking\nclass AvroReader(RecordReader):\n    \"\"\"\n    Avro data file reader.\n\n    Reads all data blocks that begin within the given input split.\n    \"\"\"\n    def __init__(self, ctx):\n        super(AvroReader, self).__init__(ctx)\n        isplit = ctx.input_split\n        self.region_start = isplit.offset\n        self.region_end = isplit.offset + isplit.length\n        self.reader = SeekableDataFileReader(hdfs.open(isplit.filename),\n                                             DatumReader())\n        self.reader.align_after(isplit.offset)\n\n    def next(self):\n        pos = self.reader.reader.tell()\n        if pos > self.region_end and self.reader._block_count == 0:\n            raise StopIteration\n        record = next(self.reader)\n        return pos, record\n\n    def get_progress(self):\n        \"\"\"\n        Give a rough estimate of the progress done.\n        \"\"\"\n        pos = self.reader.reader.tell()\n        return min((pos - self.region_start) /\n                   float(self.region_end - 
self.region_start),\n                   1.0)\n\n\n# FIXME this is just an example with no error checking\nclass AvroWriter(RecordWriter):\n\n    schema = None\n\n    def __init__(self, context):\n        super(AvroWriter, self).__init__(context)\n        job_conf = context.job_conf\n        part = int(job_conf['mapreduce.task.partition'])\n        outdir = job_conf[\"mapreduce.task.output.dir\"]\n        outfn = \"%s/part-r-%05d.avro\" % (outdir, part)\n        wh = hdfs.open(outfn, \"w\")\n        self.writer = DataFileWriter(wh, DatumWriter(), self.schema)\n\n    def close(self):\n        self.writer.close()\n        # FIXME do we really need to explicitly close the filesystem?\n        self.writer.writer.fs.close()\n"
  },
  {
    "path": "pydoop/hadoop_utils.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# DEV NOTE: this module is used by the setup script, so it MUST be\n# importable even if Pydoop has not been installed (yet).\n\n\"\"\"\nTools for retrieving Hadoop-related information.\n\"\"\"\n\nimport os\nimport glob\nimport re\nimport platform\nimport subprocess\nimport xml.dom.minidom as dom\nfrom xml.parsers.expat import ExpatError\n\n\nclass HadoopXMLError(Exception):\n    pass\n\n\ndef extract_text(node):\n    return \"\".join(\n        c.data.strip() for c in node.childNodes if c.nodeType == c.TEXT_NODE\n    )\n\n\ndef parse_hadoop_conf_file(fn):\n    items = []\n    try:\n        doc = dom.parse(fn)\n    except ExpatError as e:\n        raise HadoopXMLError(\"not a valid XML file (%s)\" % e)\n    conf = doc.documentElement\n    if conf.nodeName != \"configuration\":\n        raise HadoopXMLError(\"not a valid Hadoop configuration file\")\n    props = [n for n in conf.childNodes if n.nodeName == \"property\"]\n    nv = {}\n    for p in props:\n        for n in p.childNodes:\n            if n.childNodes:\n                nv[n.nodeName] = extract_text(n)\n        try:\n            items.append((nv[\"name\"], nv[\"value\"]))\n        except KeyError:\n            pass\n    return dict(items)\n\n\nclass PathFinder(object):\n    \"\"\"\n    Encapsulates the logic to find paths and other info required by Pydoop.\n    \"\"\"\n 
   def __init__(self):\n        self.__hadoop_home = None\n        self.__hadoop_conf = None\n        self.__hadoop_params = None\n        self.__hadoop_classpath = None\n        self.__is_local = None\n\n    def reset(self):\n        self.__init__()\n\n    # note that this can be None even after trying detection\n    def hadoop_home(self):\n        if not self.__hadoop_home:\n            hh = os.getenv(\"HADOOP_HOME\", os.getenv(\"HADOOP_PREFIX\"))\n            if not hh:\n                exe = subprocess.check_output(\n                    \"command -v hadoop\", shell=True, universal_newlines=True\n                ).strip()\n                candidate, child = os.path.split(os.path.dirname(exe))\n                if child == \"bin\" and os.path.isdir(candidate):\n                    hh = os.environ[\"HADOOP_HOME\"] = candidate\n            self.__hadoop_home = hh\n        return self.__hadoop_home\n\n    def hadoop_conf(self):\n        if not self.__hadoop_conf:\n            error = \"Hadoop config not found, try setting HADOOP_CONF_DIR\"\n            try:\n                self.__hadoop_conf = os.environ[\"HADOOP_CONF_DIR\"]\n            except KeyError:\n                hh = self.hadoop_home()\n                if not hh:\n                    raise RuntimeError(error)\n                candidate = os.path.join(hh, 'etc', 'hadoop')\n                if not os.path.isdir(candidate):\n                    raise RuntimeError(error)\n                self.__hadoop_conf = os.environ[\"HADOOP_CONF_DIR\"] = candidate\n        return self.__hadoop_conf\n\n    def hadoop_params(self):\n        if not self.__hadoop_params:\n            params = {}\n            hadoop_conf = self.hadoop_conf()\n            for n in \"hadoop\", \"core\", \"hdfs\", \"mapred\":\n                fn = os.path.join(hadoop_conf, \"%s-site.xml\" % n)\n                try:\n                    params.update(parse_hadoop_conf_file(fn))\n                except (IOError, HadoopXMLError):\n                    
pass  # silently ignore, as in Hadoop\n            self.__hadoop_params = params\n        return self.__hadoop_params\n\n    def hadoop_classpath(self):\n        if not self.__hadoop_classpath:\n            cp = subprocess.check_output(\n                \"hadoop classpath --glob\", shell=True, universal_newlines=True\n            ).strip()\n            # older hadoop versions ignore --glob\n            if 'hadoop-common' not in cp:\n                cp = ':'.join(':'.join(glob.iglob(_)) for _ in cp.split(':'))\n            self.__hadoop_classpath = cp\n        return self.__hadoop_classpath\n\n    def __get_is_local(self):\n        conf = self.hadoop_params()\n        keys = ('mapreduce.framework.name',\n                'mapreduce.jobtracker.address',\n                'mapred.job.tracker')\n        for k in keys:\n            if conf.get(k, 'local').lower() != 'local':\n                return False\n        return True\n\n    def is_local(self):\n        \"\"\"\\\n        Is Hadoop configured to run in local mode?\n\n        By default, it is. [pseudo-]distributed mode must be\n        explicitly configured.\n        \"\"\"\n        if self.__is_local is None:\n            self.__is_local = self.__get_is_local()\n        return self.__is_local\n"
  },
  {
    "path": "pydoop/hadut.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nProvides access to some functionalities available via the Hadoop shell.\n\"\"\"\n\nimport os\nimport shlex\nimport subprocess\n\nimport pydoop.utils.misc as utils\nimport pydoop.hdfs as hdfs\nfrom .utils.py3compat import basestring\n\n\n# --- FIXME: perhaps we need a more sophisticated tool for setting args ---\nGENERIC_ARGS = frozenset([\n    \"-conf\", \"-D\", \"-fs\", \"-jt\", \"-files\", \"-libjars\", \"-archives\"\n])\n\nCSV_ARGS = frozenset([\n    \"-files\", \"-libjars\", \"-archives\"\n])\n\n\n# generic args must go before command-specific args\ndef _pop_generic_args(args):\n    generic_args = []\n    i = len(args) - 1\n    while i >= 0:\n        if args[i] in GENERIC_ARGS:\n            try:\n                args[i + 1]\n            except IndexError:\n                raise ValueError(\"option %s has no value\" % args[i])\n            generic_args.extend(args[i: i + 2])\n            del args[i: i + 2]\n        i -= 1\n    return generic_args\n\n\n# -files f1 -files f2 --> -files f1,f2\ndef _merge_csv_args(args):\n    merge_map = {}\n    i = len(args) - 1\n    while i >= 0:\n        if args[i] in CSV_ARGS:\n            try:\n                args[i + 1]\n            except IndexError:\n                raise ValueError(\"option %s has no value\" % args[i])\n            k, v = args[i: i + 2]\n            
merge_map.setdefault(k, []).append(v.strip())\n            del args[i: i + 2]\n        i -= 1\n    for k, vlist in merge_map.items():\n        args.extend([k, \",\".join(vlist)])\n\n# FIXME: the above functions share a lot of code\n# -------------------------------------------------------------------------\n\n\ndef _construct_property_args(prop_dict):\n    return sum((['-D', '%s=%s' % p] for p in prop_dict.items()), [])\n\n\n# inherits from RuntimeError for backwards compatibility\nclass RunCmdError(RuntimeError):\n    \"\"\"\n    Raised by :func:`run_tool_cmd` and all functions that make\n    use of it to indicate that the call failed (returned non-zero).\n    \"\"\"\n    def __init__(self, returncode, cmd, output=None):\n        RuntimeError.__init__(self, output)\n        self.returncode = returncode\n        self.cmd = cmd\n\n    def __str__(self):\n        m = RuntimeError.__str__(self)\n        if m:\n            return m  # mimic old run_cmd behaviour\n        else:\n            return \"Command '%s' returned non-zero exit status %d\" % (\n                self.cmd, self.returncode\n            )\n\n\n# keep_streams must default to True for backwards compatibility\ndef run_tool_cmd(tool, cmd, args=None, properties=None, hadoop_conf_dir=None,\n                 logger=None, keep_streams=True):\n    \"\"\"\n    Run a Hadoop command.\n\n    If ``keep_streams`` is set to :obj:`True` (the default), the\n    stdout and stderr of the command will be buffered in memory.  
If\n    the command succeeds, the former will be returned; if it fails, a\n    ``RunCmdError`` will be raised with the latter as the message.\n    This mode is appropriate for short-running commands whose \"result\"\n    is represented by their standard output (e.g.,\n    ``rval = run_tool_cmd(\"hdfs\", \"dfsadmin\", [\"-safemode\", \"get\"])``).\n\n    If ``keep_streams`` is set to :obj:`False`, the command will write\n    directly to the stdout and stderr of the calling process, and the\n    return value will be empty.  This mode is appropriate for long\n    running commands that do not write their \"real\" output to stdout.\n    \"\"\"\n    if logger is None:\n        logger = utils.NullLogger()\n    _args = [tool]\n    if hadoop_conf_dir:\n        _args.extend([\"--config\", hadoop_conf_dir])\n    _args.append(cmd)\n    if properties:\n        _args.extend(_construct_property_args(properties))\n    if args:\n        if isinstance(args, basestring):\n            args = shlex.split(args)\n        _merge_csv_args(args)\n        gargs = _pop_generic_args(args)\n        for seq in gargs, args:\n            _args.extend(map(str, seq))\n    logger.debug('final args: %r', (_args,))\n    if keep_streams:\n        p = subprocess.Popen(\n            _args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,\n            universal_newlines=True,\n        )\n        output, error = p.communicate()\n    else:\n        p = subprocess.Popen(_args, stdout=None, stderr=None, bufsize=1)\n        ret = p.wait()\n        error = 'command exited with %d status' % ret if ret else ''\n        output = ''\n    if p.returncode:\n        raise RunCmdError(p.returncode, ' '.join(_args), error)\n    return output\n\n\ndef run_cmd(cmd, args=None, properties=None, hadoop_home=None,\n            hadoop_conf_dir=None, logger=None, keep_streams=True):\n    \"\"\"\n    Runs the ``hadoop`` command.\n\n    Calls :func:`run_tool_cmd` with ``\"hadoop\"`` as the first argument.\n    \"\"\"\n    return 
run_tool_cmd(\"hadoop\", cmd, args=args, properties=properties,\n                        hadoop_conf_dir=hadoop_conf_dir, logger=logger,\n                        keep_streams=keep_streams)\n\n\ndef run_class(class_name, args=None, properties=None, classpath=None,\n              hadoop_conf_dir=None, logger=None, keep_streams=True):\n    \"\"\"\n    Run a Java class with Hadoop (equivalent of running ``hadoop\n    <class_name>`` from the command line).\n\n    Additional ``HADOOP_CLASSPATH`` elements can be provided via\n    ``classpath`` (either as a non-string sequence where each element\n    is a classpath element or as a ``':'``-separated string).  Other\n    arguments are passed to :func:`run_cmd`.\n\n    .. note::\n\n      ``HADOOP_CLASSPATH`` makes dependencies available **only on the\n      client side**.  If you are running a MapReduce application, use\n      ``args=['-libjars', 'jar1,jar2,...']`` to make them available to\n      the server side as well.\n    \"\"\"\n    if logger is None:\n        logger = utils.NullLogger()\n    old_classpath = None\n    if classpath:\n        old_classpath = os.getenv('HADOOP_CLASSPATH', '')\n        if isinstance(classpath, basestring):\n            classpath = [classpath]\n        # Prepend the classpaths provided by the user to the existing\n        # HADOOP_CLASSPATH value.  Order matters.  
We could work a little\n        # harder to avoid duplicates, but it's not essential\n        os.environ['HADOOP_CLASSPATH'] = \":\".join(\n            classpath + old_classpath.split(':', 1)\n        )\n        logger.debug('HADOOP_CLASSPATH: %r', os.getenv('HADOOP_CLASSPATH'))\n    try:\n        res = run_cmd(class_name, args, properties,\n                      hadoop_conf_dir=hadoop_conf_dir, logger=logger,\n                      keep_streams=keep_streams)\n    finally:\n        if old_classpath is not None:\n            os.environ['HADOOP_CLASSPATH'] = old_classpath\n    return res\n\n\ndef iter_mr_out_files(mr_out_dir):\n    for fn in hdfs.ls(mr_out_dir):\n        if hdfs.path.basename(fn).startswith(\"part\"):\n            yield fn\n\n\ndef collect_output(mr_out_dir, out_file=None):\n    \"\"\"\n    Return all mapreduce output in ``mr_out_dir``.\n\n    Append the output to ``out_file`` if provided.  Otherwise, return\n    the result as a single string (it is the caller's responsibility to\n    ensure that the amount of data retrieved fits into memory).\n    \"\"\"\n    if out_file is None:\n        output = []\n        for fn in iter_mr_out_files(mr_out_dir):\n            with hdfs.open(fn, \"rt\") as f:\n                output.append(f.read())\n        return \"\".join(output)\n    else:\n        block_size = 16777216\n        with open(out_file, 'a') as o:\n            for fn in iter_mr_out_files(mr_out_dir):\n                with hdfs.open(fn) as f:\n                    data = f.read(block_size)\n                    while len(data) > 0:\n                        o.write(data)\n                        data = f.read(block_size)\n"
  },
  {
    "path": "pydoop/hdfs/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nr\"\"\"\nThis module allows you to connect to an HDFS installation, read and\nwrite files and get information on files, directories and global\nfilesystem properties.\n\n\nConfiguration\n-------------\n\nThe hdfs module is built on top of ``libhdfs``, in turn a JNI wrapper\naround the Java fs code: therefore, for the module to work properly,\nthe Java class path must include all relevant Hadoop jars. Pydoop tries to\npopulate the class path automatically by calling ``hadoop classpath``, so make\nsure the ``hadoop`` command is in the ``PATH`` on all cluster nodes. If your\nHadoop configuration directory is in a non-standard location, also ensure that\nthe ``HADOOP_CONF_DIR`` env var is set to the appropriate value.\n\nAnother important environment variable for this module is ``LIBHDFS_OPTS``,\nused to set options for the JVM on top of which it runs. 
To control the heap\nsize, for instance, you could set ``LIBHDFS_OPTS`` to ``\"-Xms32m -Xmx512m\"``.\n\"\"\"\n\n__all__ = [\n    'path',\n    'init',\n    'reset',\n    'hdfs',\n    'default_is_local',\n    'open',\n    'dump',\n    'load',\n    'cp',\n    'put',\n    'get',\n    'mkdir',\n    'rm',\n    'rmr',\n    'lsl',\n    'ls',\n    'chmod',\n    'move',\n    'chown',\n    'rename',\n    'renames',\n    'stat',\n    'lstat',\n    'access',\n    'utime',\n]\n\n\nimport os\n\nimport pydoop\nfrom . import common, path\nfrom pydoop.utils.py3compat import bintype\n\ntry:\n    _ORIG_CLASSPATH\nexcept NameError:\n    _ORIG_CLASSPATH = os.getenv(\"CLASSPATH\", \"\")\n\n\n# --- MODULE CONFIG ---\ndef init():\n    os.environ[\"CLASSPATH\"] = \"%s:%s\" % (\n        pydoop.hadoop_classpath(), _ORIG_CLASSPATH\n    )\n\n\ninit()\n\n\ndef reset():\n    pydoop.reset()\n    init()\n# ---------------------\n\n\nfrom .fs import hdfs, default_is_local\n\n\ndef open(hdfs_path, mode=\"r\", buff_size=0, replication=0, blocksize=0,\n         user=None, encoding=None, errors=None):\n    \"\"\"\n    Open a file, returning an :class:`~.file.hdfs_file` object.\n\n    ``hdfs_path`` and ``user`` are passed to :func:`~path.split`,\n    while the other args are passed to the :class:`~.file.hdfs_file`\n    constructor.\n    \"\"\"\n    host, port, path_ = path.split(hdfs_path, user)\n    fs = hdfs(host, port, user)\n    return fs.open_file(path_, mode, buff_size, replication, blocksize,\n                        encoding, errors)\n\n\ndef dump(data, hdfs_path, **kwargs):\n    \"\"\"\\\n    Write ``data`` to ``hdfs_path``.\n\n    Keyword arguments are passed to :func:`open`, except for ``mode``,\n    which is forced to ``\"w\"`` (or ``\"wt\"`` for text data).\n    \"\"\"\n    kwargs[\"mode\"] = \"w\" if isinstance(data, bintype) else \"wt\"\n    with open(hdfs_path, **kwargs) as fo:\n        i = 0\n        bufsize = common.BUFSIZE\n        while i < len(data):\n            fo.write(data[i: i + 
bufsize])\n            i += bufsize\n    fo.fs.close()\n\n\ndef load(hdfs_path, **kwargs):\n    \"\"\"\\\n    Read the content of ``hdfs_path`` and return it.\n\n    Keyword arguments are passed to :func:`open`. The `\"mode\"` kwarg\n    must be readonly.\n    \"\"\"\n    m, _ = common.parse_mode(kwargs.get(\"mode\", \"r\"))\n    if m != \"r\":\n        raise ValueError(\"opening mode must be readonly\")\n    with open(hdfs_path, **kwargs) as fi:\n        data = fi.read()\n    fi.fs.close()\n    return data\n\n\ndef _cp_file(src_fs, src_path, dest_fs, dest_path, **kwargs):\n    kwargs.pop(\"mode\", None)\n    kwargs[\"mode\"] = \"r\"\n    with src_fs.open_file(src_path, **kwargs) as fi:\n        kwargs[\"mode\"] = \"w\"\n        with dest_fs.open_file(dest_path, **kwargs) as fo:\n            bufsize = common.BUFSIZE\n            while 1:\n                chunk = fi.read(bufsize)\n                if chunk:\n                    fo.write(chunk)\n                else:\n                    break\n\n\ndef cp(src_hdfs_path, dest_hdfs_path, **kwargs):\n    \"\"\"\\\n    Copy the contents of ``src_hdfs_path`` to ``dest_hdfs_path``.\n\n    If ``src_hdfs_path`` is a directory, its contents will be copied\n    recursively. Source file(s) are opened for reading and copies are\n    opened for writing. Additional keyword arguments, if any, are\n    handled like in :func:`open`.\n    \"\"\"\n    src, dest = {}, {}\n    try:\n        for d, p in ((src, src_hdfs_path), (dest, dest_hdfs_path)):\n            d[\"host\"], d[\"port\"], d[\"path\"] = path.split(p)\n            d[\"fs\"] = hdfs(d[\"host\"], d[\"port\"])\n        # --- does src exist? ---\n        try:\n            src[\"info\"] = src[\"fs\"].get_path_info(src[\"path\"])\n        except IOError:\n            raise IOError(\"no such file or directory: %r\" % (src[\"path\"]))\n        # --- src exists. Does dest exist? 
---\n        try:\n            dest[\"info\"] = dest[\"fs\"].get_path_info(dest[\"path\"])\n        except IOError:\n            if src[\"info\"][\"kind\"] == \"file\":\n                _cp_file(src[\"fs\"], src[\"path\"], dest[\"fs\"], dest[\"path\"],\n                         **kwargs)\n                return\n            else:\n                dest[\"fs\"].create_directory(dest[\"path\"])\n                dest_hdfs_path = dest[\"fs\"].get_path_info(dest[\"path\"])[\"name\"]\n                for item in src[\"fs\"].list_directory(src[\"path\"]):\n                    cp(item[\"name\"], dest_hdfs_path, **kwargs)\n                return\n        # --- dest exists. Is it a file? ---\n        if dest[\"info\"][\"kind\"] == \"file\":\n            raise IOError(\"%r already exists\" % (dest[\"path\"]))\n        # --- dest is a directory ---\n        dest[\"path\"] = path.join(dest[\"path\"], path.basename(src[\"path\"]))\n        if dest[\"fs\"].exists(dest[\"path\"]):\n            raise IOError(\"%r already exists\" % (dest[\"path\"]))\n        if src[\"info\"][\"kind\"] == \"file\":\n            _cp_file(src[\"fs\"], src[\"path\"], dest[\"fs\"], dest[\"path\"],\n                     **kwargs)\n        else:\n            dest[\"fs\"].create_directory(dest[\"path\"])\n            dest_hdfs_path = dest[\"fs\"].get_path_info(dest[\"path\"])[\"name\"]\n            for item in src[\"fs\"].list_directory(src[\"path\"]):\n                cp(item[\"name\"], dest_hdfs_path, **kwargs)\n    finally:\n        for d in src, dest:\n            try:\n                d[\"fs\"].close()\n            except KeyError:\n                pass\n\n\ndef put(src_path, dest_hdfs_path, **kwargs):\n    \"\"\"\\\n    Copy the contents of ``src_path`` to ``dest_hdfs_path``.\n\n    ``src_path`` is forced to be interpreted as an ordinary local path\n    (see :func:`~path.abspath`). The source file is opened for reading\n    and the copy is opened for writing. 
Additional keyword arguments,\n    if any, are handled like in :func:`open`.\n    \"\"\"\n    cp(path.abspath(src_path, local=True), dest_hdfs_path, **kwargs)\n\n\ndef get(src_hdfs_path, dest_path, **kwargs):\n    \"\"\"\\\n    Copy the contents of ``src_hdfs_path`` to ``dest_path``.\n\n    ``dest_path`` is forced to be interpreted as an ordinary local\n    path (see :func:`~path.abspath`). The source file is opened for\n    reading and the copy is opened for writing. Additional keyword\n    arguments, if any, are handled like in :func:`open`.\n    \"\"\"\n    cp(src_hdfs_path, path.abspath(dest_path, local=True), **kwargs)\n\n\ndef mkdir(hdfs_path, user=None):\n    \"\"\"\n    Create a directory and its parents as needed.\n    \"\"\"\n    host, port, path_ = path.split(hdfs_path, user)\n    fs = hdfs(host, port, user)\n    retval = fs.create_directory(path_)\n    fs.close()\n    return retval\n\n\ndef rm(hdfs_path, recursive=True, user=None):\n    \"\"\"\n    Remove a file or directory.\n\n    If ``recursive`` is :obj:`True` (the default), directory contents are\n    removed recursively.\n    \"\"\"\n    host, port, path_ = path.split(hdfs_path, user)\n    fs = hdfs(host, port, user)\n    retval = fs.delete(path_, recursive=recursive)\n    fs.close()\n    return retval\n\n\n# backwards compatibility\ndef rmr(hdfs_path, user=None):\n    return rm(hdfs_path, recursive=True, user=user)\n\n\ndef lsl(hdfs_path, user=None, recursive=False):\n    \"\"\"\n    Return a list of dictionaries of file properties.\n\n    If ``hdfs_path`` is a file, there is only one item corresponding to\n    the file itself; if it is a directory and ``recursive`` is\n    :obj:`False`, each list item corresponds to a file or directory\n    contained by it; if it is a directory and ``recursive`` is\n    :obj:`True`, the list contains one item for every file or directory\n    in the tree rooted at ``hdfs_path``.\n    \"\"\"\n    host, port, path_ = path.split(hdfs_path, user)\n    fs = hdfs(host, 
port, user)\n    if not recursive:\n        dir_list = fs.list_directory(path_)\n    else:\n        treewalk = fs.walk(path_)\n        top = next(treewalk)\n        if top['kind'] == 'directory':\n            dir_list = list(treewalk)\n        else:\n            dir_list = [top]\n    fs.close()\n    return dir_list\n\n\ndef ls(hdfs_path, user=None, recursive=False):\n    \"\"\"\n    Return a list of hdfs paths.\n\n    Works in the same way as :func:`lsl`, except for the fact that list\n    items are hdfs paths instead of dictionaries of properties.\n    \"\"\"\n    dir_list = lsl(hdfs_path, user, recursive)\n    return [d[\"name\"] for d in dir_list]\n\n\ndef chmod(hdfs_path, mode, user=None):\n    \"\"\"\n    Change file mode bits.\n\n    :type path: string\n    :param path: the path to the file or directory\n    :type mode: int\n    :param mode: the bitmask to set it to (e.g., 0777)\n    \"\"\"\n    host, port, path_ = path.split(hdfs_path, user)\n    fs = hdfs(host, port, user)\n    retval = fs.chmod(path_, mode)\n    fs.close()\n    return retval\n\n\ndef move(src, dest, user=None):\n    \"\"\"\n    Move or rename src to dest.\n    \"\"\"\n    src_host, src_port, src_path = path.split(src, user)\n    dest_host, dest_port, dest_path = path.split(dest, user)\n    src_fs = hdfs(src_host, src_port, user)\n    dest_fs = hdfs(dest_host, dest_port, user)\n    try:\n        retval = src_fs.move(src_path, dest_fs, dest_path)\n        return retval\n    finally:\n        src_fs.close()\n        dest_fs.close()\n\n\ndef chown(hdfs_path, user=None, group=None, hdfs_user=None):\n    \"\"\"\n    See :meth:`fs.hdfs.chown`.\n    \"\"\"\n    user = user or ''\n    group = group or ''\n    host, port, path_ = path.split(hdfs_path, hdfs_user)\n    with hdfs(host, port, hdfs_user) as fs:\n        return fs.chown(path_, user=user, group=group)\n\n\ndef rename(from_path, to_path, user=None):\n    \"\"\"\n    See :meth:`fs.hdfs.rename`.\n    \"\"\"\n    fhost, fport, fpath = 
path.split(from_path, user)\n    thost, tport, tpath = path.split(to_path, user)\n    with hdfs(thost, tport, user) as fs:\n        chost, cport = fs.host, fs.port\n    with hdfs(fhost, fport, user) as fs:\n        if fs.host != chost or fs.port != cport:\n            raise RuntimeError(\"can't do a cross-fs rename\")\n        return fs.rename(fpath, tpath)\n\n\ndef renames(from_path, to_path, user=None):\n    \"\"\"\n    Rename ``from_path`` to ``to_path``, creating parents as needed.\n    \"\"\"\n    to_dir = path.dirname(to_path)\n    if to_dir:\n        mkdir(to_dir, user=user)\n    rename(from_path, to_path, user=user)\n\n\n# direct bindings\nstat = path.stat\nlstat = path.lstat\naccess = path.access\nutime = path.utime\n"
  },
  {
    "path": "pydoop/hdfs/common.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nCommon hdfs utilities.\n\"\"\"\n\nimport getpass\nimport pwd\nimport grp\nimport sys\n\n__is_py3 = sys.version_info >= (3, 0)\n\n\nBUFSIZE = 16384\nDEFAULT_PORT = 8020  # org/apache/hadoop/hdfs/server/namenode/NameNode.java\nDEFAULT_USER = getpass.getuser()\n\n# Unicode objects are encoded using this encoding:\nTEXT_ENCODING = 'utf-8'\n# We use UTF-8 since this is what the Hadoop TextFileFormat uses\n# NOTE:  If you change this, you'll also need to fix the encoding\n# used by the native extension.\n\n\nBASE_MODES = frozenset(\"rwa\")\n\n\ndef parse_mode(mode):\n    try:\n        base_mode = mode[0]\n    except IndexError:\n        raise ValueError(\"mode cannot be empty\")\n    if base_mode not in BASE_MODES:\n        raise ValueError(\"base mode must be one of %s\" % \", \".join(BASE_MODES))\n    try:\n        is_text = mode[1] == \"t\"\n    except IndexError:\n        is_text = False\n    return base_mode, is_text\n\n\nif __is_py3:\n    def encode_path(path):\n        return path\n\n    def decode_path(path):\n        return path\n\n    def encode_host(host):\n        return host\n\n    def decode_host(host):\n        return host\nelse:\n    def encode_path(path):\n        if isinstance(path, unicode):  # noqa: F821\n            path = path.encode('utf-8')\n        return path\n\n    def decode_path(path):\n        if 
isinstance(path, str):\n            path = path.decode('utf-8')\n        return path\n\n    def encode_host(host):\n        if isinstance(host, unicode):  # noqa: F821\n            host = host.encode('idna')\n        return host\n\n    def decode_host(host):\n        if isinstance(host, str):\n            host = host.decode('idna')\n        return host\n\n\ndef get_groups(user=DEFAULT_USER):\n    groups = set(_.gr_name for _ in grp.getgrall() if user in set(_.gr_mem))\n    primary_gid = pwd.getpwnam(user).pw_gid\n    groups.add(grp.getgrgid(primary_gid).gr_name)\n    return groups\n"
  },
  {
    "path": "pydoop/hdfs/core/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nHDFS core implementation.\n\"\"\"\n\nimport os\n\n\ndef init():\n    import pydoop.utils.jvm as jvm\n    jvm.load_jvm_lib()\n    try:\n        # NOTE: JVM must be already instantiated\n        import pydoop.native_core_hdfs\n    except ImportError:\n        return None  # should only happen at compile time\n    else:\n        return pydoop.native_core_hdfs\n\n\ndef core_hdfs_fs(host, port, user):\n    _CORE_MODULE = init()\n    if _CORE_MODULE is None:\n        if os.path.isdir(\"pydoop\"):\n            msg = \"Trying to import from the source directory?\"\n        else:\n            msg = \"Check that Pydoop is correctly installed\"\n        raise RuntimeError(\"Core module unavailable. %s\" % msg)\n    return _CORE_MODULE.CoreHdfsFs(host, port, user)\n"
  },
  {
    "path": "pydoop/hdfs/file.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\npydoop.hdfs.file -- HDFS File Objects\n-------------------------------------\n\"\"\"\n\nimport os\nimport io\nimport codecs\n\nfrom pydoop.hdfs import common\n\n\ndef _complain_ifclosed(closed):\n    if closed:\n        raise ValueError(\"I/O operation on closed HDFS file object\")\n\n\nclass FileIO(object):\n    \"\"\"\n    Instances of this class represent HDFS file objects.\n\n    Objects from this class should not be instantiated directly.  
To\n    open an HDFS file, use :meth:`~.fs.hdfs.open_file`, or the\n    top-level ``open`` function in the hdfs package.\n    \"\"\"\n    ENCODING = \"utf-8\"\n    ERRORS = \"strict\"\n\n    def __init__(self, raw_hdfs_file, fs, mode, encoding=None, errors=None):\n        self.mode = mode\n        self.base_mode, is_text = common.parse_mode(self.mode)\n        self.buff_size = raw_hdfs_file.buff_size\n        if self.buff_size <= 0:\n            self.buff_size = common.BUFSIZE\n        if is_text:\n            self.__encoding = encoding or self.__class__.ENCODING\n            self.__errors = errors or self.__class__.ERRORS\n            try:\n                codecs.lookup(self.__encoding)\n                codecs.lookup_error(self.__errors)\n            except LookupError as e:\n                raise ValueError(e)\n        else:\n            if encoding:\n                raise ValueError(\n                    \"binary mode doesn't take an encoding argument\")\n            if errors:\n                raise ValueError(\"binary mode doesn't take an errors argument\")\n            self.__encoding = self.__errors = None\n        cls = io.BufferedReader if self.base_mode == \"r\" else io.BufferedWriter\n        self.f = cls(raw_hdfs_file, buffer_size=self.buff_size)\n        self.__fs = fs\n        info = fs.get_path_info(self.f.raw.name)\n        self.__name = info[\"name\"]\n        self.__size = info[\"size\"]\n        self.closed = False\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_value, traceback):\n        self.close()\n\n    @property\n    def fs(self):\n        \"\"\"\n        The file's hdfs instance.\n        \"\"\"\n        return self.__fs\n\n    @property\n    def name(self):\n        \"\"\"\n        The file's fully qualified name.\n        \"\"\"\n        return self.__name\n\n    @property\n    def size(self):\n        \"\"\"\n        The file's size in bytes. 
This attribute is initialized when the\n        file is opened and updated when it is closed.\n        \"\"\"\n        return self.__size\n\n    def writable(self):\n        return self.f.raw.writable()\n\n    def readline(self):\n        \"\"\"\n        Read and return a line of text.\n\n        :rtype: str\n        :return: the next line of text in the file, including the\n          newline character\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        line = self.f.readline()\n        if self.__encoding:\n            return line.decode(self.__encoding, self.__errors)\n        else:\n            return line\n\n    def next(self):\n        \"\"\"\n        Return the next input line, or raise :class:`StopIteration`\n        when EOF is hit.\n        \"\"\"\n        return self.__next__()\n\n    def __next__(self):\n        \"\"\"\n        Return the next input line, or raise :class:`StopIteration`\n        when EOF is hit.\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        line = self.readline()\n        if not line:\n            raise StopIteration\n        return line\n\n    def __iter__(self):\n        return self\n\n    def available(self):\n        \"\"\"\n        Number of bytes that can be read from this input stream without\n        blocking.\n\n        :rtype: int\n        :return: available bytes\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.f.raw.available()\n\n    def close(self):\n        \"\"\"\n        Close the file.\n        \"\"\"\n        if not self.closed:\n            self.closed = True\n            retval = self.f.close()\n            if self.base_mode != \"r\":\n                self.__size = self.fs.get_path_info(self.name)[\"size\"]\n            return retval\n\n    def pread(self, position, length):\n        r\"\"\"\n        Read ``length`` bytes of data from the file, starting from\n        ``position``\\ .\n\n        :type position: int\n        :param position: position from 
which to read\n        :type length: int\n        :param length: the number of bytes to read\n        :rtype: string\n        :return: the chunk of data read from the file\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if position > self.size:\n            raise IOError(\"position cannot be past EOF\")\n        if length < 0:\n            length = self.size - position\n        data = self.f.raw.pread(position, length)\n        if self.__encoding:\n            return data.decode(self.__encoding, self.__errors)\n        else:\n            return data\n\n    def read(self, length=-1):\n        \"\"\"\n        Read ``length`` bytes from the file.  If ``length`` is negative or\n        omitted, read all data until EOF.\n\n        :type length: int\n        :param length: the number of bytes to read\n        :rtype: string\n        :return: the chunk of data read from the file\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        # NOTE: libhdfs read stops at block boundaries: it is *essential*\n        # to ensure that we actually read the required number of bytes.\n        if length < 0:\n            length = self.size\n        chunks = []\n        while 1:\n            if length <= 0:\n                break\n            c = self.f.read(min(self.buff_size, length))\n            if c == b\"\":\n                break\n            chunks.append(c)\n            length -= len(c)\n        data = b\"\".join(chunks)\n        if self.__encoding:\n            return data.decode(self.__encoding, self.__errors)\n        else:\n            return data\n\n    def seek(self, position, whence=os.SEEK_SET):\n        \"\"\"\n        Seek to ``position`` in file.\n\n        :type position: int\n        :param position: offset in bytes to seek to\n        :type whence: int\n        :param whence: defaults to ``os.SEEK_SET`` (absolute); other\n          values are ``os.SEEK_CUR`` (relative to the current position)\n          and ``os.SEEK_END`` (relative to 
the file's end).\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.f.seek(position, whence)\n\n    def tell(self):\n        \"\"\"\n        Get the current byte offset in the file.\n\n        :rtype: int\n        :return: current offset in bytes\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.f.tell()\n\n    def write(self, data):\n        \"\"\"\n        Write ``data`` to the file.\n\n        :type data: bytes\n        :param data: the data to be written to the file\n        :rtype: int\n        :return: the number of bytes written\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if self.__encoding:\n            self.f.write(data.encode(self.__encoding, self.__errors))\n            return len(data)\n        else:\n            return self.f.write(data)\n\n    def flush(self):\n        \"\"\"\n        Force any buffered output to be written.\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.f.flush()\n\n\nclass hdfs_file(FileIO):\n\n    def pread_chunk(self, position, chunk):\n        r\"\"\"\n        Works like :meth:`pread`\\ , but data is stored in the writable\n        buffer ``chunk`` rather than returned. Reads at most a number of\n        bytes equal to the size of ``chunk``\\ .\n\n        :type position: int\n        :param position: position from which to read\n        :type chunk: buffer\n        :param chunk: a writable object that supports the buffer protocol\n        :rtype: int\n        :return: the number of bytes read\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if position > self.size:\n            raise IOError(\"position cannot be past EOF\")\n        return self.f.raw.pread_chunk(position, chunk)\n\n    def read_chunk(self, chunk):\n        r\"\"\"\n        Works like :meth:`read`\\ , but data is stored in the writable\n        buffer ``chunk`` rather than returned. 
Reads at most a number of\n        bytes equal to the size of ``chunk``\\ .\n\n        :type chunk: buffer\n        :param chunk: a writable object that supports the buffer protocol\n        :rtype: int\n        :return: the number of bytes read\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.f.readinto(chunk)\n\n\nclass local_file(io.FileIO):\n    \"\"\"\\\n    Support class to handle local files.\n\n    Object of this type have the same interface as :class:`FileIO` (and should\n    also be obtained via higher level methods rather than instantiated\n    directly), but act as handles to local files.\n    \"\"\"\n    def __init__(self, fs, name, mode):\n        if not mode.startswith(\"r\"):\n            local_file.__make_parents(fs, name)\n        super(local_file, self).__init__(name, mode)\n        name = os.path.abspath(name)\n        self.__fs = fs\n        self.__size = os.fstat(super(local_file, self).fileno()).st_size\n        self.f = self\n        self.buff_size = io.DEFAULT_BUFFER_SIZE\n\n    @staticmethod\n    def __make_parents(fs, name):\n        d = os.path.dirname(name)\n        if d:\n            try:\n                fs.create_directory(d)\n            except IOError:\n                raise IOError(\"Cannot open file %s\" % name)\n\n    @property\n    def fs(self):\n        return self.__fs\n\n    @property\n    def size(self):\n        return self.__size\n\n    def available(self):\n        _complain_ifclosed(self.closed)\n        return self.size\n\n    def close(self):\n        if self.writable():\n            self.flush()\n            os.fsync(self.fileno())\n            self.__size = os.fstat(self.fileno()).st_size\n        super(local_file, self).close()\n\n    def seek(self, position, whence=os.SEEK_SET):\n        if position > self.__size:\n            raise IOError(\"position cannot be past EOF\")\n        return super(local_file, self).seek(position, whence)\n\n    def __seek_and_read(self, position, 
length=None, buf=None):\n        assert (length is None) != (buf is None)\n        _complain_ifclosed(self.closed)\n        old_pos = self.tell()\n        self.seek(position)\n        if buf is not None:\n            ret = self.readinto(buf)\n        else:\n            if length < 0:\n                length = self.size - position\n            ret = self.read(length)\n        self.seek(old_pos)\n        return ret\n\n    def pread(self, position, length):\n        return self.__seek_and_read(position, length=length)\n\n    def pread_chunk(self, position, chunk):\n        return self.__seek_and_read(position, buf=chunk)\n\n    def read_chunk(self, chunk):\n        _complain_ifclosed(self.closed)\n        return self.readinto(chunk)\n\n\nclass TextIOWrapper(io.TextIOWrapper):\n\n    def __getattr__(self, name):\n        # there is no readinto method in text mode (strings are immutable)\n        if name.endswith(\"_chunk\"):\n            raise AttributeError(\"%r object has no attribute %r\" % (\n                self.__class__.__name__, name\n            ))\n        a = getattr(self.buffer.raw, name)\n        if name == \"mode\":\n            a = \"%st\" % self.buffer.raw.mode[0]\n        return a\n\n    def pread(self, position, length):\n        data = self.buffer.raw.pread(position, length)\n        return data.decode(self.encoding, self.errors)\n"
  },
  {
    "path": "pydoop/hdfs/fs.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\npydoop.hdfs.fs -- File System Handles\n-------------------------------------\n\"\"\"\n\nimport os\nimport socket\nimport getpass\nimport re\nimport operator as ops\nimport io\n\nimport pydoop\nfrom . import common\nfrom .file import FileIO, hdfs_file, local_file, TextIOWrapper\nfrom .core import core_hdfs_fs\n\n# py3 compatibility\nfrom functools import reduce\n\ntry:\n    from urllib.parse import urlparse\nexcept ImportError:\n    from urlparse import urlparse\n\n\nclass _FSStatus(object):\n\n    def __init__(self, fs, host, port, user, refcount=1):\n        self.fs = fs\n        self.host = host\n        self.port = port\n        self.user = user\n        self.refcount = refcount\n\n    def __repr__(self):\n        return \"_FSStatus(%s, %s)\" % (self.fs, self.refcount)\n\n\ndef _complain_ifclosed(closed):\n    if closed:\n        raise ValueError(\"I/O operation on closed HDFS instance\")\n\n\ndef _get_ip(host, default=None):\n    try:\n        ip = socket.gethostbyname(host)\n    except socket.gaierror:\n        ip = \"0.0.0.0\"  # same as socket.gethostbyname(\"\")\n    return ip if ip != \"0.0.0.0\" else default\n\n\ndef _get_connection_info(host, port, user):\n    fs = core_hdfs_fs(host, port, user)\n    res = urlparse(fs.get_working_directory())\n    if not res.scheme or res.scheme == \"file\":\n        h, p, u = 
\"\", 0, getpass.getuser()\n        fs.set_working_directory(os.getcwd())  # libhdfs \"remembers\" old cwd\n    else:\n        try:\n            h, p = res.netloc.split(\":\")\n        except ValueError:\n            h, p = res.netloc, common.DEFAULT_PORT\n\n            # try to find an IP address if we can't extract it from res.netloc\n            if not res.netloc:\n                hosts = fs.get_hosts(str(res.path), 0, 0)\n                if hosts and hosts[0] and hosts[0][0]:\n                    h, p = hosts[0][0], common.DEFAULT_PORT\n        u = res.path.split(\"/\", 2)[2]\n    return h, int(p), u, fs\n\n\ndef _default_fs():\n    params = pydoop.hadoop_params()\n    _fs = params.get(\"fs.defaultFS\", params.get(\"fs.default.name\", \"file:///\"))\n    return urlparse(_fs)\n\n\ndef default_is_local():\n    \"\"\"\\\n    Is Hadoop configured to use the local file system?\n\n    By default, it is. A DFS must be explicitly configured.\n    \"\"\"\n    _fs = _default_fs()\n    return _fs.scheme == \"file\"\n\n\nclass hdfs(object):\n    \"\"\"\n    A handle to an HDFS instance.\n\n    :type host: str\n    :param host: hostname or IP address of the HDFS NameNode. Set to an\n      empty string (and ``port`` to 0) to connect to the local file\n      system; set to ``'default'`` (and ``port`` to 0) to connect to the\n      default (i.e., the one defined in the Hadoop configuration files)\n      file system.\n    :type port: int\n    :param port: the port on which the NameNode is listening\n    :type user: str\n    :param user: the Hadoop domain user name. Defaults to the current\n      UNIX user. Note that, in MapReduce applications, since tasks are\n      spawned by the JobTracker, the default user will be the one that\n      started the JobTracker itself.\n    :type groups: list\n    :param groups: ignored. 
Included for backwards compatibility.\n\n    **Note:** when connecting to the local file system, ``user`` is\n    ignored (i.e., it will always be the current UNIX user).\n    \"\"\"\n    _CACHE = {}\n    _ALIASES = {\"host\": {}, \"port\": {}, \"user\": {}}\n\n    def __canonize_hpu(self, hpu):\n        host, port, user = hpu\n        host = self._ALIASES[\"host\"].get(host, host)\n        port = self._ALIASES[\"port\"].get(port, port)\n        user = self._ALIASES[\"user\"].get(user, user)\n        return host, port, user\n\n    def __lookup(self, hpu):\n        if hpu[0]:\n            hpu = self.__canonize_hpu(hpu)\n        return self._CACHE[hpu]\n\n    def __eq__(self, other):\n        \"\"\"\n        :obj:`True` if ``self`` and ``other`` wrap the same Hadoop file\n        system instance\n        \"\"\"\n        return type(self) == type(other) and self.fs == other.fs\n\n    def __init__(self, host=\"default\", port=0, user=None, groups=None):\n        host = host.strip()\n        raw_host = host\n        host = common.encode_host(host)\n        if user is None:\n            user = \"\"\n        if not host:\n            port = 0\n            user = user or getpass.getuser()\n        try:\n            self.__status = self.__lookup((host, port, user))\n        except KeyError:\n            h, p, u, fs = _get_connection_info(host, port, user)\n            aliasing_info = [] if user else [(\"user\", u, user)]\n            if h != \"\":\n                aliasing_info.append((\"port\", p, port))\n            ip = _get_ip(h, None)\n            if ip:\n                aliasing_info.append((\"host\", ip, h))\n            else:\n                ip = h\n            aliasing_info.append((\"host\", ip, host))\n            if raw_host != host:\n                aliasing_info.append((\"host\", ip, raw_host))\n            for k, true_x, x in aliasing_info:\n                if true_x != x:\n                    self._ALIASES[k][x] = true_x\n            try:\n                
self.__status = self.__lookup((h, p, u))\n            except KeyError:\n                self.__status = _FSStatus(fs, h, p, u, refcount=0)\n                self._CACHE[(ip, p, u)] = self.__status\n        self.__status.refcount += 1\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_value, traceback):\n        self.close()\n\n    @property\n    def fs(self):\n        return self.__status.fs\n\n    @property\n    def refcount(self):\n        return self.__status.refcount\n\n    @property\n    def host(self):\n        \"\"\"\n        The actual hdfs hostname (empty string for the local fs).\n        \"\"\"\n        return self.__status.host\n\n    @property\n    def port(self):\n        \"\"\"\n        The actual hdfs port (0 for the local fs).\n        \"\"\"\n        return self.__status.port\n\n    @property\n    def user(self):\n        \"\"\"\n        The user associated with this HDFS connection.\n        \"\"\"\n        return self.__status.user\n\n    def close(self):\n        \"\"\"\n        Close the HDFS handle (disconnect).\n        \"\"\"\n        self.__status.refcount -= 1\n        if self.refcount == 0:\n            self.fs.close()\n            for k, status in list(self._CACHE.items()):  # yes, we want a copy\n                if status.refcount == 0:\n                    del self._CACHE[k]\n\n    @property\n    def closed(self):\n        return self.__status.refcount == 0\n\n    def open_file(self, path,\n                  mode=\"r\",\n                  buff_size=0,\n                  replication=0,\n                  blocksize=0,\n                  encoding=None,\n                  errors=None):\n        \"\"\"\n        Open an HDFS file.\n\n        Supported opening modes are \"r\", \"w\", \"a\". 
In addition, a\n        trailing \"t\" can be added to specify text mode (e.g., \"rt\" =\n        open for reading text).\n\n        Pass 0 as ``buff_size``, ``replication`` or ``blocksize`` if you want\n        to use the \"configured\" values, i.e., the ones set in the Hadoop\n        configuration files.\n\n        :type path: str\n        :param path: the full path to the file\n        :type mode: str\n        :param mode: opening mode\n        :type buff_size: int\n        :param buff_size: read/write buffer size in bytes\n        :type replication: int\n        :param replication: HDFS block replication\n        :type blocksize: int\n        :param blocksize: HDFS block size\n        :rtype: :class:`~.file.hdfs_file`\n        :return: handle to the open file\n\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if not path:\n            raise ValueError(\"Empty path\")\n        m, is_text = common.parse_mode(mode)\n        if not self.host:\n            fret = local_file(self, path, m)\n            if is_text:\n                cls = io.BufferedReader if m == \"r\" else io.BufferedWriter\n                fret = TextIOWrapper(cls(fret), encoding, errors)\n            return fret\n        f = self.fs.open_file(path, m, buff_size, replication, blocksize)\n        cls = FileIO if is_text else hdfs_file\n        fret = cls(f, self, mode)\n        return fret\n\n    def capacity(self):\n        \"\"\"\n        Return the raw capacity of the filesystem.\n\n        :rtype: int\n        :return: filesystem capacity\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if not self.__status.host:\n            raise RuntimeError('Capacity is not defined for a local fs')\n        return self.fs.get_capacity()\n\n    def copy(self, from_path, to_hdfs, to_path):\n        \"\"\"\n        Copy file from one filesystem to another.\n\n        :type from_path: str\n        :param from_path: the path of the source file\n        :type to_hdfs: 
:class:`hdfs`\n        :param to_hdfs: destination filesystem\n        :type to_path: str\n        :param to_path: the path of the destination file\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if isinstance(to_hdfs, self.__class__):\n            to_hdfs = to_hdfs.fs\n        return self.fs.copy(from_path, to_hdfs, to_path)\n\n    def create_directory(self, path):\n        \"\"\"\n        Create directory ``path`` (non-existent parents will be created as\n        well).\n\n        :type path: str\n        :param path: the path of the directory\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.create_directory(path)\n\n    def default_block_size(self):\n        \"\"\"\n        Get the default block size.\n\n        :rtype: int\n        :return: the default blocksize\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.get_default_block_size()\n\n    def delete(self, path, recursive=True):\n        \"\"\"\n        Delete ``path``.\n\n        :type path: str\n        :param path: the path of the file or directory\n        :type recursive: bool\n        :param recursive: if ``path`` is a directory, delete it recursively\n          when :obj:`True`\n        :raises: :exc:`~exceptions.IOError` when ``recursive`` is\n          :obj:`False` and directory is non-empty\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.delete(path, recursive)\n\n    def exists(self, path):\n        \"\"\"\n        Check if a given path exists on the filesystem.\n\n        :type path: str\n        :param path: the path to look for\n        :rtype: bool\n        :return: :obj:`True` if ``path`` exists\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.exists(path)\n\n    def get_hosts(self, path, start, length):\n        \"\"\"\n        Get hostnames where a particular 
block (determined by pos and\n        blocksize) of a file is stored. Due to replication, a single block\n        could be present on multiple hosts.\n\n        :type path: str\n        :param path: the path of the file\n        :type start: int\n        :param start: the start of the block\n        :type length: int\n        :param length: the length of the block\n        :rtype: list\n        :return: list of hosts that store the block\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.get_hosts(path, start, length)\n\n    def get_path_info(self, path):\n        \"\"\"\n        Get information about ``path`` as a dict of properties.\n\n        The return value, based upon ``fs.FileStatus`` from the Java API,\n        has the following fields:\n\n        * ``block_size``: HDFS block size of ``path``\n        * ``group``: group associated with ``path``\n        * ``kind``: ``'file'`` or ``'directory'``\n        * ``last_access``: last access time of ``path``\n        * ``last_mod``: last modification time of ``path``\n        * ``name``: fully qualified path name\n        * ``owner``: owner of ``path``\n        * ``permissions``: file system permissions associated with ``path``\n        * ``replication``: replication factor of ``path``\n        * ``size``: size in bytes of ``path``\n\n        :type path: str\n        :param path: a path in the filesystem\n        :rtype: dict\n        :return: path information\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.get_path_info(path)\n\n    def list_directory(self, path):\n        r\"\"\"\n        Get list of files and directories for ``path``\\ .\n\n        :type path: str\n        :param path: the path of the directory\n        :rtype: list\n        :return: list of files and directories in ``path``\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        
return self.fs.list_directory(path)\n\n    def move(self, from_path, to_hdfs, to_path):\n        \"\"\"\n        Move file from one filesystem to another.\n\n        :type from_path: str\n        :param from_path: the path of the source file\n        :type to_hdfs: :class:`hdfs`\n        :param to_hdfs: destination filesystem\n        :type to_path: str\n        :param to_path: the path of the destination file\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        if isinstance(to_hdfs, self.__class__):\n            to_hdfs = to_hdfs.fs\n        return self.fs.move(from_path, to_hdfs, to_path)\n\n    def rename(self, from_path, to_path):\n        \"\"\"\n        Rename file.\n\n        :type from_path: str\n        :param from_path: the path of the source file\n        :type to_path: str\n        :param to_path: the path of the destination file\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.rename(from_path, to_path)\n\n    def set_replication(self, path, replication):\n        r\"\"\"\n        Set the replication of ``path`` to ``replication``\\ .\n\n        :type path: str\n        :param path: the path of the file\n        :type replication: int\n        :param replication: the replication value\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.set_replication(path, replication)\n\n    def set_working_directory(self, path):\n        r\"\"\"\n        Set the working directory to ``path``\\ . 
All relative paths will\n        be resolved relative to it.\n\n        :type path: str\n        :param path: the path of the directory\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.set_working_directory(path)\n\n    def used(self):\n        \"\"\"\n        Return the total raw size of all files in the filesystem.\n\n        :rtype: int\n        :return: total size of files in the file system\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.get_used()\n\n    def working_directory(self):\n        \"\"\"\n        Get the current working directory.\n\n        :rtype: str\n        :return: current working directory\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        wd = self.fs.get_working_directory()\n        return wd\n\n    def chown(self, path, user='', group=''):\n        \"\"\"\n        Change file owner and group.\n\n        :type path: str\n        :param path: the path to the file or directory\n        :type user: str\n        :param user: Hadoop username. Set to '' if only setting group\n        :type group: str\n        :param group: Hadoop group name. Set to '' if only setting user\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.chown(path, user, group)\n\n    @staticmethod\n    def __get_umask():\n        current_umask = os.umask(0)\n        os.umask(current_umask)\n        return current_umask\n\n    def __compute_mode_from_string(self, path, mode_string):\n        \"\"\"\n        Scan a unix-style mode string and apply it to ``path``.\n\n        :type mode_string: str\n        :param mode_string: see ``man chmod`` for details. ``X``, ``s``\n          and ``t`` modes are not supported.  
The string should match the\n          following regular expression: ``[ugoa]*[-+=]([rwx]*)``.\n        :rtype: int\n        :return: a new mode integer resulting from applying ``mode_string``\n          to ``path``.\n        :raises: :exc:`~exceptions.ValueError` if ``mode_string`` is invalid.\n        \"\"\"\n        Char_to_perm_byte = {'r': 4, 'w': 2, 'x': 1}\n        Fields = (('u', 6), ('g', 3), ('o', 0))\n        # --\n        m = re.match(r\"\\s*([ugoa]*)([-+=])([rwx]*)\\s*\", mode_string)\n        if not m:\n            raise ValueError(\"Invalid mode string %s\" % mode_string)\n        who = m.group(1)\n        what_op = m.group(2)\n        which_perm = m.group(3)\n        # --\n        old_mode = self.fs.get_path_info(path)['permissions']\n        # The mode to be applied by the operation, repeated three\n        # times in a list, for user, group, and other respectively.\n        # Initially these are identical, but some may change if we\n        # have to respect the umask setting.\n        op_perm = [\n            reduce(ops.ior, [Char_to_perm_byte[c] for c in which_perm])\n        ] * 3\n        if 'a' in who:\n            who = 'ugo'\n        elif who == '':\n            who = 'ugo'\n            # erase the umask bits\n            inverted_umask = ~self.__get_umask()\n            for i, field in enumerate(Fields):\n                op_perm[i] &= (inverted_umask >> field[1]) & 0x7\n        # for each user, compute the permission bit and set it in the mode\n        new_mode = 0\n        for i, tpl in enumerate(Fields):\n            field, shift = tpl\n            # shift by the bits specified for the field; keep only the\n            # 3 lowest bits\n            old = (old_mode >> shift) & 0x7\n            if field in who:\n                if what_op == '-':\n                    new = old & ~op_perm[i]\n                elif what_op == '=':\n                    new = op_perm[i]\n                elif what_op == '+':\n                    new = old | 
op_perm[i]\n                else:\n                    raise RuntimeError(\n                        \"unexpected permission operation %s\" % what_op\n                    )\n            else:\n                # copy the previous permissions\n                new = old\n            new_mode |= new << shift\n        return new_mode\n\n    def chmod(self, path, mode):\n        \"\"\"\n        Change file mode bits.\n\n        :type path: str\n        :param path: the path to the file or directory\n        :type mode: int\n        :param mode: the bitmask to set it to (e.g., 0777)\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        try:\n            return self.fs.chmod(path, mode)\n        except TypeError:\n            mode = self.__compute_mode_from_string(path, mode)\n            return self.fs.chmod(path, mode)\n\n    def utime(self, path, mtime, atime):\n        \"\"\"\n        Change file last access and modification times.\n\n        :type path: str\n        :param path: the path to the file or directory\n        :type mtime: int\n        :param mtime: new modification time in seconds\n        :type atime: int\n        :param atime: new access time in seconds\n        :raises: :exc:`~exceptions.IOError`\n        \"\"\"\n        _complain_ifclosed(self.closed)\n        return self.fs.utime(path, int(mtime), int(atime))\n\n    def walk(self, top):\n        \"\"\"\n        Generate infos for all paths in the tree rooted at ``top`` (included).\n\n        The ``top`` parameter can be either an HDFS path string or a\n        dictionary of properties as returned by :meth:`get_path_info`.\n\n        :type top: str, dict\n        :param top: an HDFS path or path info dict\n        :rtype: iterator\n        :return: path infos of files and directories in the tree rooted at\n          ``top``\n        :raises: :exc:`~exceptions.IOError`; :exc:`~exceptions.ValueError`\n          if ``top`` is empty\n        \"\"\"\n  
      if not top:\n            raise ValueError(\"Empty path\")\n        if not isinstance(top, dict):\n            top = self.get_path_info(top)\n        yield top\n        if top['kind'] == 'directory':\n            for info in self.list_directory(top['name']):\n                for item in self.walk(info):\n                    yield item\n"
  },
  {
    "path": "pydoop/hdfs/path.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\npydoop.hdfs.path -- Path Name Manipulations\n-------------------------------------------\n\"\"\"\n\nimport os\nimport re\nimport time\n\nfrom . import common, fs as hdfs_fs\nfrom pydoop.utils.py3compat import clong\n\n\ncurdir, pardir, sep = '.', '..', '/'  # pylint: disable=C0103\n\n\nclass StatResult(object):\n    \"\"\"\n    Mimics the object type returned by :func:`os.stat`.\n\n    Objects of this class are instantiated from dictionaries with the\n    same structure as the ones returned by :meth:`~.fs.hdfs.get_path_info`.\n\n    Attributes starting with ``st_`` have the same meaning as the\n    corresponding ones in the object returned by :func:`os.stat`, although\n    some of them may not make sense for an HDFS path (in this case,\n    their value will be set to 0).  
In addition, the ``kind``, ``name``\n    and ``replication`` attributes are available, with the same values\n    as in the input dict.\n    \"\"\"\n    def __init__(self, path_info):\n        self.st_mode = path_info['permissions']\n        self.st_ino = 0\n        self.st_dev = clong(0)\n        self.st_nlink = 1\n        self.st_uid = path_info['owner']\n        self.st_gid = path_info['group']\n        self.st_size = path_info['size']\n        self.st_atime = path_info['last_access']\n        self.st_mtime = path_info['last_mod']\n        self.st_ctime = 0\n        # --\n        self.st_blksize = path_info['block_size']\n        if self.st_blksize:\n            n, r = divmod(path_info['size'], self.st_blksize)\n            self.st_blocks = n + (r != 0)\n        else:\n            self.st_blocks = 0\n        # --\n        self.kind = path_info['kind']\n        self.name = path_info['name']\n        self.replication = path_info['replication']\n\n    def __repr__(self):\n        names = [_ for _ in dir(self) if _.startswith('st_')]\n        names.extend(['kind', 'name', 'replication'])\n        return '%s(%s)' % (\n            self.__class__.__name__,\n            ', '.join('%s=%r' % (_, getattr(self, _)) for _ in names)\n        )\n\n\nclass _HdfsPathSplitter(object):\n\n    PATTERN = re.compile(r\"([a-z0-9+.-]+):(.*)\")\n\n    @classmethod\n    def raise_bad_path(cls, hdfs_path, why=None):\n        msg = \"'%s' is not a valid HDFS path\" % hdfs_path\n        msg += \" (%s)\" % why if why else \"\"\n        raise ValueError(msg)\n\n    @classmethod\n    def parse(cls, hdfs_path):\n        if not hdfs_path:\n            return \"\", \"\", \"\"\n        try:\n            scheme, rest = cls.PATTERN.match(hdfs_path).groups()\n        except AttributeError:\n            scheme, rest = \"\", hdfs_path\n        if not rest:\n            cls.raise_bad_path(hdfs_path, \"no scheme-specific part\")\n        if rest.startswith(\"//\") and not rest.startswith(\"///\"):\n       
     if not scheme:\n                cls.raise_bad_path(hdfs_path, 'null scheme')\n            try:\n                netloc, path = rest[2:].split(\"/\", 1)\n                path = \"/%s\" % path\n            except ValueError:\n                netloc, path = rest[2:], \"\"\n        elif scheme and not rest.startswith('/'):\n            cls.raise_bad_path(hdfs_path, \"relative path in absolute URI\")\n        else:\n            netloc, path = \"\", rest\n        if path.startswith(\"/\"):\n            path = \"/%s\" % path.lstrip(\"/\")\n        return scheme, netloc, path\n\n    @classmethod\n    def unparse(cls, scheme, netloc, path):\n        hdfs_path = []\n        if scheme:\n            hdfs_path.append('%s:' % scheme.rstrip(':'))\n        if netloc:\n            if not scheme:\n                raise ValueError('netloc provided, but scheme is empty')\n            hdfs_path.append('//%s' % netloc)\n        if hdfs_path and path and not path.startswith('/'):\n            hdfs_path.append('/')\n        hdfs_path.append(path)\n        return ''.join(hdfs_path)\n\n    @classmethod\n    def split_netloc(cls, netloc):\n        if not netloc:\n            return \"default\", 0\n        netloc = netloc.split(\":\")\n        if len(netloc) > 2:\n            raise ValueError(\"netloc is not well-formed: %r\" % (netloc,))\n        if len(netloc) < 2:\n            return netloc[0], common.DEFAULT_PORT\n        hostname, port = netloc\n        try:\n            port = int(port)\n        except ValueError:\n            raise ValueError(\n                \"bad netloc (port must be an integer): %r\" % (netloc,)\n            )\n        return hostname, port\n\n    @classmethod\n    def split(cls, hdfs_path, user):\n        if not hdfs_path:\n            cls.raise_bad_path(hdfs_path, \"empty\")\n        scheme, netloc, path = cls.parse(hdfs_path)\n        if not scheme:\n            scheme = \"file\" if hdfs_fs.default_is_local() else \"hdfs\"\n        if scheme == \"hdfs\":\n  
          if not path:\n                cls.raise_bad_path(hdfs_path, \"path part is empty\")\n            if \":\" in path:\n                cls.raise_bad_path(\n                    hdfs_path, \"':' not allowed outside netloc part\"\n                )\n            hostname, port = cls.split_netloc(netloc)\n            if not path.startswith(\"/\"):\n                path = \"/user/%s/%s\" % (user, path)\n        elif scheme == \"file\":\n            hostname, port, path = \"\", 0, netloc + path\n        else:\n            cls.raise_bad_path(hdfs_path, \"unsupported scheme %r\" % scheme)\n        return hostname, port, path\n\n\ndef parse(hdfs_path):\n    \"\"\"\n    Parse the given path and return its components.\n\n    :type hdfs_path: str\n    :param hdfs_path: an HDFS path, e.g., ``hdfs://localhost:9000/user/me``\n    :rtype: tuple\n    :return: scheme, netloc, path\n    \"\"\"\n    return _HdfsPathSplitter.parse(hdfs_path)\n\n\ndef unparse(scheme, netloc, path):\n    \"\"\"\n    Construct a path from its three components (see :func:`parse`).\n    \"\"\"\n    return _HdfsPathSplitter.unparse(scheme, netloc, path)\n\n\ndef split(hdfs_path, user=None):\n    \"\"\"\n    Split ``hdfs_path`` into a (hostname, port, path) tuple.\n\n    :type hdfs_path: str\n    :param hdfs_path: an HDFS path, e.g., ``hdfs://localhost:9000/user/me``\n    :type user: str\n    :param user: user name used to resolve relative paths, defaults to the\n      current user\n    :rtype: tuple\n    :return: hostname, port, path\n    \"\"\"\n    # Use a helper class to compile URL_PATTERN once and for all\n    return _HdfsPathSplitter.split(hdfs_path, user or common.DEFAULT_USER)\n\n\ndef join(*parts):\n    \"\"\"\n    Join path name components, inserting ``/`` as needed.\n\n    If any component is an absolute path (see :func:`isabs`), all\n    previous components will be discarded.  However, full URIs (see\n    :func:`isfull`) take precedence over incomplete ones:\n\n    .. 
code-block:: python\n\n      >>> import pydoop.hdfs.path as hpath\n      >>> hpath.join('bar', '/foo')\n      '/foo'\n      >>> hpath.join('hdfs://host:1/', '/foo')\n      'hdfs://host:1/foo'\n\n    Note that this is *not* the reverse of :func:`split`, but rather a\n    specialized version of :func:`os.path.join`. No check is made to determine\n    whether the returned string is a valid HDFS path.\n    \"\"\"\n    try:\n        path = [parts[0]]\n    except IndexError:\n        raise TypeError(\"need at least one argument\")\n    for p in parts[1:]:\n        path[-1] = path[-1].rstrip(\"/\")\n        full = isfull(path[0])\n        if isfull(p) or (isabs(p) and not full):\n            path = [p]\n        else:\n            path.append(p.lstrip('/'))\n    return \"/\".join(path)\n\n\ndef abspath(hdfs_path, user=None, local=False):\n    \"\"\"\n    Return an absolute path for ``hdfs_path``.\n\n    The ``user`` arg is passed to :func:`split`. The ``local`` argument\n    forces ``hdfs_path`` to be interpreted as an ordinary local path:\n\n    .. code-block:: python\n\n      >>> import os\n      >>> os.chdir('/tmp')\n      >>> import pydoop.hdfs.path as hpath\n      >>> hpath.abspath('file:/tmp')\n      'file:/tmp'\n      >>> hpath.abspath('file:/tmp', local=True)\n      'file:/tmp/file:/tmp'\n\n    Note that this function always return a full URI:\n\n    .. 
code-block:: python\n\n      >>> import pydoop.hdfs.path as hpath\n      >>> hpath.abspath('/tmp')\n      'hdfs://localhost:9000/tmp'\n    \"\"\"\n    if local:\n        return 'file:%s' % os.path.abspath(hdfs_path)\n    if isfull(hdfs_path):\n        return hdfs_path\n    hostname, port, path = split(hdfs_path, user=user)\n    if hostname:\n        fs = hdfs_fs.hdfs(hostname, port)\n        apath = join(\"hdfs://%s:%s\" % (fs.host, fs.port), path)\n        fs.close()\n    else:\n        apath = \"file:%s\" % os.path.abspath(path)\n    return apath\n\n\ndef splitpath(hdfs_path):\n    \"\"\"\n    Split ``hdfs_path`` into a (``head``, ``tail``) pair, according to\n    the same rules as :func:`os.path.split`.\n    \"\"\"\n    return (dirname(hdfs_path), basename(hdfs_path))\n\n\ndef basename(hdfs_path):\n    \"\"\"\n    Return the final component of ``hdfs_path``.\n    \"\"\"\n    return os.path.basename(hdfs_path)\n\n\ndef dirname(hdfs_path):\n    \"\"\"\n    Return the directory component of ``hdfs_path``.\n    \"\"\"\n    scheme, netloc, path = parse(hdfs_path)\n    return unparse(scheme, netloc, os.path.dirname(path))\n\n\ndef exists(hdfs_path, user=None):\n    \"\"\"\n    Return :obj:`True` if ``hdfs_path`` exists in the default HDFS.\n    \"\"\"\n    hostname, port, path = split(hdfs_path, user=user)\n    fs = hdfs_fs.hdfs(hostname, port)\n    retval = fs.exists(path)\n    fs.close()\n    return retval\n\n\n# -- libhdfs does not support fs.FileStatus.isSymlink() --\ndef lstat(hdfs_path, user=None):\n    return stat(hdfs_path, user=user)\n\n\ndef lexists(hdfs_path, user=None):\n    return exists(hdfs_path, user=user)\n# --------------------------------------------------------\n\n\ndef kind(path, user=None):\n    \"\"\"\n    Get the kind of item (\"file\" or \"directory\") that the path references.\n\n    Return :obj:`None` if ``path`` doesn't exist.\n    \"\"\"\n    hostname, port, path = split(path, user=user)\n    fs = hdfs_fs.hdfs(hostname, port)\n    try:\n   
     return fs.get_path_info(path)['kind']\n    except IOError:\n        return None\n    finally:\n        fs.close()\n\n\ndef isdir(path, user=None):\n    \"\"\"\n    Return :obj:`True` if ``path`` refers to a directory.\n    \"\"\"\n    return kind(path, user) == 'directory'\n\n\ndef isfile(path, user=None):\n    \"\"\"\n    Return :obj:`True` if ``path`` refers to a file.\n    \"\"\"\n    return kind(path, user) == 'file'\n\n\ndef expanduser(path):\n    \"\"\"\n    Replace initial ``~`` or ``~user`` with the user's home directory.\n\n    **NOTE:** if the default file system is HDFS, the ``~user`` form is\n    expanded regardless of the user's existence.\n    \"\"\"\n    if hdfs_fs.default_is_local():\n        return os.path.expanduser(path)\n    m = re.match(r'^~([^/]*)', path)\n    if m is None:\n        return path\n    user = m.groups()[0] or common.DEFAULT_USER\n    return '/user/%s%s' % (user, path[m.end(1):])\n\n\ndef expandvars(path):\n    \"\"\"\n    Expand environment variables in ``path``.\n    \"\"\"\n    return os.path.expandvars(path)\n\n\ndef _update_stat(st, path_):\n    try:\n        os_st = os.stat(path_)\n    except OSError:\n        pass\n    else:\n        for name in dir(os_st):\n            if name.startswith('st_'):\n                setattr(st, name, getattr(os_st, name))\n\n\ndef stat(path, user=None):\n    \"\"\"\n    Performs the equivalent of :func:`os.stat` on ``path``, returning a\n    :class:`StatResult` object.\n    \"\"\"\n    host, port, path_ = split(path, user)\n    fs = hdfs_fs.hdfs(host, port, user)\n    retval = StatResult(fs.get_path_info(path_))\n    if not host:\n        _update_stat(retval, path_)\n    fs.close()\n    return retval\n\n\ndef getatime(path, user=None):\n    \"\"\"\n    Get time of last access of ``path``.\n    \"\"\"\n    return stat(path, user=user).st_atime\n\n\ndef getmtime(path, user=None):\n    \"\"\"\n    Get time of last modification of ``path``.\n    \"\"\"\n    return stat(path, 
user=user).st_mtime\n\n\ndef getctime(path, user=None):\n    \"\"\"\n    Get time of creation / last metadata change of ``path``.\n    \"\"\"\n    return stat(path, user=user).st_ctime\n\n\ndef getsize(path, user=None):\n    \"\"\"\n    Get size, in bytes, of ``path``.\n    \"\"\"\n    return stat(path, user=user).st_size\n\n\ndef isfull(path):\n    \"\"\"\n    Return :obj:`True` if ``path`` is a full URI (starts with a scheme\n    followed by a colon).\n\n    No check is made to determine whether ``path`` is a valid HDFS path.\n    \"\"\"\n    return bool(_HdfsPathSplitter.PATTERN.match(path))\n\n\ndef isabs(path):\n    \"\"\"\n    Return :obj:`True` if ``path`` is absolute.\n\n    A path is absolute if it is a full URI (see :func:`isfull`) or\n    starts with a forward slash. No check is made to determine whether\n    ``path`` is a valid HDFS path.\n    \"\"\"\n    return isfull(path) or path.startswith('/')\n\n\ndef islink(path, user=None):\n    \"\"\"\n    Return :obj:`True` if ``path`` is a symbolic link.\n\n    Currently this function always returns :obj:`False` for non-local paths.\n    \"\"\"\n    host, _, path_ = split(path, user)\n    if host:\n        return False  # libhdfs does not support fs.FileStatus.isSymlink()\n    return os.path.islink(path_)\n\n\ndef ismount(path):\n    \"\"\"\n    Return :obj:`True` if ``path`` is a mount point.\n\n    This function always returns :obj:`False` for non-local paths.\n    \"\"\"\n    host, _, path_ = split(path, None)\n    if host:\n        return False\n    return os.path.ismount(path_)\n\n\ndef normcase(path):\n    return path  # we only support Linux / OS X\n\n\ndef normpath(path):\n    \"\"\"\n    Normalize ``path``, collapsing redundant separators and up-level refs.\n    \"\"\"\n    scheme, netloc, path_ = parse(path)\n    return unparse(scheme, netloc, os.path.normpath(path_))\n\n\ndef realpath(path):\n    \"\"\"\n    Return ``path`` with symlinks resolved.\n\n    Currently this function returns non-local 
paths unchanged.\n    \"\"\"\n    scheme, netloc, path_ = parse(path)\n    if scheme == 'file' or hdfs_fs.default_is_local():\n        return unparse(scheme, netloc, os.path.realpath(path_))\n    return path\n\n\ndef samefile(path1, path2, user=None):\n    \"\"\"\n    Return :obj:`True` if both path arguments refer to the same path.\n    \"\"\"\n    def tr(p):\n        return abspath(normpath(realpath(p)), user=user)\n    return tr(path1) == tr(path2)\n\n\ndef splitdrive(path):\n    return '', path  # we only support Linux / OS X\n\n\ndef splitext(path):\n    \"\"\"\n    Same as :func:`os.path.splitext`.\n    \"\"\"\n    return os.path.splitext(path)\n\n\ndef access(path, mode, user=None):\n    \"\"\"\n    Perform the equivalent of :func:`os.access` on ``path``.\n    \"\"\"\n    scheme = parse(path)[0]\n    if scheme == 'file' or hdfs_fs.default_is_local():\n        return os.access(path, mode)\n    if user is None:\n        user = common.DEFAULT_USER\n    st = stat(path)\n    if st.st_uid == user:\n        mode <<= 6\n    else:\n        try:\n            groups = common.get_groups(user)\n        except KeyError:\n            # user isn't recognized on the system.  No group\n            # information available\n            groups = []\n        if st.st_gid in groups:\n            mode <<= 3\n    return (st.st_mode & mode) == mode\n\n\ndef utime(hdfs_path, times=None, user=None):\n    atime, mtime = times or 2 * (time.time(),)\n    hostname, port, path = split(hdfs_path, user=user)\n    with hdfs_fs.hdfs(hostname, port) as fs:\n        fs.utime(path, mtime, atime)\n"
  },
  {
    "path": "pydoop/jc.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nProvides a wrapper for the JobConf object.\n\"\"\"\n\n\ndef jc_wrapper(obj):\n    \"\"\"\n    Backward compatibility function to support pydoop 0.* applications\n    \"\"\"\n    return obj\n"
  },
  {
    "path": "pydoop/mapreduce/__init__.py",
    "content": ""
  },
  {
    "path": "pydoop/mapreduce/api.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nThis module provides the base abstract classes used to develop MapReduce\napplication components (:class:`Mapper`, :class:`Reducer`, etc.).\n\"\"\"\n\nimport json\nfrom abc import abstractmethod\nfrom collections import namedtuple\n\nfrom pydoop.utils.py3compat import ABC\n\n\n# move to pydoop.properties?\nAVRO_IO_MODES = {'k', 'v', 'kv', 'K', 'V', 'KV'}\n\n\nclass JobConf(dict):\n    \"\"\"\n    Configuration properties assigned to this job.\n\n    JobConf objects are instantiated by the framework and support the\n    same interface as dictionaries, plus a few methods that perform\n    automatic type conversion::\n\n      >>> jc['a']\n      '1'\n      >>> jc.get_int('a')\n      1\n    \"\"\"\n    def get_int(self, key, default=None):\n        \"\"\"\n        Same as :meth:`dict.get`, but the value is converted to an int.\n        \"\"\"\n        value = self.get(key, default)\n        return None if value is None else int(value)\n\n    def get_float(self, key, default=None):\n        \"\"\"\n        Same as :meth:`dict.get`, but the value is converted to a float.\n        \"\"\"\n        value = self.get(key, default)\n        return None if value is None else float(value)\n\n    def get_bool(self, key, default=None):\n        \"\"\"\n        Same as :meth:`dict.get`, but the value is converted to a bool.\n\n        The 
boolean value is considered, respectively, :obj:`True` or\n        :obj:`False` if the string is equal, ignoring case, to\n        ``'true'`` or ``'false'``.\n        \"\"\"\n        v = self.get(key, default)\n        if v != default:\n            v = v.strip().lower()\n            if v == 'true':\n                v = True\n            elif v == 'false':\n                v = False\n            elif default is None:\n                raise RuntimeError(\"invalid bool string: %s\" % v)\n            else:\n                v = default\n        return v\n\n    def get_json(self, key, default=None):\n        value = self.get(key, default)\n        return None if value is None else json.loads(value)\n\n\nclass InputSplit(object):\n    \"\"\"\\\n    Represents a subset of the input data assigned to a single map task.\n\n    ``InputSplit`` objects are created by the framework and made available\n    to the user application via the ``input_split`` context attribute.\n    \"\"\"\n    pass\n\n\nclass FileSplit(InputSplit,\n                namedtuple(\"FileSplit\", \"filename, offset, length\")):\n    \"\"\"\\\n    A subset (described by offset and length) of an input file.\n    \"\"\"\n    pass\n\n\nclass OpaqueSplit(InputSplit, namedtuple(\"OpaqueSplit\", \"payload\")):\n    \"\"\"\\\n    A wrapper for an arbitrary Python object.\n\n    Opaque splits are created on the Python side before job submission,\n    serialized as ``hadoop.io.Writable`` objects and stored in an HDFS file.\n    The Java submitter reads the splits from the above file and forwards them\n    to the Python tasks.\n\n    .. note::\n\n      Opaque splits are only available when running a job via ``pydoop\n      submit``. 
The HDFS path where splits are stored is specified via the\n      ``pydoop.mapreduce.pipes.externalsplits.uri`` configuration key.\n    \"\"\"\n    pass\n\n\nclass Context(ABC):\n    \"\"\"\n    Context objects are used for communication between the framework\n    and the Mapreduce application.  These objects are instantiated by the\n    framework and passed to user methods as parameters::\n\n      class Mapper(api.Mapper):\n\n          def map(self, context):\n              key, value = context.key, context.value\n              ...\n              context.emit(new_key, new_value)\n    \"\"\"\n\n    @property\n    def input_split(self):\n        \"\"\"\\\n        The :class:`InputSplit` for this task (map tasks only).\n\n        This tries to deserialize the raw split sent from upstream. In the\n        most common scenario (file-based input format), the returned value\n        will be a :class:`FileSplit`.\n\n        To get the raw split, call :meth:`get_input_split` with ``raw=True``.\n        \"\"\"\n        return self.get_input_split()\n\n    @abstractmethod\n    def get_input_split(self, raw=False):\n        pass\n\n    @property\n    def job_conf(self):\n        \"\"\"\n        MapReduce job configuration as a :class:`JobConf` object.\n        \"\"\"\n        return self.get_job_conf()\n\n    @abstractmethod\n    def get_job_conf(self):\n        pass\n\n    @property\n    def key(self):\n        \"\"\"\n        Input key.\n        \"\"\"\n        return self.get_input_key()\n\n    @abstractmethod\n    def get_input_key(self):\n        pass\n\n    @property\n    def value(self):\n        \"\"\"\n        Input value (map tasks only).\n        \"\"\"\n        return self.get_input_value()\n\n    @abstractmethod\n    def get_input_value(self):\n        pass\n\n    @property\n    def values(self):\n        \"\"\"\n        Iterator over all values for the current key (reduce tasks only).\n        \"\"\"\n        return self.get_input_values()\n\n    
@abstractmethod\n    def get_input_values(self):\n        pass\n\n    @abstractmethod\n    def emit(self, key, value):\n        \"\"\"\n        Emit a key, value pair to the framework.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def progress(self):\n        pass\n\n    @abstractmethod\n    def set_status(self, status):\n        \"\"\"\n        Set the current status.\n\n        :type status: str\n        :param status: a description of the current status\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def get_counter(self, group, name):\n        \"\"\"\n        Get a :class:`Counter` from the framework.\n\n        :type group: str\n        :param group: counter group name\n        :type name: str\n        :param name: counter name\n\n        The counter can be updated via :meth:`increment_counter`.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def increment_counter(self, counter, amount):\n        \"\"\"\n        Update a :class:`Counter` by the specified amount.\n        \"\"\"\n        pass\n\n\nclass Closable(object):\n\n    def close(self):\n        \"\"\"\n        Called after the object has finished its job.\n\n        Overriding this method is **not** required.\n        \"\"\"\n        pass\n\n\nclass Component(ABC):\n\n    def __init__(self, context):\n        self.context = context\n\n\nclass Mapper(Component, Closable):\n    \"\"\"\n    Maps input key/value pairs to a set of intermediate key/value pairs.\n    \"\"\"\n\n    @abstractmethod\n    def map(self, context):\n        \"\"\"\n        Called once for each key/value pair in the input\n        split. 
Applications must override this, emitting an output\n        key/value pair through the context.\n\n        :type context: :class:`Context`\n        :param context: the context object passed by the\n          framework, used to get the input key/value pair and emit the\n          output key/value pair.\n        \"\"\"\n        pass\n\n\nclass Reducer(Component, Closable):\n    \"\"\"\n    Reduces a set of intermediate values which share a key to a\n    (possibly) smaller set of values.\n    \"\"\"\n\n    @abstractmethod\n    def reduce(self, context):\n        \"\"\"\n        Called once for each key. Applications must override this, emitting\n        an output key/value pair through the context.\n\n        :type context: :class:`Context`\n        :param context: the context object passed by\n          the framework, used to get the input key and corresponding\n          set of values and emit the output key/value pair.\n        \"\"\"\n        pass\n\n\nclass Combiner(Reducer):\n    \"\"\"\\\n    A ``Combiner`` performs the same actions as a :class:`Reducer`, but it\n    runs locally within a map task. This helps cutting down the amount of data\n    sent to reducers across the network, with the downside that map tasks\n    require extra memory to cache intermediate key/value pairs. The cache size\n    is controlled by ``\"mapreduce.task.io.sort.mb\"`` and defaults to 100 MB.\n\n    Note that it's not strictly necessary to extend this class in order to\n    write a combiner: all that's required is that it has the same interface as\n    a :class:`reducer`. Indeed, in many cases it's useful to set the combiner\n    class to be the same as the reducer class.\n    \"\"\"\n    pass\n\n\nclass Partitioner(Component):\n    r\"\"\"\n    Controls the partitioning of intermediate keys output by the\n    :class:`Mapper`\\ . The key (or a subset of it) is used to derive the\n    partition, typically by a hash function. 
The total number of\n    partitions is the same as the number of reduce tasks for the\n    job. Hence this controls which of the *m* reduce tasks the\n    intermediate key (and hence the record) is sent to for reduction.\n    \"\"\"\n\n    @abstractmethod\n    def partition(self, key, num_of_reduces):\n        r\"\"\"\n        Get the partition number for ``key`` given the total number of\n        partitions, i.e., the number of reduce tasks for the\n        job. Applications must override this.\n\n        :type key: str\n        :param key: the key of the key/value pair being dispatched.\n        :type numOfReduces: int\n        :param numOfReduces: the total number of reduces.\n        :rtype: int\n        :return: the partition number for ``key``\\ .\n        \"\"\"\n        pass\n\n\nclass RecordReader(Component, Closable):\n    r\"\"\"\n    Breaks the data into key/value pairs for input to the :class:`Mapper`\\ .\n    \"\"\"\n\n    def __iter__(self):\n        return self\n\n    @abstractmethod\n    def next(self):\n        r\"\"\"\n        Called by the framework to provide a key/value pair to the\n        :class:`Mapper`\\ . Applications must override this, making\n        sure it raises :exc:`~exceptions.StopIteration` when there are no more\n        records to process.\n\n        :rtype: tuple\n        :return: a tuple of two elements. 
They are, respectively, the\n          key and the value (as strings)\n        \"\"\"\n        raise StopIteration\n\n    def __next__(self):\n        return self.next()\n\n    @abstractmethod\n    def get_progress(self):\n        \"\"\"\n        The current progress of the record reader through its data.\n\n        :rtype: float\n        :return: the fraction of data read up to now, as a float between 0\n          and 1.\n        \"\"\"\n        pass\n\n\nclass RecordWriter(Component, Closable):\n    \"\"\"\n    Writes the output key/value pairs to an output file.\n    \"\"\"\n\n    @abstractmethod\n    def emit(self, key, value):\n        \"\"\"\n        Writes a key/value pair. Applications must override this.\n\n        :type key: str\n        :param key: a final output key\n        :type value: str\n        :param value: a final output value\n        \"\"\"\n        pass\n\n\nclass Factory(ABC):\n    \"\"\"\\\n    Creates MapReduce application components (e.g., mapper, reducer).\n\n    A factory object must be created by the application and passed to the\n    framework as the first argument to :func:`~.pipes.run_task`. 
All MapReduce\n    applications need at least a mapper object, while other components are\n    optional (the corresponding ``create_`` method can return :obj:`None`).\n    Note that the reducer is optional only in map-only jobs, where the number\n    of reduce tasks has been set to 0.\n\n    :class:`~.pipes.Factory` provides a generic implementation that takes\n    component *classes* as initialization parameters and creates component\n    objects as needed.\n    \"\"\"\n\n    @abstractmethod\n    def create_mapper(self, context):\n        pass\n\n    def create_reducer(self, context):\n        return None\n\n    def create_combiner(self, context):\n        \"\"\"\n        Create a combiner object.\n\n        Return the new combiner or :obj:`None`, if one is not needed.\n        \"\"\"\n        return None\n\n    def create_partitioner(self, context):\n        \"\"\"\n        Create a partitioner object.\n\n        Return the new partitioner or :obj:`None`, if the default partitioner\n        should be used.\n        \"\"\"\n        return None\n\n    def create_record_reader(self, context):\n        \"\"\"\n        Create a record reader object.\n\n        Return the new record reader or :obj:`None`, if the Java record\n        reader should be used.\n        \"\"\"\n        return None\n\n    def create_record_writer(self, context):\n        \"\"\"\n        Create an application record writer.\n\n        Return the new record writer or :obj:`None`, if the Java record\n        writer should be used.\n        \"\"\"\n        return None\n"
  },
  {
    "path": "pydoop/mapreduce/binary_protocol.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nClient side of the Hadoop pipes protocol.\n\nRef: ``org.apache.hadoop.mapred.pipes.BinaryProtocol``.\n\"\"\"\n\nimport os\ntry:\n    from cPickle import loads\nexcept ImportError:\n    from pickle import loads\nfrom itertools import groupby\nfrom operator import itemgetter\n\nimport pydoop.config as config\nfrom .api import AVRO_IO_MODES, JobConf\n\n\nPROTOCOL_VERSION = 0\n\n# We can use an enum.IntEnum after dropping Python2 compatibility\nSTART = 0\nSET_JOB_CONF = 1\nSET_INPUT_TYPES = 2\nRUN_MAP = 3\nMAP_ITEM = 4\nRUN_REDUCE = 5\nREDUCE_KEY = 6\nREDUCE_VALUE = 7\nCLOSE = 8\nABORT = 9\nAUTHENTICATION_REQ = 10\nOUTPUT = 50\nPARTITIONED_OUTPUT = 51\nSTATUS = 52\nPROGRESS = 53\nDONE = 54\nREGISTER_COUNTER = 55\nINCREMENT_COUNTER = 56\nAUTHENTICATION_RESP = 57\n\nCMD_REPR = {\n    START: \"START\",\n    SET_JOB_CONF: \"SET_JOB_CONF\",\n    SET_INPUT_TYPES: \"SET_INPUT_TYPES\",\n    RUN_MAP: \"RUN_MAP\",\n    MAP_ITEM: \"MAP_ITEM\",\n    RUN_REDUCE: \"RUN_REDUCE\",\n    REDUCE_KEY: \"REDUCE_KEY\",\n    REDUCE_VALUE: \"REDUCE_VALUE\",\n    CLOSE: \"CLOSE\",\n    ABORT: \"ABORT\",\n    AUTHENTICATION_REQ: \"AUTHENTICATION_REQ\",\n    OUTPUT: \"OUTPUT\",\n    PARTITIONED_OUTPUT: \"PARTITIONED_OUTPUT\",\n    STATUS: \"STATUS\",\n    PROGRESS: \"PROGRESS\",\n    DONE: \"DONE\",\n    REGISTER_COUNTER: \"REGISTER_COUNTER\",\n    
INCREMENT_COUNTER: \"INCREMENT_COUNTER\",\n    AUTHENTICATION_RESP: \"AUTHENTICATION_RESP\",\n}\n\nIS_JAVA_RW = \"mapreduce.pipes.isjavarecordwriter\"\n\n\ndef get_password():\n    try:\n        pass_fn = os.environ[\"hadoop.pipes.shared.secret.location\"]\n    except KeyError:\n        return None\n    with open(pass_fn, \"rb\") as f:\n        return f.read()\n\n\n# _get_* functions to patch the downlink according to the chosen\n# deserialization policy (see below)\n\ndef _get_LongWritable(downlink):\n    assert downlink.stream.read_vint() == 8\n    return downlink.stream.read_long_writable()\n\n\ndef _get_Text(downlink):\n    return downlink.stream.read_string()\n\n\nDESERIALIZERS = {\n    \"org.apache.hadoop.io.LongWritable\": _get_LongWritable,\n    \"org.apache.hadoop.io.Text\": _get_Text,\n}\n\n\ndef _get_avro_key(downlink):\n    raw = downlink.stream.read_bytes()\n    return downlink.avro_key_deserializer.deserialize(raw)\n\n\ndef _get_avro_value(downlink):\n    raw = downlink.stream.read_bytes()\n    return downlink.avro_value_deserializer.deserialize(raw)\n\n\ndef _get_pickled(downlink):\n    return loads(downlink.stream.read_bytes())\n\n\nclass Downlink(object):\n    \"\"\"\\\n    Reads and executes pipes commands as directed by upstream.\n\n    The downlink drives the entire MapReduce task, plugging in user components\n    and calling their methods as necessary. 
A task can be either a **map**\n    task or a **reduce** task, but this is not known until after a few initial\n    commands, as shown below.\n\n    All tasks start with the following commands::\n\n        AUTHENTICATION_REQ\n        START\n        SET_JOB_CONF\n\n    Map tasks follow up with::\n\n        RUN_MAP\n        if java_reader:\n            SET_INPUT_TYPES\n            for k, v in input:\n                MAP_ITEM\n            CLOSE\n\n    Reduce tasks follow up with::\n\n        RUN_REDUCE\n        for k in input:\n            REDUCE_KEY\n            for v in values_for(k):\n                REDUCE_VALUE\n        CLOSE\n\n    In both cases, the inner loop consists of handling the key/value\n    stream. All the code involved in this process, namely:\n\n      * reading and optionally deserializing input keys and values\n      * calling user methods\n      * emitting output keys and values back to upstream\n\n    must be as efficient as possible. For this reason, rather than having the\n    ``get_{k,v}`` methods go through a complex ``if`` tree at every call, we\n    patch the class itself by replacing each method with the one appropriate\n    for the current scenario. 
Note that we can do this because:\n\n      * the deserialization policy (including no deserialization) is the same\n        for all items of a given kind (key or value), meaning that an ``if``\n        tree would pick the same branch for the entire process\n      * there is only one Downlink object in the process, so we don't risk\n        altering the behavior of other instances\n      * the Downlink object is not part of the client API (it's not passed to\n        user code at all)\n\n    Job conf deserialization also needs to be somewhat efficient, since it\n    involves reading thousands of strings.\n    \"\"\"\n\n    def __init__(self, istream, context, **kwargs):\n        self.stream = istream\n        self.context = context\n        self.raw_k = kwargs.get(\"raw_keys\", False)\n        self.raw_v = kwargs.get(\"raw_values\", False)\n        self.password = get_password()\n        self.auth_done = False\n        self.avro_key_deserializer = None\n        self.avro_value_deserializer = None\n\n    def close(self):\n        self.stream.close()\n\n    def read_job_conf(self):\n        n = self.stream.read_vint()\n        if n & 1:\n            raise RuntimeError(\"number of items is not even\")\n        t = self.stream.read_tuple(n * 's')\n        return JobConf(t[i: i + 2] for i in range(0, n, 2))\n\n    def verify_digest(self, digest, challenge):\n        if self.password is not None:\n            self.context._authenticate(self.password, digest, challenge)\n        # self.password is None: assume reading from cmd file\n        self.auth_done = True\n\n    def setup_record_writer(self, piped_output):\n        writer = self.context.create_record_writer()\n        if writer and piped_output:\n            raise RuntimeError(\"record writer defined when not needed\")\n        if not writer and not piped_output:\n            raise RuntimeError(\"record writer not defined\")\n\n    def get_k(self):\n        return self.stream.read_bytes()\n\n    def get_v(self):\n   
     return self.stream.read_bytes()\n\n    def setup_avro_deser(self):\n        try:\n            from pydoop.avrolib import AvroDeserializer\n        except ImportError as e:\n            raise RuntimeError(\"cannot handle avro input: %s\" % e)\n        jc = self.context.job_conf\n        avro_input = jc.get(config.AVRO_INPUT).upper()\n        if avro_input not in AVRO_IO_MODES:\n            raise RuntimeError('invalid avro input mode: %s' % avro_input)\n        if avro_input == 'K' or avro_input == 'KV' and not self.raw_k:\n            schema = jc.get(config.AVRO_KEY_INPUT_SCHEMA)\n            self.avro_key_deserializer = AvroDeserializer(schema)\n            self.__class__.get_k = _get_avro_key\n        if avro_input == 'V' or avro_input == 'KV' and not self.raw_v:\n            schema = jc.get(config.AVRO_VALUE_INPUT_SCHEMA)\n            self.avro_value_deserializer = AvroDeserializer(schema)\n            self.__class__.get_v = _get_avro_value\n\n    def setup_deser(self, key_type, value_type):\n        if not self.raw_k:\n            d = DESERIALIZERS.get(key_type)\n            if d is not None:\n                self.__class__.get_k = d\n        if not self.raw_v:\n            d = DESERIALIZERS.get(value_type)\n            if d is not None:\n                self.__class__.get_v = d\n\n    def __next__(self):\n        cmd = self.stream.read_vint()\n        if cmd != AUTHENTICATION_REQ and not self.auth_done:\n            raise RuntimeError(\"%d received before authentication\" % cmd)\n        if cmd == AUTHENTICATION_REQ:\n            digest, challenge = self.stream.read_tuple('bb')\n            self.verify_digest(digest, challenge)\n        elif cmd == START:\n            v = self.stream.read_vint()\n            if (v != PROTOCOL_VERSION):\n                raise RuntimeError(\"Unknown protocol id: %d\" % v)\n        elif cmd == SET_JOB_CONF:\n            self.context._job_conf = self.read_job_conf()\n            if config.AVRO_OUTPUT in 
self.context.job_conf:\n                self.context._setup_avro_ser()\n        elif cmd == RUN_MAP:\n            self.context.task_type = \"m\"\n            split, nred, piped_input = self.stream.read_tuple('bii')\n            self.context._raw_split = split\n            reader = self.context.create_record_reader()\n            if reader and piped_input:\n                raise RuntimeError(\"record reader defined when not needed\")\n            if not reader and not piped_input:\n                raise RuntimeError(\"record reader not defined\")\n            combiner = self.context.create_combiner()\n            if nred < 1:  # map-only job\n                if combiner:\n                    raise RuntimeError(\"combiner defined in map-only job\")\n                self.context._private_encoding = False\n                piped_output = self.context.job_conf.get_bool(IS_JAVA_RW)\n                self.setup_record_writer(piped_output)\n            self.context.nred = nred\n            self.context.create_mapper()\n            self.context.create_partitioner()\n            if reader:\n                for self.context._key, self.context._value in reader:\n                    self.context.mapper.map(self.context)\n                    self.context.progress_value = reader.get_progress()\n                    self.context.progress()\n                # no more commands from upstream, not even CLOSE\n                try:\n                    self.context.close()\n                finally:\n                    raise StopIteration\n        elif cmd == SET_INPUT_TYPES:\n            key_type, value_type = self.stream.read_tuple('ss')\n            if config.AVRO_INPUT in self.context.job_conf:\n                self.setup_avro_deser()\n            else:\n                self.setup_deser(key_type, value_type)\n        elif cmd == MAP_ITEM:\n            self.context._key = self.get_k()\n            self.context._value = self.get_v()\n            self.context.mapper.map(self.context)\n    
    elif cmd == RUN_REDUCE:\n            self.context.task_type = \"r\"\n            part, piped_output = self.stream.read_tuple('ii')\n            # for some reason, part is always 0\n            self.context.create_reducer()\n            self.setup_record_writer(piped_output)\n            if self.context._private_encoding:\n                self.__class__.get_k = _get_pickled\n                self.__class__.get_v = _get_pickled\n            for cmd, subs in groupby(self, itemgetter(0)):\n                if cmd == REDUCE_KEY:\n                    _, self.context._key = next(subs)\n                if cmd == REDUCE_VALUE:\n                    self.context._values = (v for _, v in subs)\n                    self.context.reducer.reduce(self.context)\n                if cmd == CLOSE:\n                    try:\n                        self.context.close()\n                    finally:\n                        raise StopIteration\n        elif cmd == REDUCE_KEY:\n            k = self.get_k()\n            return cmd, k  # pass on to RUN_REDUCE iterator\n        elif cmd == REDUCE_VALUE:\n            v = self.get_v()\n            return cmd, v  # pass on to RUN_REDUCE iterator\n        elif cmd == ABORT:\n            raise RuntimeError(\"received ABORT command\")\n        elif cmd == CLOSE:\n            if self.context.mapper:\n                try:\n                    self.context.close()\n                finally:\n                    raise StopIteration\n            else:\n                return cmd, None  # pass on to RUN_REDUCE iterator\n        else:\n            raise RuntimeError(\"unknown command: %d\" % cmd)\n\n    def __iter__(self):\n        return self\n\n    # py2 compat\n    def next(self):\n        return self.__next__()\n\n\nclass Uplink(object):\n    \"\"\"\\\n    Writes all information that needs to be sent upstream.\n    \"\"\"\n\n    def __init__(self, stream):\n        self.stream = stream\n\n    def flush(self):\n        self.stream.flush()\n\n    def 
close(self):\n        self.stream.close()\n\n    # pipes commands\n\n    def authenticate(self, response_digest):\n        self.stream.write_tuple(\"ib\", (AUTHENTICATION_RESP, response_digest))\n\n    def output(self, k, v):\n        self.stream.write_output(k, v)\n\n    def partitioned_output(self, part, k, v):\n        self.stream.write_output(k, v, part)\n\n    def status(self, msg):\n        self.stream.write_tuple(\"is\", (STATUS, msg))\n\n    def progress(self, p):\n        self.stream.write_tuple(\"if\", (PROGRESS, p))\n\n    def done(self):\n        self.stream.write_vint(DONE)\n\n    def register_counter(self, id, group, name):\n        self.stream.write_tuple(\"iiss\", (REGISTER_COUNTER, id, group, name))\n\n    def increment_counter(self, id, amount):\n        self.stream.write_tuple(\"iil\", (INCREMENT_COUNTER, id, amount))\n"
  },
  {
    "path": "pydoop/mapreduce/connections.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nSet up communication channels with the MapReduce framework.\n\nIf \"mapreduce.pipes.command.port\" is in the env, this is a \"real\" Hadoop task:\nwe have to connect to the given port and use the socket for live communication\nwith the Java submitter.\n\nIf the above env variable is not defined, but \"mapreduce.pipes.commandfile\"\nis, a pre-compiled binary file containing the entire command list from\nupstream is available at the specified (local) filesystem path.\n\"\"\"\n\nimport os\nimport socket\n\nimport pydoop.sercore as sercore\nfrom .binary_protocol import Downlink, Uplink\n\n\nclass Connection(object):\n    \"\"\"\\\n    Create up/down links and set up references.\n\n    The ref chain is ``downlink -> context -> uplink``, where ``downlink ->\n    context`` is an owned ref and ``context -> uplink`` is a borrowed one\n    (owner is responsible for closing, borrower must **not** close).\n\n    Other refs::\n\n      downlink -> istream (owned)\n      uplink -> ostream (owned)\n      connection -> downlink (owned)\n      connection -> uplink (owned)\n\n    Connection keeps no reference at all to either istream or ostream.\n    \"\"\"\n\n    def __init__(self, context, istream, ostream, **kwargs):\n        self.uplink = context.uplink = Uplink(ostream)\n        self.downlink = Downlink(istream, context, 
**kwargs)\n\n    def close(self):\n        self.uplink.close()\n        self.downlink.close()\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, *args):\n        self.close()\n\n\nclass NetworkConnection(Connection):\n\n    def __init__(self, context, host, port, **kwargs):\n        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n        self.socket.connect((host, port))\n        istream = sercore.FileInStream(self.socket)\n        ostream = sercore.FileOutStream(self.socket)\n        super(NetworkConnection, self).__init__(\n            context, istream, ostream, **kwargs\n        )\n\n    def close(self):\n        super(NetworkConnection, self).close()\n        self.socket.close()\n\n\nclass FileConnection(Connection):\n\n    def __init__(self, context, in_fn, out_fn, **kwargs):\n        istream = sercore.FileInStream(in_fn)\n        ostream = sercore.FileOutStream(out_fn)\n        super(FileConnection, self).__init__(\n            context, istream, ostream, **kwargs\n        )\n\n\ndef get_connection(context, **kwargs):\n    port = os.getenv(\"mapreduce.pipes.command.port\")\n    if port:\n        return NetworkConnection(context, \"localhost\", int(port), **kwargs)\n    in_fn = os.getenv(\"mapreduce.pipes.commandfile\")\n    if in_fn:\n        out_fn = \"%s.out\" % in_fn\n        return FileConnection(context, in_fn, out_fn, **kwargs)\n    raise RuntimeError(\"no pipes source found\")\n"
  },
  {
    "path": "pydoop/mapreduce/pipes.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nPython driver for Hadoop Pipes tasks.\n\nThe intended usage is to import this module in the executable script passed to\n``mapred pipes`` (or ``pydoop submit``) and call ``run_task`` with the\nappropriate arguments (see the docs and examples for further details).\n\"\"\"\n\nimport base64\nimport hashlib\nimport hmac\nimport io\nimport os\nimport struct\n\ntry:\n    from cPickle import dumps, loads, HIGHEST_PROTOCOL\nexcept ImportError:\n    from pickle import dumps, loads, HIGHEST_PROTOCOL\nfrom time import time\nfrom sys import getsizeof as sizeof\n\nimport pydoop.config as config\nimport pydoop.sercore as sercore\n\nfrom . 
import api, connections\n\n# py2 compat\ntry:\n    as_text = unicode\nexcept NameError:\n    as_text = str\n\nPSTATS_DIR = \"PYDOOP_PSTATS_DIR\"\nPSTATS_FMT = \"PYDOOP_PSTATS_FMT\"\nDEFAULT_PSTATS_FMT = \"%s_%05d_%s\"  # task_type, task_id, random suffix\n\nINT_WRITABLE_FMT = \">i\"\nINT_WRITABLE_SIZE = struct.calcsize(INT_WRITABLE_FMT)\n\n\ndef create_digest(key, msg):\n    h = hmac.new(key, msg, hashlib.sha1)\n    return base64.b64encode(h.digest())\n\n\n# extra support for java types, not meant for performance-critical sections\n\ndef read_int_writable(f):\n    buf = f.read(INT_WRITABLE_SIZE)\n    return struct.unpack(INT_WRITABLE_FMT, buf)[0]\n\n\ndef write_int_writable(n, f):\n    f.write(struct.pack(INT_WRITABLE_FMT, n))\n\n\ndef read_bytes_writable(f):\n    length = read_int_writable(f)\n    buf = f.read(length)\n    if len(buf) < length:\n        raise RuntimeError(\"expected %d bytes, found %d\" % (length, len(buf)))\n    return buf\n\n\ndef write_bytes_writable(s, f):\n    write_int_writable(len(s), f)\n    if len(s) > 0:\n        f.write(s)\n\n\nclass FileSplit(api.FileSplit):\n\n    @classmethod\n    def frombuffer(cls, buf):\n        filename, offset, length = sercore.deserialize_file_split(buf)\n        return cls(filename, offset, length)\n\n\nclass OpaqueSplit(api.OpaqueSplit):\n\n    @classmethod\n    def frombuffer(cls, buf):\n        return cls.read(io.BytesIO(buf))\n\n    @classmethod\n    def read(cls, f):\n        return cls(loads(read_bytes_writable(f)))\n\n    def write(self, f):\n        write_bytes_writable(dumps(self.payload, HIGHEST_PROTOCOL), f)\n\n\ndef write_opaque_splits(splits, f):\n    write_int_writable(len(splits), f)\n    for s in splits:\n        s.write(f)\n\n\ndef read_opaque_splits(f):\n    n = read_int_writable(f)\n    return [OpaqueSplit.read(f) for _ in range(n)]\n\n\nclass TaskContext(api.Context):\n\n    JOB_OUTPUT_DIR = \"mapreduce.output.fileoutputformat.outputdir\"\n    TASK_OUTPUT_DIR = 
\"mapreduce.task.output.dir\"\n    TASK_PARTITION = \"mapreduce.task.partition\"\n\n    def __init__(self, factory, **kwargs):\n        self.factory = factory\n        self.uplink = None\n        self.combiner = None\n        self.mapper = None\n        self.partitioner = None\n        self.record_reader = None\n        self.record_writer = None\n        self.reducer = None\n        self.nred = None\n        self.progress_value = 0.0\n        self.last_progress_t = 0.0\n        self.status = None\n        self.counters = {}\n        self.task_type = None\n        self.avro_key_serializer = None\n        self.avro_value_serializer = None\n        self._private_encoding = kwargs.get(\"private_encoding\", True)\n        self._raw_split = None\n        self._input_split = None\n        self._job_conf = {}\n        self._key = None\n        self._value = None\n        self._values = None\n        self.__auto_serialize = kwargs.get(\"auto_serialize\", True)\n        self.__cache = {}\n        self.__cache_size = 0\n        self.__spill_size = None  # delayed until (if) create_combiner\n        self.__spilling = True  # enable actual emit\n\n    def get_input_split(self, raw=False):\n        if raw:\n            return self._raw_split\n        if not self._input_split:\n            if config.PIPES_EXTERNALSPLITS_URI in self._job_conf:\n                self._input_split = OpaqueSplit.frombuffer(self._raw_split)\n            else:\n                self._input_split = FileSplit.frombuffer(self._raw_split)\n        return self._input_split\n\n    def get_job_conf(self):\n        return self._job_conf\n\n    def get_input_key(self):\n        return self._key\n\n    def get_input_value(self):\n        return self._value\n\n    def get_input_values(self):\n        return self._values\n\n    def create_combiner(self):\n        self.combiner = self.factory.create_combiner(self)\n        if self.combiner:\n            self.__spill_size = 1024 * 1024 * self.job_conf.get_int(\n       
         \"mapreduce.task.io.sort.mb\", 100\n            )\n            self.__spilling = False\n        return self.combiner\n\n    def create_mapper(self):\n        self.mapper = self.factory.create_mapper(self)\n        return self.mapper\n\n    def create_partitioner(self):\n        self.partitioner = self.factory.create_partitioner(self)\n        return self.partitioner\n\n    def create_record_reader(self):\n        self.record_reader = self.factory.create_record_reader(self)\n        return self.record_reader\n\n    def create_record_writer(self):\n        self.record_writer = self.factory.create_record_writer(self)\n        return self.record_writer\n\n    def create_reducer(self):\n        self.reducer = self.factory.create_reducer(self)\n        return self.reducer\n\n    def progress(self):\n        \"\"\"\\\n        Report progress to the Java side.\n\n        This needs to flush the uplink stream, but too many flushes can\n        disrupt performance, so we actually talk to upstream once per second.\n        \"\"\"\n        now = time()\n        if now - self.last_progress_t > 1:\n            self.last_progress_t = now\n            if self.status:\n                self.uplink.status(self.status)\n                self.status = None\n            self.__spill_counters()\n            self.uplink.progress(self.progress_value)\n            self.uplink.flush()\n\n    def set_status(self, status):\n        self.status = status\n        self.progress()\n\n    def get_counter(self, group, name):\n        id = len(self.counters)\n        self.uplink.register_counter(id, group, name)\n        self.uplink.flush()\n        self.counters[id] = 0\n        return id\n\n    def increment_counter(self, counter, amount):\n        try:\n            self.counters[counter] += amount\n        except KeyError:\n            raise ValueError(\"invalid counter: %r\" % (counter,))\n\n    def __spill_counters(self):\n        for c, amount in self.counters.items():\n            if 
amount:\n                self.uplink.increment_counter(c, amount)\n                self.counters[c] = 0\n\n    def _authenticate(self, password, digest, challenge):\n        if create_digest(password, challenge) != digest:\n            raise RuntimeError(\"server failed to authenticate\")\n        response_digest = create_digest(password, digest)\n        self.uplink.authenticate(response_digest)\n        self.uplink.flush()\n\n    def _setup_avro_ser(self):\n        try:\n            from pydoop.avrolib import AvroSerializer\n        except ImportError as e:\n            raise RuntimeError(\"cannot handle avro output: %s\" % e)\n        jc = self.job_conf\n        avro_output = jc.get(config.AVRO_OUTPUT).upper()\n        if avro_output not in api.AVRO_IO_MODES:\n            raise RuntimeError('invalid avro output mode: %s' % avro_output)\n        if avro_output == 'K' or avro_output == 'KV':\n            schema = jc.get(config.AVRO_KEY_OUTPUT_SCHEMA)\n            self.avro_key_serializer = AvroSerializer(schema)\n        if avro_output == 'V' or avro_output == 'KV':\n            schema = jc.get(config.AVRO_VALUE_OUTPUT_SCHEMA)\n            self.avro_value_serializer = AvroSerializer(schema)\n\n    def __maybe_serialize(self, key, value):\n        if self.task_type == \"m\" and self._private_encoding:\n            return dumps(key, HIGHEST_PROTOCOL), dumps(value, HIGHEST_PROTOCOL)\n        if self.avro_key_serializer:\n            key = self.avro_key_serializer.serialize(key)\n        elif self.__auto_serialize:\n            key = as_text(key).encode(\"utf-8\")\n        if self.avro_value_serializer:\n            value = self.avro_value_serializer.serialize(value)\n        elif self.__auto_serialize:\n            value = as_text(value).encode(\"utf-8\")\n        return key, value\n\n    def emit(self, key, value):\n        \"\"\"\\\n        Handle an output key/value pair.\n\n        Reporting progress is strictly necessary only when using a Python\n        record 
writer, because sending an output key/value pair is an implicit\n        progress report. To take advantage of this, however, we would be\n        forced to flush the uplink stream at every output, and that would be\n        too costly. Rather than add a specific timer for this, we just call\n        progress unconditionally and piggyback on its timer instead. Note that\n        when a combiner is caching there is no actual output, so in that case\n        we would need an explicit progress report anyway.\n        \"\"\"\n        if self.__spilling:\n            self.__actual_emit(key, value)\n        else:\n            # key must be hashable\n            self.__cache.setdefault(key, []).append(value)\n            self.__cache_size += sizeof(key) + sizeof(value)\n            if self.__cache_size >= self.__spill_size:\n                self.__spill_all()\n        self.progress()\n\n    def __actual_emit(self, key, value):\n        if self.record_writer:\n            self.record_writer.emit(key, value)\n            return\n        key, value = self.__maybe_serialize(key, value)\n        if self.partitioner:\n            part = self.partitioner.partition(key, self.nred)\n            self.uplink.partitioned_output(part, key, value)\n        else:\n            self.uplink.output(key, value)\n\n    def __spill_all(self):\n        self.__spilling = True\n        for k in sorted(self.__cache):\n            self._key = k\n            self._values = iter(self.__cache[k])\n            self.combiner.reduce(self)\n        self.__cache.clear()\n        self.__cache_size = 0\n        self.__spilling = False\n\n    def close(self):\n        self.uplink.flush()\n        # do *not* call uplink.done while user components are still active\n        try:\n            if self.mapper:\n                self.mapper.close()\n            # handle combiner after mapper (mapper.close can call emit)\n            if self.__cache:\n                self.__spill_all()\n                self.__spilling 
= True  # re-enable emit for combiner.close\n                self.combiner.close()\n            if self.record_reader:\n                self.record_reader.close()\n            if self.record_writer:\n                self.record_writer.close()\n            if self.reducer:\n                self.reducer.close()\n            self.__spill_counters()\n        finally:\n            self.uplink.done()\n            self.uplink.flush()\n\n    def get_output_dir(self):\n        return self.job_conf[self.JOB_OUTPUT_DIR]\n\n    def get_work_path(self):\n        try:\n            return self.job_conf[self.TASK_OUTPUT_DIR]\n        except KeyError:\n            raise RuntimeError(\"%r not set\" % (self.TASK_OUTPUT_DIR,))\n\n    def get_task_partition(self):\n        return self.job_conf.get_int(self.TASK_PARTITION)\n\n    def get_default_work_file(self, extension=\"\"):\n        partition = self.get_task_partition()\n        if partition is None:\n            raise RuntimeError(\"%r not set\" % (self.TASK_PARTITION,))\n        base = self.job_conf.get(\"mapreduce.output.basename\", \"part\")\n        return \"%s/%s-%s-%05d%s\" % (\n            self.get_work_path(), base, self.task_type, partition, extension\n        )\n\n\nclass Factory(api.Factory):\n\n    def __init__(self, mapper_class,\n                 reducer_class=None,\n                 combiner_class=None,\n                 partitioner_class=None,\n                 record_writer_class=None,\n                 record_reader_class=None):\n        self.mclass = mapper_class\n        self.rclass = reducer_class\n        self.cclass = combiner_class\n        self.pclass = partitioner_class\n        self.rwclass = record_writer_class\n        self.rrclass = record_reader_class\n\n    def create_mapper(self, context):\n        return self.mclass(context)\n\n    def create_reducer(self, context):\n        return None if not self.rclass else self.rclass(context)\n\n    def create_combiner(self, context):\n        return None if 
not self.cclass else self.cclass(context)\n\n    def create_partitioner(self, context):\n        return None if not self.pclass else self.pclass(context)\n\n    def create_record_reader(self, context):\n        return None if not self.rrclass else self.rrclass(context)\n\n    def create_record_writer(self, context):\n        return None if not self.rwclass else self.rwclass(context)\n\n\ndef _run(context, **kwargs):\n    with connections.get_connection(context, **kwargs) as connection:\n        for _ in connection.downlink:\n            pass\n\n\ndef run_task(factory, **kwargs):\n    \"\"\"\\\n    Run a MapReduce task.\n\n    Available keyword arguments:\n\n    * ``raw_keys`` (default: :obj:`False`): pass map input keys to context\n      as byte strings (ignore any type information)\n    * ``raw_values`` (default: :obj:`False`): pass map input values to context\n      as byte strings (ignore any type information)\n    * ``private_encoding`` (default: :obj:`True`): automatically serialize map\n      output k/v and deserialize reduce input k/v (pickle)\n    * ``auto_serialize`` (default: :obj:`True`): automatically serialize reduce\n      output (map output in map-only jobs) k/v (call str/unicode then encode as\n      utf-8)\n\n    Advanced keyword arguments:\n\n    * ``pstats_dir``: run the task with cProfile and store stats in this dir\n    * ``pstats_fmt``: use this pattern for pstats filenames (experts only)\n\n    The pstats dir and filename pattern can also be provided via ``pydoop\n    submit`` arguments, with lower precedence in case of clashes.\n    \"\"\"\n    context = TaskContext(factory, **kwargs)\n    pstats_dir = kwargs.get(\"pstats_dir\", os.getenv(PSTATS_DIR))\n    if pstats_dir:\n        import cProfile\n        import tempfile\n        import pydoop.hdfs as hdfs\n        hdfs.mkdir(pstats_dir)\n        fd, pstats_fn = tempfile.mkstemp(suffix=\".pstats\")\n        os.close(fd)\n        cProfile.runctx(\n            \"_run(context, **kwargs)\", 
globals(), locals(),\n            filename=pstats_fn\n        )\n        pstats_fmt = kwargs.get(\n            \"pstats_fmt\",\n            os.getenv(PSTATS_FMT, DEFAULT_PSTATS_FMT)\n        )\n        name = pstats_fmt % (\n            context.task_type,\n            context.get_task_partition(),\n            os.path.basename(pstats_fn)\n        )\n        hdfs.put(pstats_fn, hdfs.path.join(pstats_dir, name))\n    else:\n        _run(context, **kwargs)\n"
  },
  {
    "path": "pydoop/test_support.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nMiscellaneous utilities for testing.\n\"\"\"\n\nfrom __future__ import print_function\n\nimport sys\nimport os\nimport tempfile\n\nfrom pydoop.hdfs import default_is_local\nfrom pydoop.utils.py3compat import iteritems\n\n\ndef __inject_pos(code, start=0):\n    pos = code.find(\"import\", start)\n    if pos < 0:\n        return start\n    pos = code.rfind(os.linesep, 0, pos) + 1\n    endpos = code.find(os.linesep, pos) + 1\n    if \"__future__\" in code[pos:endpos]:\n        return __inject_pos(code, endpos)\n    else:\n        return pos\n\n\ndef inject_code(new_code, target_code):\n    \"\"\"\n    Inject new_code into target_code, before the first non-future import.\n\n    NOTE: this is just a hack to make examples work out-of-the-box, in\n    the general case it can fail in several ways.\n    \"\"\"\n    new_code = \"{0}#--AUTO-INJECTED--{0}{1}{0}#-----------------{0}\".format(\n        os.linesep, os.linesep.join(new_code.strip().splitlines())\n    )\n    pos = __inject_pos(target_code)\n    return target_code[:pos] + new_code + target_code[pos:]\n\n\ndef add_sys_path(target_code):\n    new_code = os.linesep.join([\n        \"import sys\",\n        \"sys.path = %r\" % (sys.path,)\n    ])\n    return inject_code(new_code, target_code)\n\n\ndef set_python_cmd(code, python_cmd=sys.executable):\n    python_cmd = 
python_cmd.strip()\n    if not python_cmd.startswith(os.sep):\n        python_cmd = os.path.join(\"\", \"usr\", \"bin\", \"env\", python_cmd)\n    if code.startswith(\"#!\"):\n        pos = code.find(os.linesep, 2)\n        code = \"\" if pos < 0 else code[pos + 1:]\n    return \"#!%s%s%s\" % (python_cmd, os.linesep, code)\n\n\ndef adapt_script(code, python_cmd=sys.executable):\n    return set_python_cmd(add_sys_path(code), python_cmd=python_cmd)\n\n\ndef parse_mr_output(output, vtype=str):\n    d = {}\n    for line in output.splitlines():\n        if line.isspace():\n            continue\n        try:\n            k, v = line.split()\n            v = vtype(v)\n        except (ValueError, TypeError):\n            raise ValueError(\"bad output format\")\n        if k in d:\n            raise ValueError(\"duplicate key: %r\" % (k,))\n        d[k] = v\n    return d\n\n\ndef compare_counts(c1, c2):\n    if len(c1) != len(c2):\n        print(len(c1), len(c2))\n        return \"number of keys differs\"\n    keys = sorted(c1)\n    if sorted(c2) != keys:\n        return \"key lists are different\"\n    for k in keys:\n        if c1[k] != c2[k]:\n            return \"values are different for key %r (%r != %r)\" % (\n                k, c1[k], c2[k]\n            )\n\n\nclass LocalWordCount(object):\n\n    def __init__(self, input_path, min_occurrence=0, stop_words=None):\n        self.input_path = input_path\n        self.min_occurrence = min_occurrence\n        self.stop_words = frozenset(stop_words or [])\n        self.__expected_output = None\n\n    @property\n    def expected_output(self):\n        if self.__expected_output is None:\n            self.__expected_output = self.run()\n        return self.__expected_output\n\n    def run(self):\n        wc = {}\n        if os.path.isdir(self.input_path):\n            for fn in os.listdir(self.input_path):\n                if fn[0] == \".\":\n                    continue\n                self._wordcount_file(wc, fn, 
self.input_path)\n        else:\n            self._wordcount_file(wc, self.input_path)\n        if self.min_occurrence:\n            wc = dict(t for t in iteritems(wc) if t[1] >= self.min_occurrence)\n        return wc\n\n    def _wordcount_file(self, wc, fn, path=None):\n        with open(os.path.join(path, fn) if path else fn) as f:\n            for line in f:\n                for w in line.split():\n                    if w not in self.stop_words:\n                        wc[w] = wc.get(w, 0) + 1\n\n    def check(self, output):\n        res = compare_counts(\n            parse_mr_output(output, vtype=int), self.expected_output\n        )\n        if res:\n            return \"ERROR: %s\" % res\n        else:\n            return \"OK.\"\n\n\ndef get_wd_prefix(base=\"pydoop_\"):\n    if default_is_local():\n        return os.path.join(tempfile.gettempdir(), \"pydoop_\")\n    else:\n        return base\n"
  },
  {
    "path": "pydoop/test_utils.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nUtilities for unit tests.\n\"\"\"\n\nimport sys\nimport os\nimport random\nimport uuid\nimport tempfile\nimport imp\nimport unittest\nimport shutil\nimport warnings\nimport subprocess\n\nimport pydoop\nimport pydoop.utils.jvm as jvm\nfrom pydoop.utils.py3compat import StringIO\n\nJAVA_HOME = jvm.get_java_home()\nJAVA = os.path.join(JAVA_HOME, \"bin\", \"java\")\nJAVAC = os.path.join(JAVA_HOME, \"bin\", \"javac\")\n\n_RANDOM_DATA_SIZE = 32\n# Default NameNode RPC port. 8020 for all versions except 3.0.0. See\n# https://issues.apache.org/jira/browse/HDFS-12990\n_DEFAULT_HDFS_PORT = 8020\n_DEFAULT_BYTES_PER_CHECKSUM = 512\n\n\ndef _get_special_chr():\n    \"\"\"\n    This is used to check unicode support.  On some systems, depending\n    on locale settings, we won't be able to use non-ASCII characters\n    when interacting with system calls.  
Since in such cases it\n    doesn't really make sense to run these tests we set UNI_CHR to a\n    regular ASCII character.\n    \"\"\"\n    # something outside the latin-1 range\n    the_chr = u'\\N{CYRILLIC CAPITAL LETTER O WITH DIAERESIS}'\n    fd = None\n    fname = None\n    try:\n        fd, fname = tempfile.mkstemp(suffix=the_chr)\n    except UnicodeEncodeError:\n        msg = (\"local file system doesn't support unicode characters\"\n               \"in filenames, falling back to ASCII-only\")\n        warnings.warn(msg, UnicodeWarning)\n        the_chr = u's'\n    finally:\n        if fd:\n            os.close(fd)\n            os.remove(fname)\n    return the_chr\n\n\nUNI_CHR = _get_special_chr()\n\n_FD_MAP = {\n    \"stdout\": sys.stdout.fileno(),\n    \"stderr\": sys.stderr.fileno(),\n}\n\n\nclass FSTree(object):\n    \"\"\"\n  >>> t = FSTree('root')\n  >>> d1 = t.add('d1')\n  >>> f1 = t.add('f1', 0)\n  >>> d2 = d1.add('d2')\n  >>> f2 = d2.add('f2', 0)\n  >>> for x in t.walk(): print x.name, x.kind\n  ...\n  root 1\n  d1 1\n  d2 1\n  f2 0\n  f1 0\n  \"\"\"\n\n    def __init__(self, name, kind=1):\n        assert kind in (0, 1)  # (file, dir)\n        self.name = name\n        self.kind = kind\n        if self.kind:\n            self.children = []\n\n    def add(self, name, kind=1):\n        t = FSTree(name, kind)\n        self.children.append(t)\n        return t\n\n    def walk(self):\n        yield self\n        if self.kind:\n            for c in self.children:\n                for t in c.walk():\n                    yield t\n\n\ndef make_wd(fs, prefix=\"pydoop_test_\"):\n    if fs.host:\n        wd = \"%s%s\" % (prefix, uuid.uuid4().hex)\n        fs.create_directory(wd)\n        return fs.get_path_info(wd)['name']\n    else:\n        return tempfile.mkdtemp(prefix=prefix)\n\n\ndef make_random_data(size=_RANDOM_DATA_SIZE, printable=True):\n    randint = random.randint\n    start, stop = (32, 126) if printable else (0, 255)\n    return 
bytes(bytearray([randint(start, stop) for _ in range(size)]))\n\n\ndef get_bytes_per_checksum():\n    params = pydoop.hadoop_params()\n    return int(params.get('dfs.bytes-per-checksum',\n                          params.get('io.bytes.per.checksum',\n                                     _DEFAULT_BYTES_PER_CHECKSUM)))\n\n\ndef silent_call(func, *args, **kwargs):\n    with open(os.devnull, \"w\") as dev_null:\n        cache = {}\n        for s in \"stdout\", \"stderr\":\n            cache[s] = os.dup(_FD_MAP[s])\n            os.dup2(dev_null.fileno(), _FD_MAP[s])\n        try:\n            ret = func(*args, **kwargs)\n        finally:\n            for s in \"stdout\", \"stderr\":\n                os.dup2(cache[s], _FD_MAP[s])\n    return ret\n\n\ndef get_module(name, path=None):\n\n    fp, pathname, description = imp.find_module(name, path)\n    try:\n        module = imp.load_module(name, fp, pathname, description)\n        return module\n    finally:\n        fp.close()\n\n\ndef compile_java(java_file, classpath, opts=None):\n    if opts is None:\n        opts = []\n    java_class_file = os.path.splitext(\n        os.path.realpath(java_file)\n    )[0] + '.class'\n    if (not os.path.exists(java_class_file) or\n            os.path.getmtime(java_file) > os.path.getmtime(java_class_file)):\n        cmd = [JAVAC] + opts\n        if not {\"-cp\", \"-classpath\"}.intersection(opts):\n            cmd.extend([\"-cp\", classpath])\n        cmd.append(java_file)\n        try:\n            subprocess.check_call(cmd, cwd=os.path.dirname(java_file))\n        except subprocess.CalledProcessError as e:\n            raise RuntimeError(\"Error compiling Java file %s\\n%s\" % (\n                java_file, e))\n\n\ndef run_java(jclass, classpath, args, wd):\n    try:\n        subprocess.check_call([JAVA, '-cp', classpath, jclass] + args, cwd=wd)\n    except subprocess.CalledProcessError as e:\n        raise RuntimeError(\"Error running Java class %s\\n%s\" % (\n            jclass, 
e))\n\n\ndef get_java_output_stream(jclass, classpath, args, wd):\n    output = subprocess.check_output(\n        [JAVA, '-cp', classpath, jclass] + args,\n        cwd=wd, stderr=open('/dev/null', 'w'))\n    return StringIO(output)\n\n\nclass WDTestCase(unittest.TestCase):\n\n    def setUp(self):\n        self.wd = tempfile.mkdtemp(prefix='pydoop_test_')\n\n    def tearDown(self):\n        shutil.rmtree(self.wd)\n\n    def _mkfn(self, basename):\n        return os.path.join(self.wd, basename)\n\n    def _mkf(self, basename, mode='w'):\n        return open(self._mkfn(basename), mode)\n"
  },
  {
    "path": "pydoop/utils/__init__.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nGeneral purpose utilities.\n\"\"\"\n\n__all__ = [\n    'NullHandler',\n    'NullLogger',\n    'make_random_str',\n]\n\nfrom .misc import NullHandler, NullLogger, make_random_str\n"
  },
  {
    "path": "pydoop/utils/conversion_tables.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# up-to-date as of Hadoop 2.7.1: http://hadoop.apache.org/docs/r2.7.1/\n#   hadoop-project-dist/hadoop-common/DeprecatedProperties.html\nmrv1_to_mrv2 = {\n    'create.empty.dir.if.nonexist':\n    'mapreduce.jobcontrol.createdir.ifnotexist',\n\n    'dfs.access.time.precision':\n    'dfs.namenode.accesstime.precision',\n\n    'dfs.backup.address':\n    'dfs.namenode.backup.address',\n\n    'dfs.backup.http.address':\n    'dfs.namenode.backup.http-address',\n\n    'dfs.balance.bandwidthPerSec':\n    'dfs.datanode.balance.bandwidthPerSec',\n\n    'dfs.block.size':\n    'dfs.blocksize',\n\n    'dfs.data.dir':\n    'dfs.datanode.data.dir',\n\n    'dfs.datanode.max.xcievers':\n    'dfs.datanode.max.transfer.threads',\n\n    'dfs.df.interval':\n    'fs.df.interval',\n\n    'dfs.federation.nameservice.id':\n    'dfs.nameservice.id',\n\n    'dfs.federation.nameservices':\n    'dfs.nameservices',\n\n    'dfs.http.address':\n    'dfs.namenode.http-address',\n\n    'dfs.https.address':\n    'dfs.namenode.https-address',\n\n    'dfs.https.client.keystore.resource':\n    'dfs.client.https.keystore.resource',\n\n    'dfs.https.need.client.auth':\n    'dfs.client.https.need-auth',\n\n    'dfs.max.objects':\n    'dfs.namenode.max.objects',\n\n    'dfs.max-repl-streams':\n    'dfs.namenode.replication.max-streams',\n\n    'dfs.name.dir':\n    
'dfs.namenode.name.dir',\n\n    'dfs.name.dir.restore':\n    'dfs.namenode.name.dir.restore',\n\n    'dfs.name.edits.dir':\n    'dfs.namenode.edits.dir',\n\n    'dfs.permissions':\n    'dfs.permissions.enabled',\n\n    'dfs.permissions.supergroup':\n    'dfs.permissions.superusergroup',\n\n    'dfs.read.prefetch.size':\n    'dfs.client.read.prefetch.size',\n\n    'dfs.replication.considerLoad':\n    'dfs.namenode.replication.considerLoad',\n\n    'dfs.replication.interval':\n    'dfs.namenode.replication.interval',\n\n    'dfs.replication.min':\n    'dfs.namenode.replication.min',\n\n    'dfs.replication.pending.timeout.sec':\n    'dfs.namenode.replication.pending.timeout-sec',\n\n    'dfs.safemode.extension':\n    'dfs.namenode.safemode.extension',\n\n    'dfs.safemode.threshold.pct':\n    'dfs.namenode.safemode.threshold-pct',\n\n    'dfs.secondary.http.address':\n    'dfs.namenode.secondary.http-address',\n\n    'dfs.socket.timeout':\n    'dfs.client.socket-timeout',\n\n    'dfs.umaskmode':\n    'fs.permissions.umask-mode',\n\n    'dfs.write.packet.size':\n    'dfs.client-write-packet-size',\n\n    'fs.checkpoint.dir':\n    'dfs.namenode.checkpoint.dir',\n\n    'fs.checkpoint.edits.dir':\n    'dfs.namenode.checkpoint.edits.dir',\n\n    'fs.checkpoint.period':\n    'dfs.namenode.checkpoint.period',\n\n    'fs.default.name':\n    'fs.defaultFS',\n\n    'hadoop.configured.node.mapping':\n    'net.topology.configured.node.mapping',\n\n    'hadoop.job.history.location':\n    'mapreduce.jobtracker.jobhistory.location',\n\n    'hadoop.native.lib':\n    'io.native.lib.available',\n\n    'hadoop.net.static.resolutions':\n    'mapreduce.tasktracker.net.static.resolutions',\n\n    'hadoop.pipes.command-file.keep':\n    'mapreduce.pipes.commandfile.preserve',\n\n    'hadoop.pipes.executable.interpretor':\n    'mapreduce.pipes.executable.interpretor',\n\n    'hadoop.pipes.executable':\n    'mapreduce.pipes.executable',\n\n    'hadoop.pipes.java.mapper':\n    
'mapreduce.pipes.isjavamapper',\n\n    'hadoop.pipes.java.recordreader':\n    'mapreduce.pipes.isjavarecordreader',\n\n    'hadoop.pipes.java.recordwriter':\n    'mapreduce.pipes.isjavarecordwriter',\n\n    'hadoop.pipes.java.reducer':\n    'mapreduce.pipes.isjavareducer',\n\n    'hadoop.pipes.partitioner':\n    'mapreduce.pipes.partitioner',\n\n    'heartbeat.recheck.interval':\n    'dfs.namenode.heartbeat.recheck-interval',\n\n    'io.bytes.per.checksum':\n    'dfs.bytes-per-checksum',\n\n    'io.sort.factor':\n    'mapreduce.task.io.sort.factor',\n\n    'io.sort.mb':\n    'mapreduce.task.io.sort.mb',\n\n    'io.sort.spill.percent':\n    'mapreduce.map.sort.spill.percent',\n\n    'jobclient.completion.poll.interval':\n    'mapreduce.client.completion.pollinterval',\n\n    'jobclient.output.filter':\n    'mapreduce.client.output.filter',\n\n    'jobclient.progress.monitor.poll.interval':\n    'mapreduce.client.progressmonitor.pollinterval',\n\n    'job.end.notification.url':\n    'mapreduce.job.end-notification.url',\n\n    'job.end.retry.attempts':\n    'mapreduce.job.end-notification.retry.attempts',\n\n    'job.end.retry.interval':\n    'mapreduce.job.end-notification.retry.interval',\n\n    'job.local.dir':\n    'mapreduce.job.local.dir',\n\n    'keep.failed.task.files':\n    'mapreduce.task.files.preserve.failedtasks',\n\n    'keep.task.files.pattern':\n    'mapreduce.task.files.preserve.filepattern',\n\n    'key.value.separator.in.input.line':\n    'mapreduce.input.keyvaluelinerecordreader.key.value.separator',\n\n    'local.cache.size':\n    'mapreduce.tasktracker.cache.local.size',\n\n    'map.input.file':\n    'mapreduce.map.input.file',\n\n    'map.input.length':\n    'mapreduce.map.input.length',\n\n    'map.input.start':\n    'mapreduce.map.input.start',\n\n    'map.output.key.field.separator':\n    'mapreduce.map.output.key.field.separator',\n\n    'map.output.key.value.fields.spec':\n    'mapreduce.fieldsel.map.output.key.value.fields.spec',\n\n    
'mapred.acls.enabled':\n    'mapreduce.cluster.acls.enabled',\n\n    'mapred.binary.partitioner.left.offset':\n    'mapreduce.partition.binarypartitioner.left.offset',\n\n    'mapred.binary.partitioner.right.offset':\n    'mapreduce.partition.binarypartitioner.right.offset',\n\n    'mapred.cache.archives':\n    'mapreduce.job.cache.archives',\n\n    'mapred.cache.archives.timestamps':\n    'mapreduce.job.cache.archives.timestamps',\n\n    'mapred.cache.files':\n    'mapreduce.job.cache.files',\n\n    'mapred.cache.files.timestamps':\n    'mapreduce.job.cache.files.timestamps',\n\n    'mapred.cache.localArchives':\n    'mapreduce.job.cache.local.archives',\n\n    'mapred.cache.localFiles':\n    'mapreduce.job.cache.local.files',\n\n    'mapred.child.tmp':\n    'mapreduce.task.tmp.dir',\n\n    'mapred.cluster.average.blacklist.threshold':\n    'mapreduce.jobtracker.blacklist.average.threshold',\n\n    'mapred.cluster.map.memory.mb':\n    'mapreduce.cluster.mapmemory.mb',\n\n    'mapred.cluster.max.map.memory.mb':\n    'mapreduce.jobtracker.maxmapmemory.mb',\n\n    'mapred.cluster.max.reduce.memory.mb':\n    'mapreduce.jobtracker.maxreducememory.mb',\n\n    'mapred.cluster.reduce.memory.mb':\n    'mapreduce.cluster.reducememory.mb',\n\n    'mapred.committer.job.setup.cleanup.needed':\n    'mapreduce.job.committer.setup.cleanup.needed',\n\n    'mapred.compress.map.output':\n    'mapreduce.map.output.compress',\n\n    'mapred.data.field.separator':\n    'mapreduce.fieldsel.data.field.separator',\n\n    'mapred.debug.out.lines':\n    'mapreduce.task.debugout.lines',\n\n    'mapred.healthChecker.interval':\n    'mapreduce.tasktracker.healthchecker.interval',\n\n    'mapred.healthChecker.script.args':\n    'mapreduce.tasktracker.healthchecker.script.args',\n\n    'mapred.healthChecker.script.path':\n    'mapreduce.tasktracker.healthchecker.script.path',\n\n    'mapred.healthChecker.script.timeout':\n    'mapreduce.tasktracker.healthchecker.script.timeout',\n\n    
'mapred.heartbeats.in.second':\n    'mapreduce.jobtracker.heartbeats.in.second',\n\n    'mapred.hosts.exclude':\n    'mapreduce.jobtracker.hosts.exclude.filename',\n\n    'mapred.hosts':\n    'mapreduce.jobtracker.hosts.filename',\n\n    'mapred.inmem.merge.threshold':\n    'mapreduce.reduce.merge.inmem.threshold',\n\n    'mapred.input.dir.formats':\n    'mapreduce.input.multipleinputs.dir.formats',\n\n    'mapred.input.dir.mappers':\n    'mapreduce.input.multipleinputs.dir.mappers',\n\n    'mapred.input.dir':\n    'mapreduce.input.fileinputformat.inputdir',\n\n    'mapred.input.pathFilter.class':\n    'mapreduce.input.pathFilter.class',\n\n    'mapred.jar':\n    'mapreduce.job.jar',\n\n    'mapred.job.classpath.archives':\n    'mapreduce.job.classpath.archives',\n\n    'mapred.job.classpath.files':\n    'mapreduce.job.classpath.files',\n\n    'mapred.job.id':\n    'mapreduce.job.id',\n\n    'mapred.jobinit.threads':\n    'mapreduce.jobtracker.jobinit.threads',\n\n    'mapred.job.map.memory.mb':\n    'mapreduce.map.memory.mb',\n\n    'mapred.job.name':\n    'mapreduce.job.name',\n\n    'mapred.job.priority':\n    'mapreduce.job.priority',\n\n    'mapred.job.queue.name':\n    'mapreduce.job.queuename',\n\n    'mapred.job.reduce.input.buffer.percent':\n    'mapreduce.reduce.input.buffer.percent',\n\n    'mapred.job.reduce.markreset.buffer.percent':\n    'mapreduce.reduce.markreset.buffer.percent',\n\n    'mapred.job.reduce.memory.mb':\n    'mapreduce.reduce.memory.mb',\n\n    'mapred.job.reduce.total.mem.bytes':\n    'mapreduce.reduce.memory.totalbytes',\n\n    'mapred.job.reuse.jvm.num.tasks':\n    'mapreduce.job.jvm.numtasks',\n\n    'mapred.job.shuffle.input.buffer.percent':\n    'mapreduce.reduce.shuffle.input.buffer.percent',\n\n    'mapred.job.shuffle.merge.percent':\n    'mapreduce.reduce.shuffle.merge.percent',\n\n    'mapred.job.tracker.handler.count':\n    'mapreduce.jobtracker.handler.count',\n\n    'mapred.job.tracker.history.completed.location':\n    
'mapreduce.jobtracker.jobhistory.completed.location',\n\n    'mapred.job.tracker.http.address':\n    'mapreduce.jobtracker.http.address',\n\n    'mapred.jobtracker.instrumentation':\n    'mapreduce.jobtracker.instrumentation',\n\n    'mapred.jobtracker.job.history.block.size':\n    'mapreduce.jobtracker.jobhistory.block.size',\n\n    'mapred.job.tracker.jobhistory.lru.cache.size':\n    'mapreduce.jobtracker.jobhistory.lru.cache.size',\n\n    'mapred.job.tracker':\n    'mapreduce.jobtracker.address',\n\n    'mapred.jobtracker.maxtasks.per.job':\n    'mapreduce.jobtracker.maxtasks.perjob',\n\n    'mapred.job.tracker.persist.jobstatus.active':\n    'mapreduce.jobtracker.persist.jobstatus.active',\n\n    'mapred.job.tracker.persist.jobstatus.dir':\n    'mapreduce.jobtracker.persist.jobstatus.dir',\n\n    'mapred.job.tracker.persist.jobstatus.hours':\n    'mapreduce.jobtracker.persist.jobstatus.hours',\n\n    'mapred.jobtracker.restart.recover':\n    'mapreduce.jobtracker.restart.recover',\n\n    'mapred.job.tracker.retiredjobs.cache.size':\n    'mapreduce.jobtracker.retiredjobs.cache.size',\n\n    'mapred.job.tracker.retire.jobs':\n    'mapreduce.jobtracker.retirejobs',\n\n    'mapred.jobtracker.taskalloc.capacitypad':\n    'mapreduce.jobtracker.taskscheduler.taskalloc.capacitypad',\n\n    'mapred.jobtracker.taskScheduler':\n    'mapreduce.jobtracker.taskscheduler',\n\n    'mapred.jobtracker.taskScheduler.maxRunningTasksPerJob':\n    'mapreduce.jobtracker.taskscheduler.maxrunningtasks.perjob',\n\n    'mapred.join.expr':\n    'mapreduce.join.expr',\n\n    'mapred.join.keycomparator':\n    'mapreduce.join.keycomparator',\n\n    'mapred.lazy.output.format':\n    'mapreduce.output.lazyoutputformat.outputformat',\n\n    'mapred.line.input.format.linespermap':\n    'mapreduce.input.lineinputformat.linespermap',\n\n    'mapred.linerecordreader.maxlength':\n    'mapreduce.input.linerecordreader.line.maxlength',\n\n    'mapred.local.dir':\n    'mapreduce.cluster.local.dir',\n\n 
   'mapred.local.dir.minspacekill':\n    'mapreduce.tasktracker.local.dir.minspacekill',\n\n    'mapred.local.dir.minspacestart':\n    'mapreduce.tasktracker.local.dir.minspacestart',\n\n    'mapred.map.child.env':\n    'mapreduce.map.env',\n\n    'mapred.map.child.java.opts':\n    'mapreduce.map.java.opts',\n\n    'mapred.map.child.log.level':\n    'mapreduce.map.log.level',\n\n    'mapred.map.max.attempts':\n    'mapreduce.map.maxattempts',\n\n    'mapred.map.output.compression.codec':\n    'mapreduce.map.output.compress.codec',\n\n    'mapred.mapoutput.key.class':\n    'mapreduce.map.output.key.class',\n\n    'mapred.mapoutput.value.class':\n    'mapreduce.map.output.value.class',\n\n    'mapred.mapper.regex.group':\n    'mapreduce.mapper.regexmapper..group',\n\n    'mapred.mapper.regex':\n    'mapreduce.mapper.regex',\n\n    'mapred.map.task.debug.script':\n    'mapreduce.map.debug.script',\n\n    'mapred.map.tasks':\n    'mapreduce.job.maps',\n\n    'mapred.map.tasks.speculative.execution':\n    'mapreduce.map.speculative',\n\n    'mapred.max.map.failures.percent':\n    'mapreduce.map.failures.maxpercent',\n\n    'mapred.max.reduce.failures.percent':\n    'mapreduce.reduce.failures.maxpercent',\n\n    'mapred.max.split.size':\n    'mapreduce.input.fileinputformat.split.maxsize',\n\n    'mapred.max.tracker.blacklists':\n    'mapreduce.jobtracker.tasktracker.maxblacklists',\n\n    'mapred.max.tracker.failures':\n    'mapreduce.job.maxtaskfailures.per.tracker',\n\n    'mapred.merge.recordsBeforeProgress':\n    'mapreduce.task.merge.progress.records',\n\n    'mapred.min.split.size':\n    'mapreduce.input.fileinputformat.split.minsize',\n\n    'mapred.min.split.size.per.node':\n    'mapreduce.input.fileinputformat.split.minsize.per.node',\n\n    'mapred.min.split.size.per.rack':\n    'mapreduce.input.fileinputformat.split.minsize.per.rack',\n\n    'mapred.output.compression.codec':\n    'mapreduce.output.fileoutputformat.compress.codec',\n\n    
'mapred.output.compression.type':\n    'mapreduce.output.fileoutputformat.compress.type',\n\n    'mapred.output.compress':\n    'mapreduce.output.fileoutputformat.compress',\n\n    'mapred.output.dir':\n    'mapreduce.output.fileoutputformat.outputdir',\n\n    'mapred.output.key.class':\n    'mapreduce.job.output.key.class',\n\n    'mapred.output.key.comparator.class':\n    'mapreduce.job.output.key.comparator.class',\n\n    'mapred.output.value.class':\n    'mapreduce.job.output.value.class',\n\n    'mapred.output.value.groupfn.class':\n    'mapreduce.job.output.group.comparator.class',\n\n    'mapred.permissions.supergroup':\n    'mapreduce.cluster.permissions.supergroup',\n\n    'mapred.pipes.user.inputformat':\n    'mapreduce.pipes.inputformat',\n\n    'mapred.reduce.child.env':\n    'mapreduce.reduce.env',\n\n    'mapred.reduce.child.java.opts':\n    'mapreduce.reduce.java.opts',\n\n    'mapred.reduce.child.log.level':\n    'mapreduce.reduce.log.level',\n\n    'mapred.reduce.max.attempts':\n    'mapreduce.reduce.maxattempts',\n\n    'mapred.reduce.parallel.copies':\n    'mapreduce.reduce.shuffle.parallelcopies',\n\n    'mapred.reduce.slowstart.completed.maps':\n    'mapreduce.job.reduce.slowstart.completedmaps',\n\n    'mapred.reduce.task.debug.script':\n    'mapreduce.reduce.debug.script',\n\n    'mapred.reduce.tasks':\n    'mapreduce.job.reduces',\n\n    'mapred.reduce.tasks.speculative.execution':\n    'mapreduce.reduce.speculative',\n\n    'mapred.seqbinary.output.key.class':\n    'mapreduce.output.seqbinaryoutputformat.key.class',\n\n    'mapred.seqbinary.output.value.class':\n    'mapreduce.output.seqbinaryoutputformat.value.class',\n\n    'mapred.shuffle.connect.timeout':\n    'mapreduce.reduce.shuffle.connect.timeout',\n\n    'mapred.shuffle.read.timeout':\n    'mapreduce.reduce.shuffle.read.timeout',\n\n    'mapred.skip.attempts.to.start.skipping':\n    'mapreduce.task.skip.start.attempts',\n\n    'mapred.skip.map.auto.incr.proc.count':\n    
'mapreduce.map.skip.proc-count.auto-incr',\n\n    'mapred.skip.map.max.skip.records':\n    'mapreduce.map.skip.maxrecords',\n\n    'mapred.skip.on':\n    'mapreduce.job.skiprecords',\n\n    'mapred.skip.out.dir':\n    'mapreduce.job.skip.outdir',\n\n    'mapred.skip.reduce.auto.incr.proc.count':\n    'mapreduce.reduce.skip.proc-count.auto-incr',\n\n    'mapred.skip.reduce.max.skip.groups':\n    'mapreduce.reduce.skip.maxgroups',\n\n    'mapred.speculative.execution.slowNodeThreshold':\n    'mapreduce.job.speculative.slownodethreshold',\n\n    'mapred.speculative.execution.slowTaskThreshold':\n    'mapreduce.job.speculative.slowtaskthreshold',\n\n    'mapred.speculative.execution.speculativeCap':\n    'mapreduce.job.speculative.speculativecap',\n\n    'mapred.submit.replication':\n    'mapreduce.client.submit.file.replication',\n\n    'mapred.system.dir':\n    'mapreduce.jobtracker.system.dir',\n\n    'mapred.task.cache.levels':\n    'mapreduce.jobtracker.taskcache.levels',\n\n    'mapred.task.id':\n    'mapreduce.task.attempt.id',\n\n    'mapred.task.is.map':\n    'mapreduce.task.ismap',\n\n    'mapred.task.partition':\n    'mapreduce.task.partition',\n\n    'mapred.task.profile':\n    'mapreduce.task.profile',\n\n    'mapred.task.profile.maps':\n    'mapreduce.task.profile.maps',\n\n    'mapred.task.profile.params':\n    'mapreduce.task.profile.params',\n\n    'mapred.task.profile.reduces':\n    'mapreduce.task.profile.reduces',\n\n    'mapred.task.timeout':\n    'mapreduce.task.timeout',\n\n    'mapred.tasktracker.dns.interface':\n    'mapreduce.tasktracker.dns.interface',\n\n    'mapred.tasktracker.dns.nameserver':\n    'mapreduce.tasktracker.dns.nameserver',\n\n    'mapred.tasktracker.events.batchsize':\n    'mapreduce.tasktracker.events.batchsize',\n\n    'mapred.tasktracker.expiry.interval':\n    'mapreduce.jobtracker.expire.trackers.interval',\n\n    'mapred.task.tracker.http.address':\n    'mapreduce.tasktracker.http.address',\n\n    
'mapred.tasktracker.indexcache.mb':\n    'mapreduce.tasktracker.indexcache.mb',\n\n    'mapred.tasktracker.instrumentation':\n    'mapreduce.tasktracker.instrumentation',\n\n    'mapred.tasktracker.map.tasks.maximum':\n    'mapreduce.tasktracker.map.tasks.maximum',\n\n    'mapred.tasktracker.memory_calculator_plugin':\n    'mapreduce.tasktracker.resourcecalculatorplugin',\n\n    'mapred.tasktracker.memorycalculatorplugin':\n    'mapreduce.tasktracker.resourcecalculatorplugin',\n\n    'mapred.tasktracker.reduce.tasks.maximum':\n    'mapreduce.tasktracker.reduce.tasks.maximum',\n\n    'mapred.task.tracker.report.address':\n    'mapreduce.tasktracker.report.address',\n\n    'mapred.task.tracker.task-controller':\n    'mapreduce.tasktracker.taskcontroller',\n\n    'mapred.tasktracker.taskmemorymanager.monitoring-interval':\n    'mapreduce.tasktracker.taskmemorymanager.monitoringinterval',\n\n    'mapred.tasktracker.tasks.sleeptime-before-sigkill':\n    'mapreduce.tasktracker.tasks.sleeptimebeforesigkill',\n\n    'mapred.temp.dir':\n    'mapreduce.cluster.temp.dir',\n\n    'mapred.text.key.comparator.options':\n    'mapreduce.partition.keycomparator.options',\n\n    'mapred.text.key.partitioner.options':\n    'mapreduce.partition.keypartitioner.options',\n\n    'mapred.textoutputformat.separator':\n    'mapreduce.output.textoutputformat.separator',\n\n    'mapred.tip.id':\n    'mapreduce.task.id',\n\n    'mapreduce.combine.class':\n    'mapreduce.job.combine.class',\n\n    'mapreduce.inputformat.class':\n    'mapreduce.job.inputformat.class',\n\n    'mapreduce.job.counters.limit':\n    'mapreduce.job.counters.max',\n\n    'mapreduce.jobtracker.permissions.supergroup':\n    'mapreduce.cluster.permissions.supergroup',\n\n    'mapreduce.map.class':\n    'mapreduce.job.map.class',\n\n    'mapreduce.outputformat.class':\n    'mapreduce.job.outputformat.class',\n\n    'mapreduce.partitioner.class':\n    'mapreduce.job.partitioner.class',\n\n    'mapreduce.reduce.class':\n    
'mapreduce.job.reduce.class',\n\n    'mapred.used.genericoptionsparser':\n    'mapreduce.client.genericoptionsparser.used',\n\n    'mapred.userlog.limit.kb':\n    'mapreduce.task.userlog.limit.kb',\n\n    'mapred.userlog.retain.hours':\n    'mapreduce.job.userlog.retain.hours',\n\n    'mapred.working.dir':\n    'mapreduce.job.working.dir',\n\n    'mapred.work.output.dir':\n    'mapreduce.task.output.dir',\n\n    'min.num.spills.for.combine':\n    'mapreduce.map.combine.minspills',\n\n    'reduce.output.key.value.fields.spec':\n    'mapreduce.fieldsel.reduce.output.key.value.fields.spec',\n\n    'security.job.submission.protocol.acl':\n    'security.job.client.protocol.acl',\n\n    'security.task.umbilical.protocol.acl':\n    'security.job.task.protocol.acl',\n\n    'sequencefile.filter.class':\n    'mapreduce.input.sequencefileinputfilter.class',\n\n    'sequencefile.filter.frequency':\n    'mapreduce.input.sequencefileinputfilter.frequency',\n\n    'sequencefile.filter.regex':\n    'mapreduce.input.sequencefileinputfilter.regex',\n\n    'session.id':\n    'dfs.metrics.session-id',\n\n    # duplicate key :-o\n    # 'slave.host.name':\n    # 'dfs.datanode.hostname',\n\n    'slave.host.name':\n    'mapreduce.tasktracker.host.name',\n\n    'tasktracker.contention.tracking':\n    'mapreduce.tasktracker.contention.tracking',\n\n    'tasktracker.http.threads':\n    'mapreduce.tasktracker.http.threads',\n\n    'topology.node.switch.mapping.impl':\n    'net.topology.node.switch.mapping.impl',\n\n    'topology.script.file.name':\n    'net.topology.script.file.name',\n\n    'topology.script.number.args':\n    'net.topology.script.number.args',\n\n    'user.name':\n    'mapreduce.job.user.name',\n\n    'webinterface.private.actions':\n    'mapreduce.jobtracker.webinterface.trusted',\n\n    ('yarn.app.mapreduce.yarn.app.mapreduce.'\n     'client-am.ipc.max-retries-on-timeouts'):\n    'yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts',\n}\n\nmrv2_to_mrv1 = dict((t[1], 
t[0]) for t in mrv1_to_mrv2.items())\n"
  },
  {
    "path": "pydoop/utils/jvm.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\nimport shutil\nimport string\nimport subprocess\nimport sys\nimport tempfile\nimport fnmatch\n\n\nJPROG = string.Template(\"\"\"\\\npublic class ${classname} {\n  public static void main(String[] args) {\n    System.out.println(System.getProperty(\"java.home\"));\n  }\n}\n\"\"\")\n\n\ndef get_java_home():\n    \"\"\"\\\n    Try getting JAVA_HOME from system properties.\n\n    We are interested in the JDK home, containing include/jni.h, while the\n    java.home property points to the JRE home. 
If a JDK is installed, however,\n    the two are (usually) related: the JDK home is either the same directory\n    as the JRE home (recent java versions) or its parent (and java.home points\n    to jdk_home/jre).\n    \"\"\"\n    error = RuntimeError(\"java home not found, try setting JAVA_HOME\")\n    try:\n        return os.environ[\"JAVA_HOME\"]\n    except KeyError:\n        wd = tempfile.mkdtemp(prefix='pydoop_')\n        jclass = \"Temp\"\n        jsrc = os.path.join(wd, \"%s.java\" % jclass)\n        with open(jsrc, \"w\") as f:\n            f.write(JPROG.substitute(classname=jclass))\n        try:\n            subprocess.check_call([\"javac\", jsrc])\n            path = subprocess.check_output(\n                [\"java\", \"-cp\", wd, jclass], universal_newlines=True\n            )\n        except (OSError, UnicodeDecodeError, subprocess.CalledProcessError):\n            raise error\n        finally:\n            shutil.rmtree(wd)\n        path = os.path.normpath(path.strip())\n        if os.path.exists(os.path.join(path, \"include\", \"jni.h\")):\n            return path\n        path = os.path.dirname(path)\n        if os.path.exists(os.path.join(path, \"include\", \"jni.h\")):\n            return path\n        raise error\n\n\ndef load_jvm_lib(java_home=None):\n    if not java_home:\n        java_home = get_java_home()\n    jvm_path, jvm_lib = get_jvm_lib_path_and_name(java_home)\n    if jvm_path and jvm_lib:\n        from ctypes import CDLL\n        CDLL(os.path.join(jvm_path, jvm_lib))\n    else:\n        raise ImportError(\"Unable to load the JVM dynamic library\")\n\n\ndef get_include_dirs():\n    java_home = get_java_home()\n    dirs = [os.path.join(java_home, 'include'),\n            os.path.join('native', 'jni_include'),\n            os.path.join(java_home, 'lib')]\n    if sys.platform == 'win32':\n        dirs += [os.path.join(java_home, 'include', 'win32')]\n    elif sys.platform == 'darwin':\n        dirs += [os.path.join(java_home, 'include', 
'darwin')]\n    elif sys.platform.startswith('freebsd'):\n        dirs += [os.path.join(java_home, 'include', 'freebsd')]\n    else:  # linux\n        dirs += [os.path.join(java_home, 'include', 'linux')]\n    return dirs\n\n\ndef get_libraries():\n    libraries = []\n    if sys.platform == 'win32':\n        libraries += ['Advapi32']\n    elif sys.platform == 'darwin':\n        libraries += ['dl', 'jvm']\n    elif sys.platform.startswith('freebsd'):\n        libraries += ['jvm']\n    else:  # linux etc.\n        libraries += ['dl', \"jvm\"]\n    return libraries\n\n\ndef get_macros():\n    macros = []\n    if sys.platform == 'win32':\n        macros += [('WIN32', 1)]\n    elif sys.platform == 'darwin':\n        macros += [('MACOSX', 1)]\n    else:  # linux etc.\n        pass\n    return macros\n\n\ndef get_jvm_lib_path_and_name(java_home=None):\n    if not java_home:\n        java_home = get_java_home()\n    jvm_lib_name = None\n    if sys.platform == 'win32':\n        jvm_lib_name = \"jvm.dll\"  # FIXME: check the library name\n    elif sys.platform == 'darwin':\n        jvm_lib_name = \"libjvm.dylib\"\n    else:  # linux\n        jvm_lib_name = \"libjvm.so\"\n    jvm_path = find_file(java_home, jvm_lib_name)\n    return os.path.dirname(jvm_path), jvm_lib_name if jvm_path else None\n\n\ndef check_jni_header(include_dirs=None):\n    for d in include_dirs:\n        if os.path.exists(os.path.join(d, 'jni.h')):\n            found_jni = True\n            break\n    if not found_jni:\n        import warnings\n        warnings.warn('Falling back to provided JNI headers: ' +\n                      'unable to find jni.h in your JAVA_HOME')\n\n\ndef find_file(path, to_find):\n    result = None\n    for element in os.listdir(path):\n        if result:\n            break\n        if fnmatch.fnmatch(element, to_find):\n            fullPath = os.path.join(path, element)\n            result = fullPath\n        if not result and os.path.isdir(os.path.join(path, element)):\n       
     result = find_file(os.path.join(path, element), to_find)\n    return result\n"
  },
  {
    "path": "pydoop/utils/misc.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nMiscellaneous utilities.\n\"\"\"\n\nimport logging\nimport time\nimport uuid\n\n\nDEFAULT_LOG_LEVEL = \"WARNING\"\n\n\nclass NullHandler(logging.Handler):\n    def emit(self, record):\n        pass\n\n\nclass NullLogger(logging.Logger):\n    def __init__(self):\n        logging.Logger.__init__(self, \"null\")\n        self.propagate = 0\n        self.handlers = [NullHandler()]\n\n\ndef make_random_str(prefix=\"pydoop_\", postfix=''):\n    return \"%s%s%s\" % (prefix, uuid.uuid4().hex, postfix)\n\n\nclass Timer(object):\n\n    def __init__(self, ctx, counter_group=None):\n        self.ctx = ctx\n        self._start_times = {}\n        self._counters = {}\n        self._counter_group = counter_group if counter_group else \"Timer\"\n\n    def _gen_counter_name(self, event):\n        return \"TIME_\" + event.upper() + \" (ms)\"\n\n    def _get_time_counter(self, name):\n        if name not in self._counters:\n            counter_name = self._gen_counter_name(name)\n            self._counters[name] = self.ctx.get_counter(\n                self._counter_group, counter_name\n            )\n        return self._counters[name]\n\n    def start(self, s):\n        self._start_times[s] = time.time()\n\n    def stop(self, s):\n        delta_ms = 1000 * (time.time() - self._start_times[s])\n        
self.ctx.increment_counter(self._get_time_counter(s), int(delta_ms))\n\n    def time_block(self, event_name):\n        return self.TimingBlock(self, event_name)\n\n    class TimingBlock(object):\n\n        def __init__(self, timer, event_name):\n            self._timer = timer\n            self._event_name = event_name\n\n        def __enter__(self):\n            self._timer.start(self._event_name)\n            return self._timer\n\n        def __exit__(self, exception_type, exception_val, exception_tb):\n            self._timer.stop(self._event_name)\n            return False\n"
  },
  {
    "path": "pydoop/utils/py3compat.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\nimport sys\nfrom abc import ABCMeta\n\n_is_py3 = sys.version_info[0] == 3\n__all__ = [\n    \"ABC\",\n    \"basestring\",\n    \"bintype\",\n    \"cfilter\",\n    \"clong\",\n    \"cmap\",\n    \"configparser\",\n    \"czip\",\n    \"iteritems\",\n    \"parser_read\",\n    \"pickle\",\n    \"socketserver\",\n    \"StringIO\",\n    \"unicode\",\n    \"xchr\",\n]\n\n\nclass Py2ABC(object):\n    __metaclass__ = ABCMeta\n\n\ndef __identity(x):\n    return x\n\n\ndef __chr(x):\n    return chr(x)\n\n\ndef __iteritems_2(x):\n    return x.iteritems()\n\n\ndef __iteritems_3(x):\n    return x.items()\n\n\ndef __parser_read_2(parser, f):\n    parser.readfp(f)\n\n\ndef __parser_read_3(parser, f):\n    parser.read_file(f)\n\n\nif _is_py3:\n    from io import BytesIO as StringIO\n    from abc import ABC\n    import configparser\n    import pickle\n    import socketserver\n    clong = int\n    #  something that should be interpreted as a string\n    basestring = str\n    unicode = str\n    parser_read = __parser_read_3\n    xchr = __identity\n    czip = zip\n    cmap = map\n    cfilter = filter\n    iteritems = __iteritems_3\n    bintype = bytes\nelse:\n    from itertools import izip as czip\n    from itertools import imap as cmap\n    from itertools import ifilter as cfilter\n    from cStringIO import StringIO\n    import cPickle as 
pickle\n    import ConfigParser as configparser\n    import SocketServer as socketserver\n    parser_read = __parser_read_2\n    #  something that should be interpreted as a string\n    basestring = unicode\n    unicode = unicode\n    clong = long  # noqa: F821\n    xchr = __chr\n    iteritems = __iteritems_2\n    bintype = str\n    ABC = Py2ABC\n"
  },
  {
    "path": "pydoop.properties",
    "content": "AVRO_INPUT=pydoop.mapreduce.avro.input\nAVRO_OUTPUT=pydoop.mapreduce.avro.output\nAVRO_KEY_INPUT_SCHEMA=pydoop.mapreduce.avro.key.input.schema\nAVRO_KEY_OUTPUT_SCHEMA=pydoop.mapreduce.avro.key.output.schema\nAVRO_VALUE_INPUT_SCHEMA=pydoop.mapreduce.avro.value.input.schema\nAVRO_VALUE_OUTPUT_SCHEMA=pydoop.mapreduce.avro.value.output.schema\nPIPES_EXTERNALSPLITS_URI=pydoop.mapreduce.pipes.externalsplits.uri\n"
  },
  {
    "path": "requirements.txt",
    "content": "avro >=1.7.4; python_version < '3'\navro-python3 >=1.7.4; python_version >= '3'\nsetuptools\n\n# examples\nwheel\n"
  },
  {
    "path": "setup.cfg",
    "content": "[flake8]\nignore = E402,W504\nexclude = hadoop*,build\n"
  },
  {
    "path": "setup.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nPydoop is a Python MapReduce and HDFS API for Hadoop.\n\nPydoop is built on top of two C/C++ extension modules: a libhdfs wrapper and a\n(de)serialization library for types used by the Hadoop Pipes protocol. Since\nlibhdfs is, in turn, a JNI wrapper for the HDFS Java code, Pydoop needs a JDK\n(a JRE is not enough) to build.\n\nYou can point Pydoop to the Java home directory by exporting the JAVA_HOME\nenvironment variable. Make sure JAVA_HOME points to the JDK home directory\n(e.g., ${JAVA_HOME}/include/jni.h should be a valid path). If JAVA_HOME is not\ndefined, Pydoop will try to get the JDK home from Java system properties.\n\nTo compile its Java components, Pydoop also needs to find the Hadoop\nlibraries. 
In order to do so, it will try to call ``hadoop classpath``, so\nmake sure that the ``hadoop`` executable is in the PATH.\n\"\"\"\nfrom __future__ import print_function\n\nimport sys\nimport time\nimport os\nimport glob\nimport shutil\nimport itertools\nimport tempfile\n\nSETUPTOOLS_MIN_VER = '3.3'\n\nimport setuptools\nfrom pkg_resources import parse_version  # included in setuptools\nprint('using setuptools version', setuptools.__version__)\nif parse_version(setuptools.__version__) < parse_version(SETUPTOOLS_MIN_VER):\n    raise RuntimeError(\n        'setuptools minimum required version: %s' % SETUPTOOLS_MIN_VER\n    )\n\n# bug: http://bugs.python.org/issue1222585\n# workaround: http://stackoverflow.com/questions/8106258\nfrom distutils.sysconfig import get_config_var\n_UNWANTED_OPTS = frozenset(['-Wstrict-prototypes'])\nos.environ['OPT'] = ' '.join(\n    _ for _ in get_config_var('OPT').strip().split() if _ not in _UNWANTED_OPTS\n)\n\nfrom setuptools import setup, find_packages, Extension\nfrom setuptools.command.build_ext import build_ext\nfrom distutils.command.build import build\nfrom distutils.errors import DistutilsSetupError, CompileError\nfrom distutils import log\n\nimport pydoop\nimport pydoop.utils.jvm as jvm\n\nVERSION_FN = \"VERSION\"\nEXTRA_COMPILE_ARGS = [\"-Wno-write-strings\"]  # http://bugs.python.org/issue6952\n\n# properties file.  
Since the source is in the root dir, filename = basename\nPROP_FN = PROP_BN = pydoop.__propfile_basename__\n\nCONSOLE_SCRIPTS = ['pydoop = pydoop.app.main:main']\nif sys.version_info[0] == 3:\n    CONSOLE_SCRIPTS.append('pydoop3 = pydoop.app.main:main')\nelse:\n    CONSOLE_SCRIPTS.append('pydoop2 = pydoop.app.main:main')\n\n\n# ---------\n# UTILITIES\n# ---------\n\ndef rm_rf(path, dry_run=False):\n    \"\"\"\n    Remove a file or directory tree.\n\n    Won't throw an exception, even if the removal fails.\n    \"\"\"\n    log.info(\"removing %s\" % path)\n    if dry_run:\n        return\n    try:\n        if os.path.isdir(path) and not os.path.islink(path):\n            shutil.rmtree(path)\n        else:\n            os.remove(path)\n    except OSError:\n        pass\n\n\ndef mtime(fn):\n    return os.stat(fn).st_mtime\n\n\ndef must_generate(target, prerequisites):\n    try:\n        return max(mtime(p) for p in prerequisites) > mtime(target)\n    except OSError:\n        return True\n\n\ndef get_version_string():\n    try:\n        with open(VERSION_FN) as f:\n            return f.read().strip()\n    except IOError:\n        raise DistutilsSetupError(\"failed to read version info\")\n\n\ndef write_config(filename=\"pydoop/config.py\"):\n    prereq = PROP_FN\n    if must_generate(filename, [prereq]):\n        props = pydoop.read_properties(PROP_FN)\n        with open(filename, \"w\") as fo:\n            fo.write(\"# GENERATED BY setup.py\\n\")\n            for k in sorted(props):\n                fo.write(\"%s = %r\\n\" % (k, props[k]))\n\n\ndef write_version(filename=\"pydoop/version.py\"):\n    if must_generate(filename, [VERSION_FN]):\n        with open(filename, \"w\") as f:\n            f.write(\"# GENERATED BY setup.py\\n\")\n            f.write(\"version = %r\\n\" % (get_version_string(),))\n\n\nEXTENSION_MODULES = [\n    Extension(\n        'pydoop.native_core_hdfs',\n        include_dirs=[\n            'src/libhdfs',\n            'src/libhdfs/include',\n   
         'src/libhdfs/os/posix',\n        ],\n        sources=list(itertools.chain(\n            glob.iglob('src/libhdfs/*.c'),\n            glob.iglob('src/libhdfs/common/*.c'),\n            glob.iglob('src/libhdfs/os/posix/*.c'),\n            glob.iglob('src/native_core_hdfs/*.cc')\n        )),\n        extra_compile_args=EXTRA_COMPILE_ARGS,\n        # to be finalized at build time\n    ),\n    Extension(\n        'pydoop.sercore',\n        sources=[\n            \"src/sercore/hu_extras.cpp\",\n            \"src/sercore/sercore.cpp\",\n            \"src/sercore/streams.cpp\",\n            \"src/sercore/HadoopUtils/SerialUtils.cc\",\n        ],\n        include_dirs=[\"src/sercore/HadoopUtils\"],\n        extra_compile_args=EXTRA_COMPILE_ARGS + [\"-std=c++11\", \"-O3\"],\n    )\n]\n\n\n# ------------\n# BUILD ENGINE\n# ------------\n\nclass JavaLib(object):\n\n    def __init__(self):\n        self.jar_name = pydoop.jar_name()\n        self.classpath = pydoop.hadoop_classpath()\n        self.java_files = glob.glob(\n            \"src/it/crs4/pydoop/mapreduce/pipes/*.java\"\n        ) + [\"src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java\"]\n        self.dependencies = glob.glob('lib/*.jar')\n        self.properties = [(\n            os.path.join(\"it/crs4/pydoop/mapreduce/pipes\", PROP_BN),\n            PROP_FN\n        )]\n\n\nclass JavaBuilder(object):\n\n    def __init__(self, build_temp, build_lib):\n        self.build_temp = build_temp\n        self.build_lib = build_lib\n        self.java_libs = [JavaLib()]\n\n    def run(self):\n        for jlib in self.java_libs:\n            self.__build_java_lib(jlib)\n\n    def __build_java_lib(self, jlib):\n        package_path = os.path.join(self.build_lib, \"pydoop\")\n        compile_cmd = \"javac\"\n        if jlib.classpath:\n            classpath = [jlib.classpath]\n            for src in jlib.dependencies:\n                dest = os.path.join(package_path, os.path.basename(src))\n                
shutil.copyfile(src, dest)\n                classpath.append(dest)\n            compile_cmd += \" -classpath %s\" % (':'.join(classpath))\n        else:\n            log.warn(\n                \"WARNING: could not set classpath, java code may not compile\"\n            )\n        class_dir = os.path.join(\n            self.build_temp, \"pipes\"\n        )\n        jar_path = os.path.join(package_path, jlib.jar_name)\n        if not os.path.exists(class_dir):\n            os.mkdir(class_dir)\n        compile_cmd += \" -d '%s'\" % class_dir\n        log.info(\"Compiling Java classes\")\n        for f in jlib.java_files:\n            compile_cmd += \" %s\" % f\n        ret = os.system(compile_cmd)\n        if ret:\n            raise DistutilsSetupError(\n                \"Error compiling java component.  Command: %s\" % compile_cmd\n            )\n        log.info(\"Copying properties file\")\n        for p in jlib.properties:\n            prop_file_dest = os.path.join(class_dir, p[0])\n            shutil.copyfile(p[1], prop_file_dest)\n        log.info(\"Making Jar: %s\", jar_path)\n        package_cmd = \"jar -cf %(jar_path)s -C %(class_dir)s ./it\" % {\n            'jar_path': jar_path, 'class_dir': class_dir\n        }\n        log.info(\"Packaging Java classes\")\n        log.info(\"Command: %s\", package_cmd)\n        ret = os.system(package_cmd)\n        if ret:\n            raise DistutilsSetupError(\n                \"Error packaging java component.  
Command: %s\" % package_cmd\n            )\n\n\nclass BuildPydoopExt(build_ext):\n\n    def __have_better_tls(self):\n        log.info(\"checking for TLS support\")\n        test_code = \"int main(void) { static __thread int i = 0; return i; }\"\n        wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        test_src = os.path.join(wd, \"temp.c\")\n        with open(test_src, \"w\") as f:\n            f.write(test_code)\n        try:\n            self.compiler.compile([test_src], output_dir=wd)\n        except CompileError:\n            ret = False\n        else:\n            ret = True\n        shutil.rmtree(wd)\n        return ret\n\n    def __finalize_hdfs(self, ext):\n        \"\"\"\\\n        Adds a few bits that depend on the specific environment.\n\n        Delaying this until the build_ext phase allows non-build commands\n        (e.g., sdist) to be run without java.\n        \"\"\"\n        java_home = jvm.get_java_home()\n        jvm_lib_path, _ = jvm.get_jvm_lib_path_and_name(java_home)\n        ext.include_dirs = jvm.get_include_dirs() + ext.include_dirs\n        ext.libraries = jvm.get_libraries()\n        ext.library_dirs = [os.path.join(java_home, \"Libraries\"), jvm_lib_path]\n        ext.define_macros = jvm.get_macros()\n        ext.extra_link_args = ['-Wl,-rpath,%s' % jvm_lib_path]\n        if self.__have_better_tls():\n            ext.define_macros.append((\"HAVE_BETTER_TLS\", None))\n        try:\n            # too many warnings in libhdfs\n            self.compiler.compiler_so.remove(\"-Wsign-compare\")\n        except (AttributeError, ValueError):\n            pass\n\n    # called for each extension, after compiler has been set up\n    def build_extension(self, ext):\n        if ext.name == \"pydoop.native_core_hdfs\":\n            self.__finalize_hdfs(ext)\n        build_ext.build_extension(self, ext)\n\n\nclass BuildPydoop(build):\n\n    def build_java(self):\n        jb = JavaBuilder(self.build_temp, self.build_lib)\n        jb.run()\n\n    def 
create_tmp(self):\n        if not os.path.exists(self.build_temp):\n            os.mkdir(self.build_temp)\n        if not os.path.exists(self.build_lib):\n            os.mkdir(self.build_lib)\n\n    def clean_up(self):\n        shutil.rmtree(self.build_temp)\n\n    def run(self):\n        write_version()\n        write_config()\n        shutil.copyfile(PROP_FN, os.path.join(\"pydoop\", PROP_BN))\n        build.run(self)\n        try:\n            self.create_tmp()\n            self.build_java()\n        finally:\n            # On NFS, if we clean up right away we have issues with\n            # NFS handles being still in the directory trees to be\n            # deleted.  So, we sleep a bit and then delete\n            time.sleep(0.5)\n            self.clean_up()\n        log.info(\"Build finished\")\n\n\nsetup(\n    name=\"pydoop\",\n    version=get_version_string(),\n    description=pydoop.__doc__.strip().splitlines()[0],\n    long_description=pydoop.__doc__.lstrip(),\n    author=pydoop.__author__,\n    author_email=pydoop.__author_email__,\n    url=pydoop.__url__,\n    download_url=\"https://pypi.python.org/pypi/pydoop\",\n    install_requires=['setuptools>=%s' % SETUPTOOLS_MIN_VER],\n    extras_require={\n        'avro': [\n            'avro>=1.7.4;python_version<\"3\"',\n            'avro-python3>=1.7.4;python_version>=\"3\"',\n        ],\n    },\n    packages=find_packages(exclude=['test', 'test.*']),\n    package_data={\"pydoop\": [PROP_FN]},\n    cmdclass={\n        \"build\": BuildPydoop,\n        \"build_ext\": BuildPydoopExt,\n    },\n    entry_points={'console_scripts': CONSOLE_SCRIPTS},\n    platforms=[\"Linux\"],\n    ext_modules=EXTENSION_MODULES,\n    license=\"Apache-2.0\",\n    keywords=[\"hadoop\", \"mapreduce\"],\n    classifiers=[\n        \"Programming Language :: Python :: 2.7\",\n        \"Programming Language :: Python :: 3.5\",\n        \"License :: OSI Approved :: Apache Software License\",\n        \"Operating System :: POSIX :: 
Linux\",\n        \"Topic :: Software Development :: Libraries :: Application Frameworks\",\n        \"Intended Audience :: Developers\",\n    ],\n    data_files=[\n        ('config', ['README.md']),\n    ],\n    zip_safe=False,\n)\n"
  },
  {
    "path": "src/Py_macros.h",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n#ifndef PYDOOP_PY_MACROS\n#define PYDOOP_PY_MACROS 1\n\n\n// FIXME: PyBytes should be ok in py2.7 too.\n\n#if IS_PY3K\n#define PyInt_Check PyLong_Check\n#define PyInt_AsLong PyLong_AsLong\n#define PyInt_AsSsize_t PyLong_AsSsize_t\n#define PyString_Check PyBytes_Check\n#define PyString_AsString PyBytes_AsString\n#else\n\n#endif\n\n\n#endif  // PYDOOP_PY_MACROS\n"
  },
  {
    "path": "src/buf_macros.h",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#ifndef PYDOOP_BUF_MACROS\n#define PYDOOP_BUF_MACROS 1\n\n\n#if IS_PY3K\n#define _PyBuf_FromStringAndSize(s,nbytes) PyBytes_FromStringAndSize(s, nbytes)\n#define _PyBuf_AS_STRING(b) PyBytes_AS_STRING(b)\n#define _PyBuf_Resize(b, n) _PyBytes_Resize(b, n)\n#define _PyBuf_FromString(x) PyBytes_FromString(x)\n#else\n#define _PyBuf_FromStringAndSize(s,nbytes) PyString_FromStringAndSize(s, nbytes)\n#define _PyBuf_AS_STRING(b) PyString_AS_STRING(b)\n#define _PyBuf_Resize(b, n) _PyString_Resize(b, n)\n#define _PyBuf_FromString(x) PyString_FromString(x)\n#endif\n\n#endif /* PYDOOP_BUF_MACROS */\n"
  },
  {
    "path": "src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java",
    "content": "\n// BEGIN_COPYRIGHT\n// \n// Copyright 2009-2026 CRS4.\n// \n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n// \n//   http://www.apache.org/licenses/LICENSE-2.0\n// \n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n// \n// END_COPYRIGHT\n\npackage it.crs4.pydoop;\n\nimport java.io.DataOutputStream;\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.FSDataOutputStream;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.compress.CompressionCodec;\nimport org.apache.hadoop.io.compress.GzipCodec;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;\nimport org.apache.hadoop.util.*;\nimport org.apache.hadoop.util.Progressable;\n\n/**\n * A TextOutputFormat that doesn't insert a separator between key and value.\n */\npublic class NoSeparatorTextOutputFormat extends TextOutputFormat<Text, Text>\n{\n  public RecordWriter<Text, Text> \n         getRecordWriter(TaskAttemptContext job\n                         ) throws IOException, InterruptedException {\n    final String keyValueSeparator = \"\";\n    Configuration conf = job.getConfiguration();\n    boolean isCompressed = getCompressOutput(job);\n    CompressionCodec codec = null;\n    String extension = \"\";\n    if (isCompressed) {\n      Class<? 
extends CompressionCodec> codecClass = \n        getOutputCompressorClass(job, GzipCodec.class);\n      codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);\n      extension = codec.getDefaultExtension();\n    }\n    Path file = getDefaultWorkFile(job, extension);\n    FileSystem fs = file.getFileSystem(conf);\n    if (!isCompressed) {\n      FSDataOutputStream fileOut = fs.create(file, false);\n      return new LineRecordWriter<Text, Text>(fileOut, keyValueSeparator);\n    } else {\n      FSDataOutputStream fileOut = fs.create(file, false);\n      return new LineRecordWriter<Text, Text>(\n          new DataOutputStream (codec.createOutputStream(fileOut)), keyValueSeparator);\n    }\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/Application.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport java.io.File;\nimport java.io.IOException;\nimport java.net.ServerSocket;\nimport java.net.Socket;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.List;\nimport java.util.Map;\nimport java.util.Random;\n\nimport javax.crypto.SecretKey;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.fs.FSDataOutputStream;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.FileUtil;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.fs.permission.FsPermission;\nimport org.apache.hadoop.io.FloatWritable;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\n\nimport org.apache.hadoop.conf.Configuration;\n/*\n  FIXME org.apache.hadoop.mapred.TaskLog is clearly not what it is expected to\n  be used with org.apache.hadoop.mapreduce.* \n\n  For the time being, we use the following as a stand-in.\n\n  it.crs4.pydoop.mapreduce.pipes.TaskLog;\n*/\n\nimport org.apache.hadoop.mapreduce.TaskInputOutputContext;\nimport 
org.apache.hadoop.mapreduce.TaskAttemptID;\nimport org.apache.hadoop.mapreduce.TaskID;\nimport org.apache.hadoop.mapreduce.MRJobConfig;\nimport org.apache.hadoop.mapreduce.OutputCommitter;\nimport org.apache.hadoop.mapreduce.filecache.DistributedCache;\nimport org.apache.hadoop.mapreduce.security.SecureShuffleUtils;\nimport org.apache.hadoop.mapreduce.security.TokenCache;\nimport org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;\nimport org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;\nimport org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;\nimport org.apache.hadoop.security.token.Token;\nimport org.apache.hadoop.util.ReflectionUtils;\nimport org.apache.hadoop.util.StringUtils;\n\n/**\n * This class is responsible for launching and communicating with the child \n * process.\n */\nclass Application<K1 extends Writable, V1 extends Writable,\n                  K2 extends WritableComparable, V2 extends Writable> {\n    private static final Log LOG = LogFactory.getLog(Application.class.getName());\n    private ServerSocket serverSocket;\n    private Process process;\n    private Socket clientSocket;\n    private OutputHandler<K2, V2> handler;\n    private DownwardProtocol<K1, V1> downlink;\n    static final boolean WINDOWS \n        = System.getProperty(\"os.name\").startsWith(\"Windows\");\n\n    /**\n     * Start the child process to handle the task for us.\n     * @throws IOException\n     * @throws InterruptedException\n     */\n    Application(TaskInputOutputContext<K1,V1,K2,V2> context, \n                DummyRecordReader input) \n        throws IOException, InterruptedException {\n\n        Configuration conf = context.getConfiguration();\n        OutputCommitter committer = context.getOutputCommitter();\n        if (committer instanceof FileOutputCommitter) {\n          conf.set(MRJobConfig.TASK_OUTPUT_DIR,\n                   ((FileOutputCommitter)committer).getWorkPath().toString());\n        }\n        serverSocket = 
new ServerSocket(0);\n        Map<String, String> env = new HashMap<String,String>();\n        // add TMPDIR environment variable with the value of java.io.tmpdir\n        env.put(\"TMPDIR\", System.getProperty(\"java.io.tmpdir\"));\n        env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));\n    \n        //Add token to the environment if security is enabled\n        Token<JobTokenIdentifier> jobToken = \n            TokenCache.getJobToken(context.getCredentials());\n        // This password is used as shared secret key between this application and\n        // child pipes process\n        byte[]  password = jobToken.getPassword();\n        String localPasswordFile = new File(\n            System.getProperty(\"user.dir\"), \"jobTokenPassword\"\n        ).getAbsolutePath();\n        writePasswordToLocalFile(localPasswordFile, password, conf);\n        // FIXME why is this not Submitter.SECRET_LOCATION ?\n        env.put(\"hadoop.pipes.shared.secret.location\", localPasswordFile);\n \n        List<String> cmd = new ArrayList<String>();\n        String interpretor = conf.get(Submitter.INTERPRETOR);\n        if (interpretor != null) {\n            cmd.add(interpretor);\n        }\n        String executable = context.getLocalCacheFiles()[0].toString();\n        if (!(new File(executable).canExecute())) {\n            // LinuxTaskController sets +x permissions on all distcache files already.\n            // In case of DefaultTaskController, set permissions here.\n            FileUtil.chmod(executable, \"u+x\");\n        }\n        cmd.add(executable);\n        // wrap the command in a stdout/stderr capture\n        // we are starting map/reduce task of the pipes job. this is not a cleanup\n        // attempt. 
\n        TaskAttemptID taskid = context.getTaskAttemptID();\n\n        File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);\n        File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);\n        long logLength = TaskLog.getTaskLogLength(conf);\n        cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,\n                                         false);\n        process = runClient(cmd, env);\n        clientSocket = serverSocket.accept();\n    \n        String challenge = getSecurityChallenge();\n        String digestToSend = createDigest(password, challenge);\n        String digestExpected = createDigest(password, digestToSend);\n    \n        handler = new OutputHandler<K2, V2>(context, input, digestExpected);\n        K2 outputKey = (K2)\n            ReflectionUtils.newInstance(context.getOutputKeyClass(), conf);\n        V2 outputValue = (V2) \n            ReflectionUtils.newInstance(context.getOutputValueClass(), conf);\n        downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, \n                                                      outputKey, outputValue, conf);\n\n        downlink.authenticate(digestToSend, challenge);\n        waitForAuthentication();\n        LOG.debug(\"Authentication succeeded\");\n        downlink.start();\n        downlink.setJobConf(conf); \n    }\n\n    private String getSecurityChallenge() {\n        Random rand = new Random(System.currentTimeMillis());\n        //Use 4 random integers so as to have 16 random bytes.\n        StringBuilder strBuilder = new StringBuilder();\n        strBuilder.append(rand.nextInt(0x7fffffff));\n        strBuilder.append(rand.nextInt(0x7fffffff));\n        strBuilder.append(rand.nextInt(0x7fffffff));\n        strBuilder.append(rand.nextInt(0x7fffffff));\n        return strBuilder.toString();\n    }\n\n    private void writePasswordToLocalFile(String localPasswordFile,\n                                          byte[] 
password, \n                                          Configuration conf) throws IOException {\n        FileSystem localFs = FileSystem.getLocal(conf);\n        Path localPath = new Path(localPasswordFile);\n        FSDataOutputStream out = FileSystem.create(localFs, localPath,\n                                                   new FsPermission(\"400\"));\n        out.write(password);\n        out.close();\n    }\n\n    /**\n     * Get the downward protocol object that can send commands down to the\n     * application.\n     * @return the downlink proxy\n     */\n    DownwardProtocol<K1, V1> getDownlink() {\n        return downlink;\n    }\n  \n    /**\n     * Wait for authentication response.\n     * @throws IOException\n     * @throws InterruptedException\n     */\n    void waitForAuthentication() throws IOException,\n        InterruptedException {\n        downlink.flush();\n        LOG.debug(\"Waiting for authentication response\");\n        handler.waitForAuthentication();\n    }\n  \n    /**\n     * Wait for the application to finish\n     * @return did the application finish correctly?\n     * @throws Throwable\n     */\n    boolean waitForFinish() throws Throwable {\n        downlink.flush();\n        return handler.waitForFinish();\n    }\n\n    /**\n     * Abort the application and wait for it to finish.\n     * @param t the exception that signalled the problem\n     * @throws IOException A wrapper around the exception that was passed in\n     */\n    void abort(Throwable t) throws IOException {\n        LOG.info(\"Aborting because of \" + StringUtils.stringifyException(t));\n        try {\n            downlink.abort();\n            downlink.flush();\n        } catch (IOException e) {\n            // IGNORE cleanup problems\n        }\n        try {\n            handler.waitForFinish();\n        } catch (Throwable ignored) {\n            process.destroy();\n        }\n        IOException wrapper = new IOException(\"pipe child exception\");\n        
wrapper.initCause(t);\n        throw wrapper;      \n    }\n  \n    /**\n     * Clean up the child procress and socket.\n     * @throws IOException\n     */\n    void cleanup() throws IOException {\n        serverSocket.close();\n        try {\n            downlink.close();\n        } catch (InterruptedException ie) {\n            Thread.currentThread().interrupt();\n        }      \n    }\n\n    /**\n     * Run a given command in a subprocess, including threads to copy its stdout\n     * and stderr to our stdout and stderr.\n     * @param command the command and its arguments\n     * @param env the environment to run the process in\n     * @return a handle on the process\n     * @throws IOException\n     */\n    static Process runClient(List<String> command, \n                             Map<String, String> env) throws IOException {\n        ProcessBuilder builder = new ProcessBuilder(command);\n        if (env != null) {\n            builder.environment().putAll(env);\n        }\n        Process result = builder.start();\n        return result;\n    }\n  \n    public static String createDigest(byte[] password, String data)\n        throws IOException {\n        SecretKey key = JobTokenSecretManager.createSecretKey(password);\n        return SecureShuffleUtils.hashFromString(data, key);\n    }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/BinaryProtocol.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.BufferedInputStream;\nimport java.io.BufferedOutputStream;\nimport java.io.DataInputStream;\nimport java.io.DataOutputStream;\nimport java.io.FileOutputStream;\nimport java.io.FilterOutputStream;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.io.OutputStream;\nimport java.net.Socket;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.Map;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.io.BytesWritable;\nimport org.apache.hadoop.io.DataOutputBuffer;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.io.WritableUtils;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.util.StringUtils;\n\n/**\n * This protocol is a binary implementation of the Pipes protocol.\n */\nclass BinaryProtocol<K1 extends Writable, V1 extends Writable,\n                     K2 extends 
WritableComparable, V2 extends Writable>\n    implements DownwardProtocol<K1, V1> {\n  \n    public static final int CURRENT_PROTOCOL_VERSION = 0;\n    /**\n     * The buffer size for the command socket\n     */\n    private static final int BUFFER_SIZE = 128*1024;\n\n    private DataOutputStream stream;\n    private DataOutputBuffer buffer = new DataOutputBuffer();\n    private static final Log LOG = \n        LogFactory.getLog(BinaryProtocol.class.getName());\n    private UplinkReaderThread uplink;\n\n    /**\n     * The integer codes to represent the different messages. These must match\n     * the external program codes or massive confusion will result.\n     */\n    private static enum MessageType { START(0),\n                                      SET_JOB_CONF(1),\n                                      SET_INPUT_TYPES(2),\n                                      RUN_MAP(3),\n                                      MAP_ITEM(4),\n                                      RUN_REDUCE(5),\n                                      REDUCE_KEY(6),\n                                      REDUCE_VALUE(7),\n                                      CLOSE(8),\n                                      ABORT(9),\n                                      AUTHENTICATION_REQ(10),\n                                      OUTPUT(50),\n                                      PARTITIONED_OUTPUT(51),\n                                      STATUS(52),\n                                      PROGRESS(53),\n                                      DONE(54),\n                                      REGISTER_COUNTER(55),\n                                      INCREMENT_COUNTER(56),\n                                      AUTHENTICATION_RESP(57);\n                                      final int code;\n                                      MessageType(int code) {\n                                          this.code = code;\n                                      }\n    }\n\n    private static class UplinkReaderThread<K2 
extends WritableComparable,\n        V2 extends Writable>  \n        extends Thread {\n    \n        private DataInputStream inStream;\n        private UpwardProtocol<K2, V2> handler;\n        private K2 key;\n        private V2 value;\n        private boolean authPending = true;\n    \n        public UplinkReaderThread(InputStream stream,\n                                  UpwardProtocol<K2, V2> handler, \n                                  K2 key, V2 value) throws IOException{\n            inStream = new DataInputStream(new BufferedInputStream(stream, \n                                                                   BUFFER_SIZE));\n            this.handler = handler;\n            this.key = key;\n            this.value = value;\n        }\n\n        public void closeConnection() throws IOException {\n            inStream.close();\n        }\n\n        public void run() {\n            while (true) {\n                try {\n                    if (Thread.currentThread().isInterrupted()) {\n                        throw new InterruptedException();\n                    }\n                    int cmd = WritableUtils.readVInt(inStream);\n                    LOG.debug(\"Handling uplink command \" + cmd);\n                    if (cmd == MessageType.AUTHENTICATION_RESP.code) {\n                        String digest = Text.readString(inStream);\n                        authPending = !handler.authenticate(digest);\n                    } else if (authPending) {\n                        LOG.warn(\"Message \" + cmd + \" received before authentication is \"\n                                 + \"complete. 
Ignoring\");\n                        continue;\n                    } else if (cmd == MessageType.OUTPUT.code) {\n                        readObject(key);\n                        readObject(value);\n                        handler.output(key, value);\n                    } else if (cmd == MessageType.PARTITIONED_OUTPUT.code) {\n                        int part = WritableUtils.readVInt(inStream);\n                        readObject(key);\n                        readObject(value);\n                        handler.partitionedOutput(part, key, value);\n                    } else if (cmd == MessageType.STATUS.code) {\n                        handler.status(Text.readString(inStream));\n                    } else if (cmd == MessageType.PROGRESS.code) {\n                        handler.progress(inStream.readFloat());\n                    } else if (cmd == MessageType.REGISTER_COUNTER.code) {\n                        int id = WritableUtils.readVInt(inStream);\n                        String group = Text.readString(inStream);\n                        String name = Text.readString(inStream);\n                        handler.registerCounter(id, group, name);\n                    } else if (cmd == MessageType.INCREMENT_COUNTER.code) {\n                        int id = WritableUtils.readVInt(inStream);\n                        long amount = WritableUtils.readVLong(inStream);\n                        handler.incrementCounter(id, amount);\n                    } else if (cmd == MessageType.DONE.code) {\n                        LOG.debug(\"Pipe child done\");\n                        handler.done();\n                        return;\n                    } else {\n                        throw new IOException(\"Bad command code: \" + cmd);\n                    }\n                } catch (InterruptedException e) {\n                    return;\n                } catch (Throwable e) {\n                    LOG.error(StringUtils.stringifyException(e));\n                    
handler.failed(e);\n                    return;\n                }\n            }\n        }\n    \n        private void readObject(Writable obj) throws IOException {\n            int numBytes = WritableUtils.readVInt(inStream);\n            byte[] buffer;\n            // For BytesWritable and Text, use the specified length to set the length\n            // this causes the \"obvious\" translations to work. So that if you emit\n            // a string \"abc\" from C++, it shows up as \"abc\".\n            if (obj instanceof BytesWritable) {\n                buffer = new byte[numBytes];\n                inStream.readFully(buffer);\n                ((BytesWritable) obj).set(buffer, 0, numBytes);\n            } else if (obj instanceof Text) {\n                buffer = new byte[numBytes];\n                inStream.readFully(buffer);\n                ((Text) obj).set(buffer);\n            } else {\n                obj.readFields(inStream);\n            }\n        }\n    }\n\n    /**\n     * An output stream that will save a copy of the data into a file.\n     */\n    private static class TeeOutputStream extends FilterOutputStream {\n        private OutputStream file;\n        TeeOutputStream(String filename, OutputStream base) throws IOException {\n            super(base);\n            file = new FileOutputStream(filename);\n        }\n        public void write(byte b[], int off, int len) throws IOException {\n            file.write(b,off,len);\n            out.write(b,off,len);\n        }\n\n        public void write(int b) throws IOException {\n            file.write(b);\n            out.write(b);\n        }\n\n        public void flush() throws IOException {\n            file.flush();\n            out.flush();\n        }\n\n        public void close() throws IOException {\n            flush();\n            file.close();\n            out.close();\n        }\n    }\n\n    /**\n     * Create a proxy object that will speak the binary protocol on a socket.\n     * Upward 
messages are passed on the specified handler and downward\n     * downward messages are public methods on this object.\n     * @param sock The socket to communicate on.\n     * @param handler The handler for the received messages.\n     * @param key The object to read keys into.\n     * @param value The object to read values into.\n     * @param config The job's configuration\n     * @throws IOException\n     */\n    public BinaryProtocol(Socket sock, \n                          UpwardProtocol<K2, V2> handler,\n                          K2 key,\n                          V2 value,\n                          Configuration config) throws IOException {\n        OutputStream raw = sock.getOutputStream();\n        // If we are debugging, save a copy of the downlink commands to a file\n        if (Submitter.getKeepCommandFile(config)) {\n            raw = new TeeOutputStream(\"downlink.data\", raw);\n        }\n        stream = new DataOutputStream(new BufferedOutputStream(raw, \n                                                               BUFFER_SIZE)) ;\n        uplink = new UplinkReaderThread<K2, V2>(sock.getInputStream(),\n                                                handler, key, value);\n        uplink.setName(\"pipe-uplink-handler\");\n        uplink.start();\n    }\n\n    /**\n     * Close the connection and shutdown the handler thread.\n     * @throws IOException\n     * @throws InterruptedException\n     */\n    public void close() throws IOException, InterruptedException {\n        LOG.debug(\"closing connection\");\n        stream.close();\n        uplink.closeConnection();\n        uplink.interrupt();\n        uplink.join();\n    }\n  \n    public void authenticate(String digest, String challenge)\n        throws IOException {\n        LOG.debug(\"Sending AUTHENTICATION_REQ, digest=\" + digest + \", challenge=\"\n                  + challenge);\n        WritableUtils.writeVInt(stream, MessageType.AUTHENTICATION_REQ.code);\n        
Text.writeString(stream, digest);\n        Text.writeString(stream, challenge);\n    }\n\n    public void start() throws IOException {\n        LOG.debug(\"starting downlink\");\n        WritableUtils.writeVInt(stream, MessageType.START.code);\n        WritableUtils.writeVInt(stream, CURRENT_PROTOCOL_VERSION);\n    }\n\n    public void setJobConf(Configuration conf) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.SET_JOB_CONF.code);\n        List<String> list = new ArrayList<String>();\n        for(Map.Entry<String, String> itm: conf) {\n            list.add(itm.getKey());\n            list.add(itm.getValue());\n        }\n        WritableUtils.writeVInt(stream, list.size());\n        for(String entry: list){\n            Text.writeString(stream, entry);\n        }\n    }\n\n    public void setInputTypes(String keyType, \n                              String valueType) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.SET_INPUT_TYPES.code);\n        Text.writeString(stream, keyType);\n        Text.writeString(stream, valueType);\n    }\n\n    public void runMap(InputSplit split, int numReduces, \n                       boolean pipedInput) throws IOException {\n        if (!Writable.class.isInstance(split)) {\n          throw new RuntimeException(\"split is not Writable\");\n        }\n        WritableUtils.writeVInt(stream, MessageType.RUN_MAP.code);\n        writeObject((Writable)split);\n        WritableUtils.writeVInt(stream, numReduces);\n        WritableUtils.writeVInt(stream, pipedInput ? 
1 : 0);\n    }\n\n    public void mapItem(Writable key, \n                        Writable value) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.MAP_ITEM.code);\n        writeObject(key);\n        writeObject(value);\n    }\n\n    public void runReduce(int reduce, boolean pipedOutput) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.RUN_REDUCE.code);\n        WritableUtils.writeVInt(stream, reduce);\n        WritableUtils.writeVInt(stream, pipedOutput ? 1 : 0);\n    }\n\n    public void reduceKey(Writable key) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.REDUCE_KEY.code);\n        writeObject(key);\n    }\n\n    public void reduceValue(Writable value) throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.REDUCE_VALUE.code);\n        writeObject(value);\n    }\n\n    public void endOfInput() throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.CLOSE.code);\n        LOG.debug(\"Sent close command\");\n    }\n  \n    public void abort() throws IOException {\n        WritableUtils.writeVInt(stream, MessageType.ABORT.code);\n        LOG.debug(\"Sent abort command\");\n    }\n\n    public void flush() throws IOException {\n        stream.flush();\n    }\n\n    /**\n     * Write the given object to the stream. If it is a Text or BytesWritable,\n     * write it directly. 
Otherwise, write it to a buffer and then write the\n     * length and data to the stream.\n     * @param obj the object to write\n     * @throws IOException\n     */\n    private void writeObject(Writable obj) throws IOException {\n        // For Text and BytesWritable, encode them directly, so that they end up\n        // in C++ as the natural translations.\n        if (obj instanceof Text) {\n            Text t = (Text) obj;\n            int len = t.getLength();\n            WritableUtils.writeVInt(stream, len);\n            stream.write(t.getBytes(), 0, len);\n        } else if (obj instanceof BytesWritable) {\n            BytesWritable b = (BytesWritable) obj;\n            int len = b.getLength();\n            WritableUtils.writeVInt(stream, len);\n            stream.write(b.getBytes(), 0, len);\n        } else if (obj == null) {\n            // write a zero length string\n            WritableUtils.writeVInt(stream, 0);            \n        } else {\n            buffer.reset();\n            obj.write(buffer);\n            int length = buffer.getLength();\n            WritableUtils.writeVInt(stream, length);\n            stream.write(buffer.getData(), 0, length);\n        }\n    }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/DownwardProtocol.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.mapreduce.InputSplit;\n\n\n/**\n * The abstract description of the downward (from Java to external program)\n * Pipes protocol.  
All of these calls are asynchronous and return before the\n * message has been processed.\n */\ninterface DownwardProtocol<K extends Writable, V extends Writable> {\n  /**\n   * request authentication\n   * @throws IOException\n   */\n  void authenticate(String digest, String challenge) throws IOException;\n  \n  /**\n   * Start communication\n   * @throws IOException\n   */\n  void start() throws IOException;\n  \n  /**\n   * Set the Configuration for the task.\n   * @param conf\n   * @throws IOException\n   */\n  void setJobConf(Configuration conf) throws IOException;\n  \n  /**\n   * Set the input types for Maps.\n   * @param keyType the name of the key's type\n   * @param valueType the name of the value's type\n   * @throws IOException\n   */\n  void setInputTypes(String keyType, String valueType) throws IOException;\n  \n  /**\n   * Run a map task in the child.\n   * @param split The input split for this map.\n   * @param numReduces The number of reduces for this job.\n   * @param pipedInput Is the input coming from Java?\n   * @throws IOException\n   */\n  void runMap(InputSplit split, int numReduces, \n              boolean pipedInput) throws IOException;\n  \n  /**\n   * For maps with pipedInput, the key/value pairs are sent via this messaage.\n   * @param key The record's key\n   * @param value The record's value\n   * @throws IOException\n   */\n  void mapItem(K key, V value) throws IOException;\n  \n  /**\n   * Run a reduce task in the child\n   * @param reduce the index of the reduce (0 .. 
numReduces - 1)\n   * @param pipedOutput is the output being sent to Java?\n   * @throws IOException\n   */\n  void runReduce(int reduce, boolean pipedOutput) throws IOException;\n  \n  /**\n   * The reduce should be given a new key\n   * @param key the new key\n   * @throws IOException\n   */\n  void reduceKey(K key) throws IOException;\n  \n  /**\n   * The reduce should be given a new value\n   * @param value the new value\n   * @throws IOException\n   */\n  void reduceValue(V value) throws IOException;\n  \n  /**\n   * The task has no more input coming, but it should finish processing it's \n   * input.\n   * @throws IOException\n   */\n  void endOfInput() throws IOException;\n  \n  /**\n   * The task should stop as soon as possible, because something has gone wrong.\n   * @throws IOException\n   */\n  void abort() throws IOException;\n  \n  /**\n   * Flush the data through any buffers.\n   */\n  void flush() throws IOException;\n  \n  /**\n   * Close the connection.\n   */\n  void close() throws IOException, InterruptedException;\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/DummyRecordReader.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.io.FloatWritable;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.mapreduce.RecordReader;\n\npublic abstract class DummyRecordReader \n    extends RecordReader<FloatWritable, NullWritable> {\n\n    public abstract  boolean next(FloatWritable key, NullWritable value)\n        throws IOException ;\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/OpaqueSplit.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport org.apache.hadoop.mapred.SplitLocationInfo;\n\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.io.BytesWritable;\nimport org.apache.hadoop.io.Writable;\nimport java.io.IOException;\nimport java.io.DataInput;\nimport java.io.DataOutput;\n\n/**\n * An opaque piece of information to be handled on the client side.\n */\nclass OpaqueSplit extends InputSplit implements Writable {\n\n  private BytesWritable payload;\n\n  public OpaqueSplit() {\n    payload = new BytesWritable();\n  }\n\n  public OpaqueSplit(byte[] payload) {\n    this.payload = new BytesWritable(payload);\n  }\n\n  public BytesWritable getPayload() {\n    return payload;\n  }\n\n  @Override\n  public long getLength() {\n    return payload.getLength();\n  }\n\n  @Override\n  public String toString() {\n    return payload.toString();\n  }\n\n  @Override\n  public String[] getLocations() throws IOException {\n    return new String[]{};\n  }\n\n  @Override\n  public SplitLocationInfo[] getLocationInfo() throws IOException {\n    return new SplitLocationInfo[]{};\n  }\n\n  // Writable methods\n\n  
@Override\n  public void write(DataOutput out) throws IOException {\n    payload.write(out);\n  }\n\n  @Override\n  public void readFields(DataInput in) throws IOException {\n    payload.readFields(in);\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/OutputHandler.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.util.HashMap;\nimport java.util.Map;\n\nimport org.apache.hadoop.io.FloatWritable;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.mapreduce.Counters;\nimport org.apache.hadoop.mapreduce.Counter;\nimport org.apache.hadoop.mapreduce.TaskInputOutputContext;\n\n/**\n * Handles the upward (C++ to Java) messages from the application.\n */\nclass OutputHandler<K extends WritableComparable, V extends Writable>\n    implements UpwardProtocol<K, V> {\n  \n    private TaskInputOutputContext context;\n    private float progressValue = 0.0f;\n    private boolean done = false;\n  \n    private Throwable exception = null;\n    //RecordReader<FloatWritable, NullWritable> recordReader = null;\n    DummyRecordReader recordReader = null;\n\n    private Map<Integer, Counter> registeredCounters = \n                    new HashMap<Integer, Counter>();\n\n    private String expectedDigest = null;\n    private boolean digestReceived = false;\n    /**\n     * Create a 
handler that will handle any records output from the application.\n     * @param context the actual input and output interface to the Java hadoop system.\n     * @param expectedDigest \n     */\n    public OutputHandler(TaskInputOutputContext context, \n                         DummyRecordReader recordReader,\n                         String expectedDigest) {\n        this.context = context;\n        this.recordReader = recordReader;\n        this.expectedDigest = expectedDigest;\n    }\n\n    /**\n     * The task output a normal record.\n     */\n    @Override\n    public void output(K key, V value) throws IOException, InterruptedException {\n        context.write(key, value);\n    }\n\n    /**\n     * The task output a record with a partition number attached.\n     */\n    @Override\n    public void partitionedOutput(int reduce, K key, \n                                  V value) throws IOException, InterruptedException  {\n        PipesPartitioner.setNextPartition(reduce);\n        context.write(key, value);\n    }\n\n    /**\n     * Update the status message for the task.\n     */\n    @Override\n    public void status(String msg) {\n        context.setStatus(msg);\n    }\n\n    private FloatWritable progressKey = new FloatWritable(0.0f);\n    private NullWritable nullValue = NullWritable.get();\n    /**\n     * Update the amount done and update above.\n     */\n    @Override\n    public void progress(float progress) throws IOException {\n        progressValue = progress;\n        context.progress();\n        if (recordReader != null) {\n            progressKey.set(progress);\n            recordReader.next(progressKey, nullValue);\n        }\n    }\n\n    /**\n     * The task finished successfully.\n     */\n    @Override\n    public void done() throws IOException {\n        synchronized (this) {\n            done = true;\n            notify();\n        }\n    }\n\n    /**\n     * Get the current amount done.\n     * @return a float between 0.0 and 1.0\n     
*/\n    public float getProgress() {\n        return progressValue;\n    }\n\n    /**\n     * The task failed with an exception.\n     */\n    public void failed(Throwable e) {\n        synchronized (this) {\n            exception = e;\n            notify();\n        }\n    }\n\n    /**\n     * Wait for the task to finish or abort.\n     * @return did the task finish correctly?\n     * @throws Throwable\n     */\n    public synchronized boolean waitForFinish() throws Throwable {\n            while (!done && exception == null) {\n                wait();\n            }\n            if (exception != null) {\n                throw exception;\n            }\n            return done;\n        }\n\n    @Override\n    public void registerCounter(int id, String group, String name) throws IOException {\n        Counter counter = context.getCounter(group, name);\n        registeredCounters.put(id, counter);\n    }\n\n    @Override\n    public void incrementCounter(int id, long amount) throws IOException {\n        if (id < registeredCounters.size()) {\n            Counter counter = registeredCounters.get(id);\n            counter.increment(amount);\n        } else {\n            throw new IOException(\"Invalid counter with id: \" + id);\n        }\n    }\n  \n    public synchronized boolean authenticate(String digest) throws IOException {\n            boolean success = true;\n            if (!expectedDigest.equals(digest)) {\n                exception = new IOException(\"Authentication Failed: Expected digest=\"\n                                            + expectedDigest + \", received=\" + digestReceived);\n                success = false;\n            }\n            digestReceived = true;\n            notify();\n            return success;\n        }\n\n    /**\n     * This is called by Application and blocks the thread until\n     * authentication response is received.\n     * @throws IOException\n     * @throws InterruptedException\n     */\n    synchronized void 
waitForAuthentication()\n                          throws IOException, InterruptedException {\n            while (digestReceived == false && exception == null) {\n                wait();\n            }\n            if (exception != null) {\n                throw new IOException(exception.getMessage());\n            }\n        }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PipesMapper.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\n\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.util.ReflectionUtils;\nimport org.apache.hadoop.mapreduce.Mapper;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.InputFormat;\n\n\n/**\n * An adaptor to run an external mapper.\n */\nclass PipesMapper<K1 extends Writable, V1 extends Writable,\n                  K2 extends WritableComparable, V2 extends Writable>\n    extends Mapper<K1, V1, K2, V2> {\n\n  protected static final Log LOG = LogFactory.getLog(PipesMapper.class);\n\n  Context context;\n  Application<K1, V1, K2, V2> application = null;\n  boolean skipping = false;\n\n  @Override\n  protected void setup(Context context)\n    throws IOException, InterruptedException {\n    this.context = context;\n    //disable the auto increment of 
the counter. For pipes, no of processed\n    //records could be different(equal or less) than the no of records input.\n    // FIXME: disable right now...\n    // SkipBadRecords.setAutoIncrMapperProcCount(context, false);\n  }\n\n  @Override\n  protected void cleanup(Context context)\n      throws IOException, InterruptedException {\n    if (application != null)  {\n      application.cleanup();\n    }\n  }\n\n  @Override\n  public void run(Context context)\n      throws IOException, InterruptedException {\n    setup(context);\n    Configuration conf = context.getConfiguration();\n    InputSplit split = context.getInputSplit();\n    // FIXME: do we really need to be so convoluted?\n    InputFormat<K1, V1> inputFormat;\n    try {\n      inputFormat = (InputFormat<K1, V1>)\n        ReflectionUtils.newInstance(context.getInputFormatClass(), conf);\n    } catch (ClassNotFoundException ce) {\n      throw new RuntimeException(\"class not found\", ce);\n    }\n    RecordReader<K1, V1> input =\n        inputFormat.createRecordReader(split, context);\n    input.initialize(split, context);\n    boolean isJavaInput = Submitter.getIsJavaRecordReader(conf);\n    try {\n      // FIXME: what happens for a java mapper and no java record reader?\n      DummyRecordReader fakeInput =\n          (!isJavaInput && !Submitter.getIsJavaMapper(conf)) ?\n          (DummyRecordReader) input : null;\n      application = new Application<K1, V1, K2, V2>(context, fakeInput);\n    } catch (InterruptedException ie) {\n      throw new RuntimeException(\"interrupted\", ie);\n    }\n    DownwardProtocol<K1, V1> downlink = application.getDownlink();\n    downlink.runMap(context.getInputSplit(),\n        context.getNumReduceTasks(), isJavaInput);\n    boolean skipping = conf.getBoolean(context.SKIP_RECORDS, false);\n    boolean sent_input_types = false;\n    try {\n      if (isJavaInput) {\n        // FIXME\n        while (input.nextKeyValue()) {\n          if (!sent_input_types) {\n            
sent_input_types = true;\n            NullWritable n = NullWritable.get();\n            String kclass_name = n.getClass().getName();\n            String vclass_name = n.getClass().getName();\n            if (input.getCurrentKey() != null) {\n              kclass_name = input.getCurrentKey().getClass().getName();\n            }\n            if (input.getCurrentValue() != null) {\n              vclass_name = input.getCurrentValue().getClass().getName();\n            }\n            downlink.setInputTypes(kclass_name, vclass_name);\n          }\n          downlink.mapItem(input.getCurrentKey(), input.getCurrentValue());\n          if(skipping) {\n            //flush the streams on every record input if running in skip mode\n            //so that we don't buffer other records surrounding a bad record.\n            downlink.flush();\n          }\n        }\n        downlink.endOfInput();\n      }\n      application.waitForFinish();\n    } catch (Throwable t) {\n      application.abort(t);\n    } finally {\n      cleanup(context);\n    }\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PipesNonJavaInputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.List;\nimport java.util.Properties;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.IntWritable;\nimport org.apache.hadoop.io.FloatWritable;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.fs.FSDataInputStream;\nimport org.apache.hadoop.mapreduce.InputFormat;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.JobContext;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.TextInputFormat;\nimport org.apache.hadoop.util.ReflectionUtils;\n\n/**\n * Dummy input format used when non-Java a {@link RecordReader} is used by\n * the Pipes' application.\n *\n * Sets up the Map-Reduce job to get the {@link PipesDummyRecordReader} and\n * the input splits. 
If <i>pydoop.mapreduce.pipes.externalsplits.uri</i> is\n * defined, input splits are read from the specified HDFS URI as a binary\n * sequence in the following format: <N><OBJ_1><OBJ_2>...<OBJ_N>, i.e., a\n * WritableInt N followed by N opaque objects. If it's not defined, input\n * splits are retrieved by invoking the getSplits method of the 'actual'\n * InputFormat specified by the user in <i>mapreduce.pipes.inputformat</i>.\n */\nclass PipesNonJavaInputFormat\n    extends InputFormat<FloatWritable, NullWritable> {\n\n  public List<InputSplit> getSplits(JobContext context)\n      throws IOException, InterruptedException {\n    Properties props = Submitter.getPydoopProperties();\n    Configuration conf = context.getConfiguration();\n    String uri = conf.get(props.getProperty(\"PIPES_EXTERNALSPLITS_URI\"));\n    if (uri != null) {\n      return getOpaqueSplits(conf, uri);\n    } else {\n      return ReflectionUtils.newInstance(\n          conf.getClass(Submitter.INPUT_FORMAT,\n                        TextInputFormat.class,\n                        InputFormat.class), conf).getSplits(context);\n    }\n  }\n\n  private List<InputSplit> getOpaqueSplits(Configuration conf, String uri)\n      throws IOException, InterruptedException {\n    FileSystem fs = FileSystem.get(conf);\n    Path path = new Path(uri);\n    if (!fs.exists(path)) {\n      throw new IOException(uri + \" does not exists\");\n    }\n    List<InputSplit> splits = new ArrayList<InputSplit>();\n    FSDataInputStream in = fs.open(path);\n    try {\n      IntWritable numRecords = new IntWritable();\n      numRecords.readFields(in);\n      for(int i = 0; i < numRecords.get(); i++) {\n        OpaqueSplit o = new OpaqueSplit();\n        o.readFields(in);\n        splits.add(o);\n      }\n    } finally {\n      in.close();\n    }\n    return splits;\n  }\n\n  @Override\n  public DummyRecordReader\n    createRecordReader(InputSplit split, TaskAttemptContext context)\n      throws IOException {\n    return new 
PipesDummyRecordReader(split, context);\n  }\n\n  /**\n   * A dummy {@link org.apache.hadoop.mapreduce.RecordReader} to help track the\n   * progress of Hadoop Pipes applications when they are using a non-Java\n   * <code>RecordReader</code>.\n   *\n   * The <code>PipesDummyRecordReader</code> is informed of the 'progress' of\n   * the task by the {@link OutputHandler#progress(float)} which calls the\n   * {@link #next(FloatWritable, NullWritable)} with the progress as the\n   * <code>key</code>.\n   */\n  static class PipesDummyRecordReader extends DummyRecordReader {\n\n    float progress = 0.0f;\n\n    public PipesDummyRecordReader() {}\n\n    public PipesDummyRecordReader(InputSplit split, TaskAttemptContext context)\n        throws IOException {\n      initialize(split, context);\n    }\n\n    @Override\n    public void initialize(InputSplit split, TaskAttemptContext context)\n        throws IOException {}\n\n    public synchronized void close() throws IOException {}\n\n    @Override\n    public float getProgress() throws IOException, InterruptedException {\n      return progress;\n    }\n\n    @Override\n    public boolean nextKeyValue() throws IOException, InterruptedException {\n      return true;\n    }\n\n    @Override\n    public FloatWritable getCurrentKey()\n        throws IOException, InterruptedException {\n      return new FloatWritable(progress);\n    }\n\n    @Override\n    public NullWritable getCurrentValue()\n        throws IOException, InterruptedException {\n      return null;\n    }\n\n    @Override\n    public synchronized boolean next(FloatWritable key, NullWritable value)\n        throws IOException  {\n      progress = key.get();\n      return true;\n    }\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PipesNonJavaOutputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;\n\n/**\n * Ignores all output, but otherwise behaves like FileOutputFormat\n * (e.g., temp dir management).\n */\npublic class PipesNonJavaOutputFormat<K, V> extends FileOutputFormat<K, V> {\n\n  @Override\n  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {\n    return new RecordWriter<K, V>() {\n        public void write(K key, V value) { }\n        public void close(TaskAttemptContext context) { }\n    };\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PipesPartitioner.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.mapreduce.Partitioner;\nimport org.apache.hadoop.conf.Configurable;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.util.ReflectionUtils;\n\n\n/**\n * This partitioner is one that can either be set manually per a record or it\n * can fall back onto a Java partitioner that was set by the user.\n */\nclass PipesPartitioner<K extends WritableComparable, V extends Writable>\n    extends Partitioner<K, V> \n    implements Configurable {\n  \n    private static ThreadLocal<Integer> cache = new ThreadLocal<Integer>();\n    private Partitioner<K, V> part = null;\n\n    private Configuration conf;\n\n    public void setConf(Configuration conf) {\n        this.conf = conf;\n        part = ReflectionUtils.newInstance(\n                   Submitter.getJavaPartitioner(conf), conf);\n    }\n  \n    public Configuration getConf() {\n        return conf;\n    }\n\n    /**\n     * Set the next key to have the given partition.\n     * @param newValue the next partition value\n 
    */\n    static void setNextPartition(int newValue) {\n        cache.set(newValue);\n    }\n\n    /**\n     * If a partition result was set manually, return it. Otherwise, we call\n     * the Java partitioner.\n     * @param key the key to partition\n     * @param value the value to partition\n     * @param numPartitions the number of reduces\n     */\n    @Override\n    public int getPartition(K key, V value, \n                            int numPartitions) {\n        Integer result = cache.get();\n        if (result == null) {\n            return part.getPartition(key, value, numPartitions);\n        } else {\n            return result;\n        }\n    }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PipesReducer.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.mapreduce.TaskInputOutputContext;\nimport org.apache.hadoop.mapreduce.Reducer;\nimport org.apache.hadoop.mapreduce.ReduceContext;\nimport org.apache.hadoop.mapreduce.MRJobConfig;\nimport org.apache.hadoop.mapred.SkipBadRecords;\n\nimport java.io.IOException;\nimport java.util.Iterator;\n\n/**\n * This class is used to talk to a C++ reduce task.\n */\nclass PipesReducer<K2 extends WritableComparable, V2 extends Writable,\n                   K3 extends WritableComparable, V3 extends Writable>\n    extends Reducer<K2, V2, K3, V3> {\n    private static final Log LOG = LogFactory.getLog(PipesReducer.class.getName());\n    private Context context;\n    private Configuration configuration;\n    private Application<K2, V2, K3, V3> application = null;\n    private DownwardProtocol<K2, V2> downlink = null;\n    private boolean isOk = true;\n\n    @Override\n    
public void setup(Reducer.Context context) {\n        this.context = context;\n        this.configuration = this.context.getConfiguration();\n    }\n\n    /**\n     * Process all of the keys and values. Start up the application if we haven't\n     * started it yet.\n     */\n    @Override\n    public void reduce(K2 key, Iterable<V2> values, Context context)\n        throws IOException, InterruptedException {\n        isOk = false;\n        startApplication();\n        downlink.reduceKey(key);\n        for(V2 value: values) {\n            downlink.reduceValue(value);\n        }\n        isOk = true;\n    }\n\n    @SuppressWarnings(\"unchecked\")\n    private void startApplication() throws IOException {\n        if (application == null) {\n            try {\n                LOG.info(\"starting application\");\n                application = new Application<K2, V2, K3, V3>(context, null);\n                downlink = application.getDownlink();\n            } catch (InterruptedException ie) {\n                throw new RuntimeException(\"interrupted\", ie);\n            }\n            int reduce=0;\n            downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(configuration));\n        }\n    }\n\n    /**\n     * Handle the end of the input by closing down the application.\n     */\n    @Override\n    public void cleanup(Context context) \n        throws IOException, InterruptedException {\n        // if we haven't started the application, we have nothing to do\n        if (isOk) {\n            startApplication();\n        }\n        try {\n            if (isOk) {\n                application.getDownlink().endOfInput();\n            } else {\n                // send the abort to the application and let it clean up\n                application.getDownlink().abort();\n            }\n            LOG.info(\"waiting for finish\");\n            application.waitForFinish();\n            LOG.info(\"got done\");\n        } catch (Throwable t) {\n            
application.abort(t);\n        } finally {\n            application.cleanup();\n        }\n    }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeKeyReader.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.Properties;\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.conf.Configuration;\n\nimport org.apache.avro.generic.IndexedRecord;\n\n\npublic class PydoopAvroBridgeKeyReader\n    extends PydoopAvroBridgeReaderBase<Text, NullWritable> {\n\n  private Properties props;\n\n  public PydoopAvroBridgeKeyReader(\n      RecordReader<? 
extends IndexedRecord, ?> actualReader) {\n    this.actualReader = actualReader;\n    props = Submitter.getPydoopProperties();\n  }\n\n  protected List<IndexedRecord> getInRecords()\n      throws IOException, InterruptedException {\n    IndexedRecord key = (IndexedRecord) actualReader.getCurrentKey();\n    return Arrays.asList(key);\n  }\n\n  public void initialize(InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    super.initialize(split, context);\n    assert schemas.size() == 1;\n    Configuration conf = context.getConfiguration();\n    conf.set(props.getProperty(\"AVRO_INPUT\"), Submitter.AvroIO.K.name());\n    conf.set(props.getProperty(\"AVRO_KEY_INPUT_SCHEMA\"),\n        schemas.get(0).toString());\n  }\n\n  @Override\n  public Text getCurrentKey()\n      throws IOException, InterruptedException {\n    assert outRecords.size() == 1;\n    return outRecords.get(0);\n  }\n\n  @Override\n  public NullWritable getCurrentValue()\n      throws IOException, InterruptedException {\n    return NullWritable.get();\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeKeyValueReader.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.Properties;\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.conf.Configuration;\n\nimport org.apache.avro.generic.IndexedRecord;\n\n\npublic class PydoopAvroBridgeKeyValueReader\n    extends PydoopAvroBridgeReaderBase<Text, Text> {\n\n  private Properties props;\n\n  public PydoopAvroBridgeKeyValueReader(\n      RecordReader<? extends IndexedRecord,\n                   ? 
extends IndexedRecord> actualReader) {\n    this.actualReader = actualReader;\n    props = Submitter.getPydoopProperties();\n  }\n\n  protected List<IndexedRecord> getInRecords()\n      throws IOException, InterruptedException {\n    IndexedRecord key = (IndexedRecord) actualReader.getCurrentKey();\n    IndexedRecord value = (IndexedRecord) actualReader.getCurrentValue();\n    return Arrays.asList(key, value);\n  }\n\n  public void initialize(InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    super.initialize(split, context);\n    assert schemas.size() == 2;\n    Configuration conf = context.getConfiguration();\n    conf.set(props.getProperty(\"AVRO_INPUT\"), Submitter.AvroIO.KV.name());\n    conf.set(props.getProperty(\"AVRO_KEY_INPUT_SCHEMA\"),\n        schemas.get(0).toString());\n    conf.set(props.getProperty(\"AVRO_VALUE_INPUT_SCHEMA\"),\n        schemas.get(1).toString());\n  }\n\n  @Override\n  public Text getCurrentKey()\n      throws IOException, InterruptedException {\n    assert outRecords.size() == 2;\n    return outRecords.get(0);\n  }\n\n  @Override\n  public Text getCurrentValue()\n      throws IOException, InterruptedException {\n    assert outRecords.size() == 2;\n    return outRecords.get(1);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeKeyValueWriter.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.generic.GenericRecord;\n\n\npublic class PydoopAvroBridgeKeyValueWriter\n    extends PydoopAvroBridgeWriterBase {\n\n  public PydoopAvroBridgeKeyValueWriter(\n      RecordWriter<? super GenericRecord, ? super GenericRecord> actualWriter,\n      TaskAttemptContext context) {\n    super(context, Submitter.AvroIO.KV);\n    this.actualWriter = actualWriter;\n  }\n\n  public void write(Text key, Text value)\n      throws IOException, InterruptedException {\n    List<GenericRecord> outRecords = super.getOutRecords(\n        Arrays.asList(key, value));\n    super.write(outRecords);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeKeyWriter.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.generic.GenericRecord;\n\n\npublic class PydoopAvroBridgeKeyWriter extends PydoopAvroBridgeWriterBase {\n\n  public PydoopAvroBridgeKeyWriter(\n      RecordWriter<? super GenericRecord, NullWritable> actualWriter,\n      TaskAttemptContext context) {\n    super(context, Submitter.AvroIO.K);\n    this.actualWriter = actualWriter;\n  }\n\n  public void write(Text key, Text ignore)\n      throws IOException, InterruptedException {\n    List<GenericRecord> outRecords = super.getOutRecords(Arrays.asList(key));\n    super.write(outRecords);\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeReaderBase.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.List;\nimport java.util.ArrayList;\nimport java.util.Iterator;\n\nimport java.io.IOException;\nimport java.io.ByteArrayOutputStream;\n\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.Counter;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.IndexedRecord;\nimport org.apache.avro.generic.GenericDatumWriter;\nimport org.apache.avro.io.DatumWriter;\nimport org.apache.avro.io.EncoderFactory;\nimport org.apache.avro.io.BinaryEncoder;\n\n\npublic abstract class PydoopAvroBridgeReaderBase<K, V>\n    extends RecordReader<K, V> {\n\n  private static final String COUNTERS_GROUP =\n    PydoopAvroBridgeReaderBase.class.getName();\n\n  protected RecordReader actualReader;\n  protected List<Schema> schemas;\n  protected List<Text> outRecords;\n  protected List<DatumWriter<IndexedRecord>> datumWriters;\n  protected List<BinaryEncoder> encoders;\n  protected List<ByteArrayOutputStream> outStreams;\n\n  protected Counter nRecords;\n  protected Counter readTimeCounter;\n  protected Counter serTimeCounter;\n\n  private List<IndexedRecord> bufferedInRecords;\n  private long start;\n 
 private boolean hasRecord;\n\n  /**\n   * Get current record(s) from the actual (input) RecordReader.\n   * The returned list should contain one element for key-only or\n   * value-only readers, two for key/value readers (this is not\n   * enforced here, however).  This method must NOT advance the actual\n   * reader (it's the equivalent of getCurrent{Key,Value}, not of\n   * nextKeyValue).\n   */\n  protected abstract List<IndexedRecord> getInRecords()\n      throws IOException, InterruptedException;\n\n  public void initialize(InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    actualReader.initialize(split, context);\n    nRecords = context.getCounter(COUNTERS_GROUP, \"Number of records\");\n    readTimeCounter = context.getCounter(COUNTERS_GROUP, \"Read time (ms)\");\n    serTimeCounter = context.getCounter(\n        COUNTERS_GROUP, \"Serialization time (ms)\");\n    // peek at the record stream and save the schema(s) so that the concrete\n    // subclass can set the schema property during initialization\n    start = System.nanoTime();\n    hasRecord = actualReader.nextKeyValue();\n    if (hasRecord) {\n      readTimeCounter.increment((System.nanoTime() - start) / 1000000);\n      bufferedInRecords = getInRecords();\n      schemas = new ArrayList<Schema>();\n      datumWriters = new ArrayList<DatumWriter<IndexedRecord>>();\n      outStreams = new ArrayList<ByteArrayOutputStream>();\n      encoders = new ArrayList<BinaryEncoder>();\n      outRecords = new ArrayList<Text>();\n      for (IndexedRecord r: bufferedInRecords) {\n        Schema s = r.getSchema();\n        schemas.add(s);\n        datumWriters.add(new GenericDatumWriter<IndexedRecord>(s));\n        ByteArrayOutputStream stream = new ByteArrayOutputStream();\n        outStreams.add(stream);\n        encoders.add(EncoderFactory.get().binaryEncoder(stream, null));\n        outRecords.add(new Text());\n      }\n    }\n  }\n\n  public synchronized boolean 
nextKeyValue()\n      throws IOException, InterruptedException {\n    List<IndexedRecord> records = null;\n    if (bufferedInRecords == null) {\n      start = System.nanoTime();\n      hasRecord = actualReader.nextKeyValue();\n      if (!hasRecord) {\n        return false;\n      }\n      else {\n        readTimeCounter.increment((System.nanoTime() - start) / 1000000);\n        records = getInRecords();\n      }\n    }\n    else {\n      records = bufferedInRecords;\n      bufferedInRecords = null;\n    }\n    //--\n    Iterator<IndexedRecord> iterRecords = records.iterator();\n    Iterator<DatumWriter<IndexedRecord>> iterWriters = datumWriters.iterator();\n    Iterator<BinaryEncoder> iterEncoders = encoders.iterator();\n    Iterator<ByteArrayOutputStream> iterStreams = outStreams.iterator();\n    Iterator<Text> iterOutRecords = outRecords.iterator();\n    start = System.nanoTime();\n    while (iterRecords.hasNext()) {\n      ByteArrayOutputStream stream = iterStreams.next();\n      BinaryEncoder enc = iterEncoders.next();\n      try {\n        iterWriters.next().write(iterRecords.next(), enc);\n        enc.flush();\n      } catch (IOException e) {\n        throw new RuntimeException(e);\n      }\n      iterOutRecords.next().set(new Text(stream.toByteArray()));\n      stream.reset();\n    }\n    serTimeCounter.increment((System.nanoTime() - start) / 1000000);\n    nRecords.increment(1);\n    return true;\n  }\n\n  public float getProgress() throws IOException,  InterruptedException {\n    return actualReader.getProgress();\n  }\n\n  public synchronized void close() throws IOException {\n    actualReader.close();\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeValueReader.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.Properties;\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.conf.Configuration;\n\nimport org.apache.avro.generic.IndexedRecord;\n\n\npublic class PydoopAvroBridgeValueReader\n    extends PydoopAvroBridgeReaderBase<NullWritable, Text> {\n\n  private Properties props;\n\n  public PydoopAvroBridgeValueReader(\n      RecordReader<?, ? 
extends IndexedRecord> actualReader) {\n    this.actualReader = actualReader;\n    props = Submitter.getPydoopProperties();\n  }\n\n  protected List<IndexedRecord> getInRecords()\n      throws IOException, InterruptedException {\n    IndexedRecord value = (IndexedRecord) actualReader.getCurrentValue();\n    return Arrays.asList(value);\n  }\n\n  public void initialize(InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    super.initialize(split, context);\n    assert schemas.size() == 1;\n    Configuration conf = context.getConfiguration();\n    conf.set(props.getProperty(\"AVRO_INPUT\"), Submitter.AvroIO.V.name());\n    conf.set(props.getProperty(\"AVRO_VALUE_INPUT_SCHEMA\"),\n        schemas.get(0).toString());\n  }\n\n  @Override\n  public NullWritable getCurrentKey()\n      throws IOException, InterruptedException {\n    return NullWritable.get();\n  }\n\n  @Override\n  public Text getCurrentValue()\n      throws IOException, InterruptedException {\n    assert outRecords.size() == 1;\n    return outRecords.get(0);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeValueWriter.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.List;\nimport java.util.Arrays;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.generic.GenericRecord;\n\n\npublic class PydoopAvroBridgeValueWriter extends PydoopAvroBridgeWriterBase {\n\n  public PydoopAvroBridgeValueWriter(\n      RecordWriter<NullWritable, ? super GenericRecord> actualWriter,\n      TaskAttemptContext context) {\n    super(context, Submitter.AvroIO.V);\n    this.actualWriter = actualWriter;\n  }\n\n  public void write(Text ignore, Text value)\n      throws IOException, InterruptedException {\n    List<GenericRecord> outRecords = super.getOutRecords(Arrays.asList(value));\n    super.write(outRecords);\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroBridgeWriterBase.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.List;\nimport java.util.ArrayList;\nimport java.util.Iterator;\nimport java.util.Properties;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.Counter;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.conf.Configuration;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.generic.GenericDatumReader;\nimport org.apache.avro.io.DatumReader;\nimport org.apache.avro.io.DecoderFactory;\nimport org.apache.avro.io.Decoder;\nimport org.apache.avro.io.BinaryDecoder;\n\nimport static it.crs4.pydoop.mapreduce.pipes.Submitter.AvroIO;\n\n\npublic abstract class PydoopAvroBridgeWriterBase\n    extends RecordWriter<Text, Text> {\n\n  private static final String COUNTERS_GROUP =\n    PydoopAvroBridgeWriterBase.class.getName();\n  private long start;\n\n  protected AvroIO mode;\n  protected RecordWriter actualWriter;\n  protected DecoderFactory decFactory;\n  protected List<DatumReader<GenericRecord>> datumReaders;\n  protected List<Decoder> decoders;\n  protected List<GenericRecord> outRecords;\n\n  protected Counter nRecords;\n  protected Counter writeTimeCounter;\n  protected Counter deserTimeCounter;\n\n  public PydoopAvroBridgeWriterBase(TaskAttemptContext context, AvroIO mode) {\n    Properties props = Submitter.getPydoopProperties();\n    Configuration conf = context.getConfiguration();\n    datumReaders = new ArrayList<DatumReader<GenericRecord>>();\n    decoders = new ArrayList<Decoder>();\n    outRecords = new ArrayList<GenericRecord>();\n    if (mode == AvroIO.K || mode == AvroIO.KV) {\n      datumReaders.add(new GenericDatumReader<GenericRecord>(Schema.parse(\n          conf.get(props.getProperty(\"AVRO_KEY_OUTPUT_SCHEMA\")))));\n      decoders.add(null);\n      outRecords.add(null);\n    }\n    if (mode == AvroIO.V || mode == AvroIO.KV) {\n      datumReaders.add(new GenericDatumReader<GenericRecord>(Schema.parse(\n          conf.get(props.getProperty(\"AVRO_VALUE_OUTPUT_SCHEMA\")))));\n      decoders.add(null);\n      outRecords.add(null);\n    }\n    decFactory = DecoderFactory.get();\n    this.mode = mode;\n    //--\n    nRecords = context.getCounter(COUNTERS_GROUP, \"Number of records\");\n    writeTimeCounter = context.getCounter(COUNTERS_GROUP, \"Write time (ms)\");\n    deserTimeCounter = context.getCounter(\n        COUNTERS_GROUP, \"Deserialization time (ms)\");\n  }\n\n  protected List<GenericRecord> getOutRecords(List<Text> inRecords)\n      throws IOException {\n    start = System.nanoTime();\n    for (int i = 0; i < inRecords.size(); i++) {\n      Decoder dec = decFactory.binaryDecoder(\n          inRecords.get(i).getBytes(), (BinaryDecoder) decoders.get(i));\n      decoders.set(i, dec);\n      outRecords.set(i, datumReaders.get(i).read(outRecords.get(i), dec));\n    }\n    deserTimeCounter.increment((System.nanoTime() - start) / 1000000);\n    return outRecords;\n  }\n\n  protected void write(List<GenericRecord> outRecords)\n      throws IOException, InterruptedException {\n    start = System.nanoTime();\n    switch (mode) {\n    case K:\n      actualWriter.write(outRecords.get(0), NullWritable.get());\n      break;\n    case V:\n      // Parquet writer does not accept a NullWritable key\n      GenericRecord r = outRecords.get(0);\n      actualWriter.write(null, r);\n      break;\n    case KV:\n      actualWriter.write(outRecords.get(0), outRecords.get(1));\n      break;\n    default:\n      throw new RuntimeException(\"Invalid Avro I/O mode\");\n    }\n    writeTimeCounter.increment((System.nanoTime() - start) / 1000000);\n    nRecords.increment(1);\n  }\n\n  public void close(TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    actualWriter.close(context);\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroInputBridgeBase.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.util.List;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.JobContext;\nimport org.apache.hadoop.mapreduce.InputFormat;\nimport org.apache.hadoop.mapreduce.lib.input.TextInputFormat;\nimport org.apache.hadoop.util.ReflectionUtils;\n\n\npublic abstract class PydoopAvroInputBridgeBase<K, V>\n    extends InputFormat<K, V> {\n\n  protected InputFormat actualFormat;\n  protected Class<? extends InputFormat> defaultActualFormat;\n\n  protected InputFormat getActualFormat(Configuration conf) {\n    if (actualFormat == null) {\n      actualFormat = ReflectionUtils.newInstance(\n          conf.getClass(\n              Submitter.INPUT_FORMAT,\n              defaultActualFormat,\n              InputFormat.class), conf);\n        }\n    return actualFormat;\n  }\n\n  @Override\n  public List<InputSplit> getSplits(JobContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    return getActualFormat(conf).getSplits(context);\n  }\n\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroInputKeyBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroInputKeyBridge\n    extends PydoopAvroInputBridgeBase<Text, NullWritable> {\n\n  public PydoopAvroInputKeyBridge() {\n    defaultActualFormat = PydoopAvroKeyInputFormat.class;\n  }\n\n  @Override\n  public RecordReader<Text, NullWritable> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    return new PydoopAvroBridgeKeyReader(\n        getActualFormat(conf).createRecordReader(split, context));\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroInputKeyValueBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroInputKeyValueBridge\n    extends PydoopAvroInputBridgeBase<Text, Text> {\n\n  public PydoopAvroInputKeyValueBridge() {\n    defaultActualFormat = PydoopAvroKeyValueInputFormat.class;\n  }\n\n  @Override\n  public RecordReader<Text, Text> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    return new PydoopAvroBridgeKeyValueReader(\n        getActualFormat(conf).createRecordReader(split, context));\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroInputValueBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroInputValueBridge\n    extends PydoopAvroInputBridgeBase<NullWritable, Text> {\n\n  public PydoopAvroInputValueBridge() {\n    defaultActualFormat = PydoopAvroValueInputFormat.class;\n  }\n\n  @Override\n  public RecordReader<NullWritable, Text> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    return new PydoopAvroBridgeValueReader(\n        getActualFormat(conf).createRecordReader(split, context));\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyInputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.generic.GenericRecord;\n\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\n\n\npublic class PydoopAvroKeyInputFormat\n    extends FileInputFormat<GenericRecord, NullWritable> {\n\n  @Override\n  public RecordReader<GenericRecord, NullWritable> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    // null readerSchema: the reader will fall back to the writer schema\n    // FIXME: we could add our own property for setting the reader schema\n    return new PydoopAvroKeyRecordReader(null);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyOutputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.generic.GenericRecord;\n\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroKeyOutputFormat\n    extends PydoopAvroOutputFormatBase<GenericRecord, NullWritable> {\n\n  @Override\n  @SuppressWarnings(\"unchecked\")\n  public RecordWriter<GenericRecord, NullWritable> getRecordWriter(\n      TaskAttemptContext context) throws IOException {\n    return new PydoopAvroKeyRecordWriter(\n        getOutputSchema(context, \"AVRO_KEY_OUTPUT_SCHEMA\"),\n        getCompressionCodec(context),\n        getAvroFileOutputStream(context)\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyRecordReader.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.hadoop.io.NullWritable;\n\nimport org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\n\n\npublic class PydoopAvroKeyRecordReader\n    extends PydoopAvroRecordReaderBase<GenericRecord, NullWritable> {\n\n  private static final Logger LOG = LoggerFactory.getLogger(\n      PydoopAvroKeyRecordReader.class);\n\n  public PydoopAvroKeyRecordReader(Schema readerSchema) {\n    super(readerSchema);\n  }\n\n  @Override\n  public GenericRecord getCurrentKey()\n      throws IOException, InterruptedException {\n    return getCurrentRecord();\n  }\n\n  @Override\n  public NullWritable getCurrentValue()\n      throws IOException, InterruptedException {\n    return NullWritable.get();\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyRecordWriter.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.io.OutputStream;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.file.CodecFactory;\n\nimport org.apache.hadoop.io.NullWritable;\n\n\npublic class PydoopAvroKeyRecordWriter\n    extends PydoopAvroRecordWriterBase<GenericRecord, NullWritable> {\n\n  public PydoopAvroKeyRecordWriter(Schema writerSchema,\n      CodecFactory compressionCodec, OutputStream outputStream)\n      throws IOException {\n    super(writerSchema, compressionCodec, outputStream);\n  }\n\n  @Override\n  public void write(GenericRecord record, NullWritable ignore)\n      throws IOException {\n    mAvroFileWriter.append(record);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyValueInputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.generic.GenericRecord;\n\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\n\n\npublic class PydoopAvroKeyValueInputFormat\n    extends FileInputFormat<GenericRecord, GenericRecord> {\n\n  @Override\n  public RecordReader<GenericRecord, GenericRecord> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    // null readerSchema: the reader will fall back to the writer schema\n    // FIXME: we could add our own property for setting the reader schema\n    // FIXME: no distinction between top-level, key and value schema\n    return new PydoopAvroKeyValueRecordReader(null);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyValueOutputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.hadoop.io.AvroKeyValue;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroKeyValueOutputFormat\n    extends PydoopAvroOutputFormatBase<GenericRecord, GenericRecord> {\n\n  @Override\n  @SuppressWarnings(\"unchecked\")\n  public RecordWriter<GenericRecord, GenericRecord> getRecordWriter(\n      TaskAttemptContext context) throws IOException {\n    Schema keyValueSchema = AvroKeyValue.getSchema(\n        getOutputSchema(context, \"AVRO_KEY_OUTPUT_SCHEMA\"),\n        getOutputSchema(context, \"AVRO_VALUE_OUTPUT_SCHEMA\")\n    );\n    return new PydoopAvroKeyValueRecordWriter(\n        keyValueSchema, getCompressionCodec(context),\n        getAvroFileOutputStream(context)\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyValueRecordReader.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\n\n\npublic class PydoopAvroKeyValueRecordReader\n    extends PydoopAvroRecordReaderBase<GenericRecord, GenericRecord> {\n\n  public PydoopAvroKeyValueRecordReader(Schema readerSchema) {\n    super(readerSchema);\n  }\n\n  @Override\n  public GenericRecord getCurrentKey()\n      throws IOException, InterruptedException {\n    return (GenericRecord) getCurrentRecord().get(\"key\");\n  }\n\n  @Override\n  public GenericRecord getCurrentValue()\n      throws IOException, InterruptedException {\n    return (GenericRecord) getCurrentRecord().get(\"value\");\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroKeyValueRecordWriter.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.io.OutputStream;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.generic.GenericData;\nimport org.apache.avro.file.CodecFactory;\nimport org.apache.avro.hadoop.io.AvroKeyValue;\n\n\npublic class PydoopAvroKeyValueRecordWriter\n    extends PydoopAvroRecordWriterBase<GenericRecord, GenericRecord> {\n\n  private Schema keyValueSchema;\n\n  public PydoopAvroKeyValueRecordWriter(Schema writerSchema,\n      CodecFactory compressionCodec, OutputStream outputStream)\n      throws IOException {\n    super(writerSchema, compressionCodec, outputStream);\n    keyValueSchema = writerSchema;\n  }\n\n  @Override\n  public void write(GenericRecord key, GenericRecord value)\n      throws IOException {\n    AvroKeyValue<GenericRecord, GenericRecord> kv\n        = new AvroKeyValue<GenericRecord, GenericRecord>(\n            new GenericData.Record(keyValueSchema));\n    kv.setKey(key);\n    kv.setValue(value);\n    mAvroFileWriter.append(kv.get());\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroOutputBridgeBase.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.mapreduce.OutputFormat;\nimport org.apache.hadoop.mapreduce.JobContext;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.OutputCommitter;\nimport org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;\nimport org.apache.hadoop.util.ReflectionUtils;\nimport org.apache.hadoop.io.Text;\n\n\npublic abstract class PydoopAvroOutputBridgeBase\n    extends OutputFormat<Text, Text> {\n\n  protected OutputFormat actualFormat;\n  protected Class<? extends OutputFormat> defaultActualFormat;\n\n  protected OutputFormat getActualFormat(Configuration conf) {\n    if (actualFormat == null) {\n      actualFormat = ReflectionUtils.newInstance(\n          conf.getClass(\n              Submitter.OUTPUT_FORMAT,\n              defaultActualFormat,\n              OutputFormat.class), conf);\n        }\n    return actualFormat;\n  }\n\n  @Override\n  public void checkOutputSpecs(JobContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    getActualFormat(conf).checkOutputSpecs(context);\n  }\n\n  @Override\n  public OutputCommitter getOutputCommitter(TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    Configuration conf = context.getConfiguration();\n    return getActualFormat(conf).getOutputCommitter(context);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroOutputFormatBase.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.Properties;\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.mapreduce.AvroOutputFormatBase;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic abstract class PydoopAvroOutputFormatBase<K, V>\n    extends AvroOutputFormatBase<K, V> {\n\n  protected static Schema getOutputSchema(\n      TaskAttemptContext context, String propName) throws IOException {\n    Properties props = Submitter.getPydoopProperties();\n    Configuration conf = context.getConfiguration();\n    String schemaJSON = conf.get(props.getProperty(propName));\n    if (null == schemaJSON) {\n      throw new IOException(\"Avro output requires an output schema\");\n    }\n    return Schema.parse(schemaJSON);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroOutputKeyBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.Schema;\n\n\npublic class PydoopAvroOutputKeyBridge extends PydoopAvroOutputBridgeBase {\n\n  public PydoopAvroOutputKeyBridge() {\n    defaultActualFormat = PydoopAvroKeyOutputFormat.class;\n  }\n\n  @Override\n  public RecordWriter<Text, Text>\n      getRecordWriter(TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    return new PydoopAvroBridgeKeyWriter(\n        getActualFormat(context.getConfiguration()).getRecordWriter(context),\n        context\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroOutputKeyValueBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.Schema;\n\n\npublic class PydoopAvroOutputKeyValueBridge\n    extends PydoopAvroOutputBridgeBase {\n\n  public PydoopAvroOutputKeyValueBridge() {\n    defaultActualFormat = PydoopAvroKeyValueOutputFormat.class;\n  }\n\n  @Override\n  public RecordWriter<Text, Text>\n      getRecordWriter(TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    return new PydoopAvroBridgeKeyValueWriter(\n        getActualFormat(context.getConfiguration()).getRecordWriter(context),\n        context\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroOutputValueBridge.java",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.io.Text;\n\nimport org.apache.avro.Schema;\n\n\npublic class PydoopAvroOutputValueBridge extends PydoopAvroOutputBridgeBase {\n\n  public PydoopAvroOutputValueBridge() {\n    defaultActualFormat = PydoopAvroValueOutputFormat.class;\n  }\n\n  @Override\n  public RecordWriter<Text, Text>\n      getRecordWriter(TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    return new PydoopAvroBridgeValueWriter(\n        getActualFormat(context.getConfiguration()).getRecordWriter(context),\n        context\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroRecordReaderBase.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.generic.GenericDatumReader;\nimport org.apache.avro.file.DataFileReader;\nimport org.apache.avro.file.SeekableInput;\nimport org.apache.avro.mapred.FsInput;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.FileSplit;\n\nimport org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\n\n\npublic abstract class PydoopAvroRecordReaderBase<K, V>\n    extends RecordReader<K, V> {\n\n  private static final Logger LOG = LoggerFactory.getLogger(\n      PydoopAvroRecordReaderBase.class);\n\n  private final Schema mReaderSchema;\n  private GenericRecord mCurrentRecord;\n  private DataFileReader<GenericRecord> mAvroFileReader;\n  private long mStartPosition;\n  private long mEndPosition;\n\n  protected PydoopAvroRecordReaderBase(Schema readerSchema) 
{\n    mReaderSchema = readerSchema;\n    mCurrentRecord = null;\n  }\n\n  @Override\n  public void initialize(InputSplit inputSplit, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    if (!(inputSplit instanceof FileSplit)) {\n      throw new IllegalArgumentException(\"Only compatible with FileSplits.\");\n    }\n    FileSplit fileSplit = (FileSplit) inputSplit;\n    SeekableInput seekableFileInput = createSeekableInput(\n        context.getConfiguration(), fileSplit.getPath());\n    mAvroFileReader = new DataFileReader<GenericRecord>(seekableFileInput,\n        new GenericDatumReader<GenericRecord>(mReaderSchema));\n    // We will read the first block that begins after the input split\n    // start; we will read up to but not including the first block\n    // that begins after the input split end.\n    mAvroFileReader.sync(fileSplit.getStart());\n    mStartPosition = mAvroFileReader.previousSync();\n    mEndPosition = fileSplit.getStart() + fileSplit.getLength();\n  }\n\n  @Override\n  public boolean nextKeyValue() throws IOException, InterruptedException {\n    assert null != mAvroFileReader;\n    if (mAvroFileReader.hasNext() && !mAvroFileReader.pastSync(mEndPosition)) {\n      mCurrentRecord = mAvroFileReader.next(mCurrentRecord);\n      return true;\n    }\n    return false;\n  }\n\n  @Override\n  public float getProgress() throws IOException, InterruptedException {\n    assert null != mAvroFileReader;\n    if (mEndPosition == mStartPosition) {\n      return 0.0f;\n    }\n    long bytesRead = mAvroFileReader.previousSync() - mStartPosition;\n    long bytesTotal = mEndPosition - mStartPosition;\n    LOG.debug(\n        \"Progress: bytesRead=\" + bytesRead + \", bytesTotal=\" + bytesTotal);\n    return Math.min(1.0f, (float) bytesRead / (float) bytesTotal);\n  }\n\n  @Override\n  public void close() throws IOException {\n    if (null != mAvroFileReader) {\n      try {\n        mAvroFileReader.close();\n      } finally {\n        
mAvroFileReader = null;\n      }\n    }\n  }\n\n  protected GenericRecord getCurrentRecord() {\n    return mCurrentRecord;\n  }\n\n  protected SeekableInput createSeekableInput(Configuration conf, Path path)\n      throws IOException {\n    return new FsInput(path, conf);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroRecordWriterBase.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.io.OutputStream;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.generic.GenericDatumWriter;\nimport org.apache.avro.file.CodecFactory;\nimport org.apache.avro.file.DataFileWriter;\n\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic abstract class PydoopAvroRecordWriterBase<K, V>\n    extends RecordWriter<K, V> {\n\n  protected final DataFileWriter<GenericRecord> mAvroFileWriter;\n\n  protected PydoopAvroRecordWriterBase(Schema writerSchema,\n      CodecFactory compressionCodec, OutputStream outputStream)\n      throws IOException {\n    mAvroFileWriter = new DataFileWriter<GenericRecord>(\n        new GenericDatumWriter<GenericRecord>(writerSchema));\n    mAvroFileWriter.setCodec(compressionCodec);\n    mAvroFileWriter.create(writerSchema, outputStream);\n  }\n\n  @Override\n  public void close(TaskAttemptContext context) throws IOException {\n    mAvroFileWriter.close();\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroValueInputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.generic.GenericRecord;\n\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.RecordReader;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\n\n\npublic class PydoopAvroValueInputFormat\n    extends FileInputFormat<NullWritable, GenericRecord> {\n\n  @Override\n  public RecordReader<NullWritable, GenericRecord> createRecordReader(\n      InputSplit split, TaskAttemptContext context)\n      throws IOException, InterruptedException {\n    // null readerSchema: the reader will fall back to the writer schema\n    // FIXME: we could add our own property for setting the reader schema\n    return new PydoopAvroValueRecordReader(null);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroValueOutputFormat.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.generic.GenericRecord;\n\nimport org.apache.hadoop.io.NullWritable;\nimport org.apache.hadoop.mapreduce.RecordWriter;\nimport org.apache.hadoop.mapreduce.TaskAttemptContext;\n\n\npublic class PydoopAvroValueOutputFormat\n    extends PydoopAvroOutputFormatBase<NullWritable, GenericRecord> {\n\n  @Override\n  @SuppressWarnings(\"unchecked\")\n  public RecordWriter<NullWritable, GenericRecord> getRecordWriter(\n      TaskAttemptContext context) throws IOException {\n    return new PydoopAvroValueRecordWriter(\n        getOutputSchema(context, \"AVRO_VALUE_OUTPUT_SCHEMA\"),\n        getCompressionCodec(context),\n        getAvroFileOutputStream(context)\n    );\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroValueRecordReader.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.hadoop.io.NullWritable;\n\nimport org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\n\n\npublic class PydoopAvroValueRecordReader\n    extends PydoopAvroRecordReaderBase<NullWritable, GenericRecord> {\n\n  private static final Logger LOG = LoggerFactory.getLogger(\n      PydoopAvroValueRecordReader.class);\n\n  public PydoopAvroValueRecordReader(Schema readerSchema) {\n    super(readerSchema);\n  }\n\n  @Override\n  public NullWritable getCurrentKey()\n      throws IOException, InterruptedException {\n    return NullWritable.get();\n  }\n\n  @Override\n  public GenericRecord getCurrentValue()\n      throws IOException, InterruptedException {\n    return getCurrentRecord();\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/PydoopAvroValueRecordWriter.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n * implied.  See the License for the specific language governing\n * permissions and limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.io.OutputStream;\n\nimport org.apache.avro.Schema;\nimport org.apache.avro.generic.GenericRecord;\nimport org.apache.avro.file.CodecFactory;\n\nimport org.apache.hadoop.io.NullWritable;\n\n\npublic class PydoopAvroValueRecordWriter\n    extends PydoopAvroRecordWriterBase<NullWritable, GenericRecord> {\n\n  public PydoopAvroValueRecordWriter(Schema writerSchema,\n      CodecFactory compressionCodec, OutputStream outputStream)\n      throws IOException {\n    super(writerSchema, compressionCodec, outputStream);\n  }\n\n  @Override\n  public void write(NullWritable ignore, GenericRecord record)\n      throws IOException {\n    mAvroFileWriter.append(record);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/Submitter.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.util.Properties;\nimport java.io.InputStream;\nimport java.io.IOException;\nimport java.net.URI;\nimport java.net.URISyntaxException;\nimport java.net.URL;\nimport java.net.URLClassLoader;\nimport java.security.AccessController;\nimport java.security.PrivilegedAction;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.commons.cli.BasicParser;\nimport org.apache.commons.cli.Parser;\nimport org.apache.commons.cli.CommandLine;\nimport org.apache.commons.cli.Option;\nimport org.apache.commons.cli.OptionBuilder;\nimport org.apache.commons.cli.Options;\nimport org.apache.commons.cli.ParseException;\n\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.conf.Configured;\nimport org.apache.hadoop.util.ExitUtil;\nimport org.apache.hadoop.util.Tool;\nimport org.apache.hadoop.util.GenericOptionsParser;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\nimport 
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;\nimport org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;\nimport org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;\nimport org.apache.hadoop.mapreduce.filecache.DistributedCache;\nimport org.apache.hadoop.mapreduce.MRJobConfig;\nimport org.apache.hadoop.mapreduce.Job;\nimport org.apache.hadoop.mapreduce.InputFormat;\nimport org.apache.hadoop.mapreduce.Mapper;\nimport org.apache.hadoop.mapreduce.OutputFormat;\nimport org.apache.hadoop.mapreduce.Partitioner;\nimport org.apache.hadoop.mapreduce.Reducer;\n\n\n/**\n * A command line parser for the CLI-based Pipes job submitter.\n */\nclass CommandLineParser {\n  private Options options = new Options();\n\n  CommandLineParser() {\n    addOption(\"input\", false, \"input path to the maps\", \"path\");\n    addOption(\"output\", false, \"output path from the reduces\", \"path\");\n    addOption(\"jar\", false, \"job jar file\", \"path\");\n    addOption(\"inputformat\", false, \"java classname of InputFormat\", \"class\");\n    addOption(\"map\", false, \"java classname of Mapper\", \"class\");\n    addOption(\"partitioner\", false, \"java classname of Partitioner\", \"class\");\n    addOption(\"reduce\", false, \"java classname of Reducer\", \"class\");\n    addOption(\"writer\", false, \"java classname of OutputFormat\", \"class\");\n    addOption(\"program\", false, \"URI to application executable\", \"class\");\n    addOption(\"reduces\", false, \"number of reduces\", \"num\");\n    addOption(\"lazyOutput\", false, \"Optional. 
Create output lazily\", \"boolean\");\n    addOption(\"avroInput\", false, \"avro input mode\", \"boolean\");\n    addOption(\"avroOutput\", false, \"avro output mode\", \"boolean\");\n  }\n\n  void addOption(String longName, boolean required, String description,\n      String paramName) {\n    Option option = OptionBuilder.withArgName(paramName)\n        .hasArgs(1).withDescription(description)\n        .isRequired(required).create(longName);\n    options.addOption(option);\n  }\n\n  void addArgument(String name, boolean required, String description) {\n    Option option = OptionBuilder.withArgName(name)\n        .hasArgs(1).withDescription(description)\n        .isRequired(required).create();\n    options.addOption(option);\n  }\n\n  CommandLine parse(Configuration conf, String[] args)\n      throws IOException, ParseException {\n    Parser parser = new BasicParser();\n    conf.setBoolean(\"mapreduce.client.genericoptionsparser.used\", true);\n    GenericOptionsParser genericParser = new GenericOptionsParser(conf, args);\n    return parser.parse(options, genericParser.getRemainingArgs());\n  }\n\n  void printUsage() {\n    // The CLI package should do this for us, but I can't figure out how\n    // to make it print something reasonable.\n    System.out.println(\"bin/hadoop pipes\");\n    System.out.println(\"  [-input <path>] // Input directory\");\n    System.out.println(\"  [-output <path>] // Output directory\");\n    System.out.println(\"  [-jar <jar file> // jar filename\");\n    System.out.println(\"  [-inputformat <class>] // InputFormat class\");\n    System.out.println(\"  [-map <class>] // Java Map class\");\n    System.out.println(\"  [-partitioner <class>] // Java Partitioner\");\n    System.out.println(\"  [-reduce <class>] // Java Reduce class\");\n    System.out.println(\"  [-writer <class>] // Java RecordWriter\");\n    System.out.println(\"  [-program <executable>] // executable URI\");\n    System.out.println(\"  [-reduces <num>] // number of 
reduces\");\n    System.out.println(\"  [-lazyOutput <true/false>] // createOutputLazily\");\n    System.out.println(\"  [-avroInput <k/v/kv>] // avro input\");\n    System.out.println(\"  [-avroOutput <k/v/kv>] // avro output\");\n    System.out.println();\n    GenericOptionsParser.printGenericCommandUsage(System.out);\n  }\n}\n\n\npublic class Submitter extends Configured implements Tool {\n\n  public static enum AvroIO {\n    K,   // {Input,Output}Format key type is avro record\n    V,   // {Input,Output}Format value type is avro record\n    KV,  // {Input,Output}Format {key,value} type is avro record\n  }\n\n  protected static final Log LOG = LogFactory.getLog(Submitter.class);\n  protected static final String PROP_FILE = \"pydoop.properties\";\n  protected static AvroIO avroInput;\n  protected static AvroIO avroOutput;\n  protected static boolean explicitInputFormat = false;\n  protected static boolean explicitOutputFormat = false;\n  // --- pydoop properties ---\n  protected static Properties props;\n\n  public static final String PRESERVE_COMMANDFILE =\n      \"mapreduce.pipes.commandfile.preserve\";\n  public static final String EXECUTABLE = \"mapreduce.pipes.executable\";\n  public static final String INTERPRETOR =\n      \"mapreduce.pipes.executable.interpretor\";\n  public static final String IS_JAVA_MAP = \"mapreduce.pipes.isjavamapper\";\n  public static final String IS_JAVA_RR = \"mapreduce.pipes.isjavarecordreader\";\n  public static final String IS_JAVA_RW = \"mapreduce.pipes.isjavarecordwriter\";\n  public static final String IS_JAVA_REDUCE = \"mapreduce.pipes.isjavareducer\";\n  public static final String PARTITIONER = \"mapreduce.pipes.partitioner\";\n  public static final String INPUT_FORMAT = \"mapreduce.pipes.inputformat\";\n  public static final String OUTPUT_FORMAT = \"mapreduce.pipes.outputformat\";\n  public static final String PORT = \"mapreduce.pipes.command.port\";\n\n  public static Properties getPydoopProperties() {\n    Properties 
properties = new Properties();\n    InputStream stream = Submitter.class.getResourceAsStream(PROP_FILE);\n    try {\n      properties.load(stream);\n      stream.close();\n    } catch (NullPointerException e) {\n      throw new RuntimeException(\"Could not find \" + PROP_FILE);\n    } catch (IOException e) {\n      throw new RuntimeException(\"Could not read \" + PROP_FILE);\n    }\n    return properties;\n  }\n\n  public Submitter() {\n    super();\n    props = getPydoopProperties();\n  }\n\n  public static boolean isLocalFS(Configuration conf) throws IOException {\n    return FileSystem.get(conf).equals(FileSystem.getLocal(conf));\n  }\n\n  /**\n   * Get the URI of the application's executable.\n   * @param conf\n   * @return the URI where the application's executable is located\n   */\n  public static String getExecutable(Configuration conf) {\n    return conf.get(Submitter.EXECUTABLE);\n  }\n\n  /**\n   * Set the URI for the application's executable. Normally this is a hdfs:\n   * location.\n   * @param conf\n   * @param executable The URI of the application's executable.\n   */\n  public static void setExecutable(Configuration conf, String executable) {\n    conf.set(Submitter.EXECUTABLE, executable);\n  }\n\n  /**\n   * Set whether the job is using a Java RecordReader.\n   * @param conf the configuration to modify\n   * @param value the new value\n   */\n  public static void setIsJavaRecordReader(Configuration conf, boolean value) {\n    conf.setBoolean(Submitter.IS_JAVA_RR, value);\n  }\n\n  /**\n   * Check whether the job is using a Java RecordReader\n   * @param conf the configuration to check\n   * @return is it a Java RecordReader?\n   */\n  public static boolean getIsJavaRecordReader(Configuration conf) {\n    return conf.getBoolean(Submitter.IS_JAVA_RR, false);\n  }\n\n  /**\n   * Set whether the Mapper is written in Java.\n   * @param conf the configuration to modify\n   * @param value the new value\n   */\n  public static void 
setIsJavaMapper(Configuration conf, boolean value) {\n    conf.setBoolean(Submitter.IS_JAVA_MAP, value);\n  }\n\n  /**\n   * Check whether the job is using a Java Mapper.\n   * @param conf the configuration to check\n   * @return is it a Java Mapper?\n   */\n  public static boolean getIsJavaMapper(Configuration conf) {\n    return conf.getBoolean(Submitter.IS_JAVA_MAP, false);\n  }\n\n  /**\n   * Set whether the Reducer is written in Java.\n   * @param conf the configuration to modify\n   * @param value the new value\n   */\n  public static void setIsJavaReducer(Configuration conf, boolean value) {\n    conf.setBoolean(Submitter.IS_JAVA_REDUCE, value);\n  }\n\n  /**\n   * Check whether the job is using a Java Reducer.\n   * @param conf the configuration to check\n   * @return is it a Java Reducer?\n   */\n  public static boolean getIsJavaReducer(Configuration conf) {\n    return conf.getBoolean(Submitter.IS_JAVA_REDUCE, false);\n  }\n\n  /**\n   * Set whether the job will use a Java RecordWriter.\n   * @param conf the configuration to modify\n   * @param value the new value to set\n   */\n  public static void setIsJavaRecordWriter(Configuration conf, boolean value) {\n    conf.setBoolean(Submitter.IS_JAVA_RW, value);\n  }\n\n  /**\n   * Will the reduce use a Java RecordWriter?\n   * @param conf the configuration to check\n   * @return true, if the output of the job will be written by Java\n   */\n  public static boolean getIsJavaRecordWriter(Configuration conf) {\n    return conf.getBoolean(Submitter.IS_JAVA_RW, false);\n  }\n\n  /**\n   * Set the configuration, if it doesn't already have a value for the given\n   * key.\n   * @param conf the configuration to modify\n   * @param key the key to set\n   * @param value the new \"default\" value to set\n   */\n  private static void setIfUnset(Configuration conf, String key, String value) {\n    if (conf.get(key) == null) {\n      conf.set(key, value);\n    }\n  }\n\n  /**\n   * Save away the user's original partitioner 
before we override it.\n   * @param conf the configuration to modify\n   * @param cls the user's partitioner class\n   */\n  static void setJavaPartitioner(Configuration conf, Class cls) {\n    conf.set(Submitter.PARTITIONER, cls.getName());\n  }\n\n  /**\n   * Get the user's original partitioner.\n   * @param conf the configuration to look in\n   * @return the class that the user submitted\n   */\n  static Class<? extends Partitioner> getJavaPartitioner(Configuration conf) {\n    return conf.getClass(Submitter.PARTITIONER, HashPartitioner.class,\n        Partitioner.class);\n  }\n\n  private static <InterfaceType>\n    Class<? extends InterfaceType> getClass(CommandLine cl, String key,\n        Configuration conf, Class<InterfaceType> cls)\n        throws ClassNotFoundException {\n    return conf.getClassByName(cl.getOptionValue(key)).asSubclass(cls);\n  }\n\n  /**\n   * Does the user want to keep the command file for debugging? If\n   * this is true, pipes will write a copy of the command data to a\n   * file in the task directory named \"downlink.data\", which may be\n   * used to run the C++ program under the debugger. You probably also\n   * want to set Configuration.setKeepFailedTaskFiles(true) to keep\n   * the entire directory from being deleted.  
To run using the data\n   * file, set the environment variable \"mapreduce.pipes.commandfile\"\n   * to point to the file.\n   * @param conf the configuration to check\n   * @return will the framework save the command file?\n   */\n  public static boolean getKeepCommandFile(Configuration conf) {\n    return conf.getBoolean(Submitter.PRESERVE_COMMANDFILE, false);\n  }\n\n  /**\n   * Set whether to keep the command file for debugging\n   * @param conf the configuration to modify\n   * @param keep the new value\n   */\n  public static void setKeepCommandFile(Configuration conf, boolean keep) {\n    conf.setBoolean(Submitter.PRESERVE_COMMANDFILE, keep);\n  }\n\n  private static void setupPipesJob(Job job)\n      throws IOException, ClassNotFoundException, URISyntaxException {\n    Configuration conf = job.getConfiguration();\n\n    // -libjars does not work when running on the local FS\n    if (isLocalFS(conf)) {\n      URL[] libjars = GenericOptionsParser.getLibJars(conf);\n      for (URL jarUrl: libjars) {\n        job.addFileToClassPath(new Path(jarUrl.toURI()));\n      }\n    }\n\n    // default map output types to Text\n    if (!getIsJavaMapper(conf)) {\n      job.setMapperClass(PipesMapper.class);\n      // Save the user's partitioner and hook in our's.\n      setJavaPartitioner(conf, job.getPartitionerClass());\n      job.setPartitionerClass(PipesPartitioner.class);\n    }\n    if (!getIsJavaReducer(conf)) {\n      job.setReducerClass(PipesReducer.class);\n      if (!getIsJavaRecordWriter(conf)) {\n        job.setOutputFormatClass(PipesNonJavaOutputFormat.class);\n      }\n    }\n    String textClassname = Text.class.getName();\n    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);\n    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);\n    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);\n    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);\n\n    // Use PipesNonJavaInputFormat if necessary to 
handle progress reporting\n    // from C++ RecordReaders ...\n    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {\n      conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(),\n          InputFormat.class);\n      job.setInputFormatClass(PipesNonJavaInputFormat.class);\n    }\n\n    if (avroInput != null) {\n      if (explicitInputFormat) {\n        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(),\n            InputFormat.class);\n      }  // else let the bridge fall back to the appropriate Avro IF\n      switch (avroInput) {\n      case K:\n        job.setInputFormatClass(PydoopAvroInputKeyBridge.class);\n        break;\n      case V:\n        job.setInputFormatClass(PydoopAvroInputValueBridge.class);\n        break;\n      case KV:\n        job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);\n        break;\n      default:\n        throw new IllegalArgumentException(\"Bad Avro input type\");\n      }\n    }\n    if (avroOutput != null) {\n      if (explicitOutputFormat) {\n        conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(),\n            OutputFormat.class);\n      }  // else let the bridge fall back to the appropriate Avro OF\n      conf.set(props.getProperty(\"AVRO_OUTPUT\"), avroOutput.name());\n      switch (avroOutput) {\n      case K:\n        job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);\n        break;\n      case V:\n        job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);\n        break;\n      case KV:\n        job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);\n        break;\n      default:\n        throw new IllegalArgumentException(\"Bad Avro output type\");\n      }\n    }\n\n    String exec = getExecutable(conf);\n    if (exec == null) {\n      String msg = \"No application program defined.\";\n      throw new IllegalArgumentException(msg);\n    }\n    // add default debug script only when executable is expressed as\n    // 
<path>#<executable>\n    //FIXME: this is kind of useless if the pipes program is not in c++\n    if (exec.contains(\"#\")) {\n      // set default gdb commands for map and reduce task\n      String defScript =\n          \"$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script\";\n      setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT,defScript);\n      setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT,defScript);\n    }\n    URI[] fileCache = DistributedCache.getCacheFiles(conf);\n    if (fileCache == null) {\n      fileCache = new URI[1];\n    } else {\n      URI[] tmp = new URI[fileCache.length+1];\n      System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);\n      fileCache = tmp;\n    }\n    try {\n      fileCache[0] = new URI(exec);\n    } catch (URISyntaxException e) {\n      String msg = \"Problem parsing executable URI \" + exec;\n      IOException ie = new IOException(msg);\n      ie.initCause(e);\n      throw ie;\n    }\n    DistributedCache.setCacheFiles(fileCache, conf);\n  }\n\n  public int run(String[] args) throws Exception {\n    CommandLineParser cli = new CommandLineParser();\n    if (args.length == 0) {\n      cli.printUsage();\n      return 1;\n    }\n    try {\n      Job job = new Job(new Configuration());\n      job.setJobName(getClass().getName());\n      Configuration conf = job.getConfiguration();\n      CommandLine results = cli.parse(conf, args);\n      if (results.hasOption(\"input\")) {\n        Path path = new Path(results.getOptionValue(\"input\"));\n        FileInputFormat.setInputPaths(job, path);\n      }\n      if (results.hasOption(\"output\")) {\n        Path path = new Path(results.getOptionValue(\"output\"));\n        FileOutputFormat.setOutputPath(job,path);\n      }\n      if (results.hasOption(\"jar\")) {\n        job.setJar(results.getOptionValue(\"jar\"));\n      }\n      if (results.hasOption(\"inputformat\")) {\n        explicitInputFormat = true;\n        setIsJavaRecordReader(conf, true);\n        
job.setInputFormatClass(getClass(results, \"inputformat\", conf,\n            InputFormat.class));\n      }\n      if (results.hasOption(\"javareader\")) {\n        setIsJavaRecordReader(conf, true);\n      }\n      if (results.hasOption(\"map\")) {\n        setIsJavaMapper(conf, true);\n        job.setMapperClass(getClass(results, \"map\", conf, Mapper.class));\n      }\n      if (results.hasOption(\"partitioner\")) {\n        job.setPartitionerClass(getClass(results, \"partitioner\", conf,\n            Partitioner.class));\n      }\n      if (results.hasOption(\"reduce\")) {\n        setIsJavaReducer(conf, true);\n        job.setReducerClass(getClass(results, \"reduce\", conf, Reducer.class));\n      }\n      if (results.hasOption(\"reduces\")) {\n        job.setNumReduceTasks(Integer.parseInt(\n            results.getOptionValue(\"reduces\")));\n      }\n      if (results.hasOption(\"writer\")) {\n        explicitOutputFormat = true;\n        setIsJavaRecordWriter(conf, true);\n        job.setOutputFormatClass(getClass(results, \"writer\", conf,\n            OutputFormat.class));\n      }\n      if (results.hasOption(\"lazyOutput\")) {\n        if (Boolean.parseBoolean(results.getOptionValue(\"lazyOutput\"))) {\n          LazyOutputFormat.setOutputFormatClass(\n              job, job.getOutputFormatClass());\n        }\n      }\n      if (results.hasOption(\"avroInput\")) {\n        avroInput = AvroIO.valueOf(\n            results.getOptionValue(\"avroInput\").toUpperCase());\n      }\n      if (results.hasOption(\"avroOutput\")) {\n        avroOutput = AvroIO.valueOf(\n            results.getOptionValue(\"avroOutput\").toUpperCase());\n      }\n\n      if (results.hasOption(\"program\")) {\n        setExecutable(conf, results.getOptionValue(\"program\"));\n      }\n      // if they gave us a jar file, include it into the class path\n      String jarFile = job.getJar();\n      if (jarFile != null) {\n        final URL[] urls = new URL[] {\n          
FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL()\n        };\n        // FindBugs complains that creating a URLClassLoader should be\n        // in a doPrivileged() block.\n        ClassLoader loader = AccessController.doPrivileged(\n            new PrivilegedAction<ClassLoader>() {\n              public ClassLoader run() {return new URLClassLoader(urls);}\n            }\n        );\n        conf.setClassLoader(loader);\n      }\n      setupPipesJob(job);\n      return job.waitForCompletion(true) ? 0 : 1;\n    } catch (ParseException pe) {\n      LOG.info(\"Error : \" + pe);\n      cli.printUsage();\n      return 1;\n    }\n  }\n\n  public static void main(String[] args) throws Exception {\n    int exitCode =  new Submitter().run(args);\n    ExitUtil.terminate(exitCode);\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/TaskLog.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport java.io.BufferedOutputStream;\nimport java.io.BufferedReader;\nimport java.io.DataOutputStream;\nimport java.io.File;\nimport java.io.FileInputStream;\nimport java.io.Flushable;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.io.InputStreamReader;\nimport java.util.ArrayList;\nimport java.util.Enumeration;\nimport java.util.List;\nimport java.util.concurrent.Executors;\nimport java.util.concurrent.ScheduledExecutorService;\nimport java.util.concurrent.ThreadFactory;\nimport java.util.concurrent.TimeUnit;\n\nimport org.apache.commons.logging.Log;\nimport org.apache.commons.logging.LogFactory;\nimport org.apache.hadoop.classification.InterfaceAudience;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.fs.FileStatus;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.FileUtil;\nimport org.apache.hadoop.fs.LocalFileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.IOUtils;\nimport org.apache.hadoop.io.SecureIOUtils;\nimport org.apache.hadoop.mapreduce.JobID;\nimport 
org.apache.hadoop.mapreduce.MRJobConfig;\nimport org.apache.hadoop.mapreduce.TaskAttemptID;\nimport org.apache.hadoop.mapreduce.TaskID;\nimport org.apache.hadoop.mapreduce.util.ProcessTree;\nimport org.apache.hadoop.util.Shell;\nimport org.apache.hadoop.util.StringUtils;\nimport org.apache.hadoop.util.ShutdownHookManager;\nimport org.apache.hadoop.yarn.conf.YarnConfiguration;\nimport org.apache.log4j.Appender;\nimport org.apache.log4j.LogManager;\nimport org.apache.log4j.Logger;\nimport java.lang.reflect.Field;\n\nimport com.google.common.base.Charsets;\n\n/**\n * A simple logger to handle the task-specific user logs.\n * This class uses the system property <code>hadoop.log.dir</code>.\n *\n */\n@InterfaceAudience.Private\npublic class TaskLog {\n    private static final Log LOG =\n    LogFactory.getLog(TaskLog.class);\n    \n    static final String USERLOGS_DIR_NAME = \"userlogs\";\n    \n    private static final File LOG_DIR =\n    new File(getBaseLogDir(), USERLOGS_DIR_NAME).getAbsoluteFile();\n    \n    // localFS is set in (and used by) writeToIndexFile()\n    static LocalFileSystem localFS = null;\n    \n    private static String getYarnAppContainerLogDir(){\n        try{\n            Field field = YarnConfiguration.class.getField(\"YARN_APP_CONTAINER_LOG_DIR\");\n            if(field!=null)\n                return (String) field.get(null);\n        }catch(Exception e){}\n        return \"yarn.app.container.log.dir\";\n    }\n    \n    public static String getMRv2LogDir() {\n        return System.getProperty(getYarnAppContainerLogDir());\n    }\n    \n    public static File getTaskLogFile(TaskAttemptID taskid, boolean isCleanup,\n                                      LogName filter) {\n        if (getMRv2LogDir() != null) {\n            return new File(getMRv2LogDir(), filter.toString());\n        } else {\n            return new File(getAttemptDir(taskid, isCleanup), filter.toString());\n        }\n    }\n    \n    static File 
getRealTaskLogFileLocation(TaskAttemptID taskid,\n                                           boolean isCleanup, LogName filter) {\n        LogFileDetail l;\n        try {\n            l = getLogFileDetail(taskid, filter, isCleanup);\n        } catch (IOException ie) {\n            LOG.error(\"getTaskLogFileDetail threw an exception \" + ie);\n            return null;\n        }\n        return new File(l.location, filter.toString());\n    }\n    private static class LogFileDetail {\n        final static String LOCATION = \"LOG_DIR:\";\n        String location;\n        long start;\n        long length;\n    }\n    \n    private static LogFileDetail getLogFileDetail(TaskAttemptID taskid,\n                                                  LogName filter,\n                                                  boolean isCleanup)\n    throws IOException {\n        File indexFile = getIndexFile(taskid, isCleanup);\n        BufferedReader fis = new BufferedReader(new InputStreamReader(\n                                                                      SecureIOUtils.openForRead(indexFile, obtainLogDirOwner(taskid), null),\n                                                                      Charsets.UTF_8));\n        //the format of the index file is\n        //LOG_DIR: <the dir where the task logs are really stored>\n        //stdout:<start-offset in the stdout file> <length>\n        //stderr:<start-offset in the stderr file> <length>\n        //syslog:<start-offset in the syslog file> <length>\n        LogFileDetail l = new LogFileDetail();\n        String str = null;\n        try {\n            str = fis.readLine();\n            if (str == null) { // the file doesn't have anything\n                throw new IOException(\"Index file for the log of \" + taskid\n                                      + \" doesn't exist.\");\n            }\n            l.location = str.substring(str.indexOf(LogFileDetail.LOCATION)\n                                       + 
LogFileDetail.LOCATION.length());\n            // special cases are the debugout and profile.out files. They are\n            // guaranteed\n            // to be associated with each task attempt since jvm reuse is disabled\n            // when profiling/debugging is enabled\n            if (filter.equals(LogName.DEBUGOUT) || filter.equals(LogName.PROFILE)) {\n                l.length = new File(l.location, filter.toString()).length();\n                l.start = 0;\n                fis.close();\n                return l;\n            }\n            str = fis.readLine();\n            while (str != null) {\n                // look for the exact line containing the logname\n                if (str.contains(filter.toString())) {\n                    str = str.substring(filter.toString().length() + 1);\n                    String[] startAndLen = str.split(\" \");\n                    l.start = Long.parseLong(startAndLen[0]);\n                    l.length = Long.parseLong(startAndLen[1]);\n                    break;\n                }\n                str = fis.readLine();\n            }\n            fis.close();\n            fis = null;\n        } finally {\n            IOUtils.cleanup(LOG, fis);\n        }\n        return l;\n    }\n    \n    private static File getTmpIndexFile(TaskAttemptID taskid, boolean isCleanup) {\n        return new File(getAttemptDir(taskid, isCleanup), \"log.tmp\");\n    }\n    \n    static File getIndexFile(TaskAttemptID taskid, boolean isCleanup) {\n        return new File(getAttemptDir(taskid, isCleanup), \"log.index\");\n    }\n    \n    /**\n     * Obtain the owner of the log dir. 
This is\n     * determined by checking the job's log directory.\n     */\n    static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException {\n        Configuration conf = new Configuration();\n        FileSystem raw = FileSystem.getLocal(conf).getRaw();\n        Path jobLogDir = new Path(getJobDir(taskid.getJobID()).getAbsolutePath());\n        FileStatus jobStat = raw.getFileStatus(jobLogDir);\n        return jobStat.getOwner();\n    }\n    \n    static String getBaseLogDir() {\n        return System.getProperty(\"hadoop.log.dir\");\n    }\n    \n    static File getAttemptDir(TaskAttemptID taskid, boolean isCleanup) {\n        String cleanupSuffix = isCleanup ? \".cleanup\" : \"\";\n        return new File(getJobDir(taskid.getJobID()), taskid + cleanupSuffix);\n    }\n    private static long prevOutLength;\n    private static long prevErrLength;\n    private static long prevLogLength;\n    \n    private static synchronized\n    void writeToIndexFile(String logLocation,\n                          boolean isCleanup) throws IOException {\n        // To ensure atomicity of updates to index file, write to temporary index\n        // file first and then rename.\n        File tmpIndexFile = getTmpIndexFile(currentTaskid, isCleanup);\n        \n        BufferedOutputStream bos =\n        new BufferedOutputStream(\n                                 SecureIOUtils.createForWrite(tmpIndexFile, 0644));\n        DataOutputStream dos = new DataOutputStream(bos);\n        //the format of the index file is\n        //LOG_DIR: <the dir where the task logs are really stored>\n        //STDOUT: <start-offset in the stdout file> <length>\n        //STDERR: <start-offset in the stderr file> <length>\n        //SYSLOG: <start-offset in the syslog file> <length>\n        try{\n            dos.writeBytes(LogFileDetail.LOCATION + logLocation + \"\\n\"\n                           + LogName.STDOUT.toString() + \":\");\n            dos.writeBytes(Long.toString(prevOutLength) + \" 
\");\n            dos.writeBytes(Long.toString(new File(logLocation, LogName.STDOUT\n                                                  .toString()).length() - prevOutLength)\n                           + \"\\n\" + LogName.STDERR + \":\");\n            dos.writeBytes(Long.toString(prevErrLength) + \" \");\n            dos.writeBytes(Long.toString(new File(logLocation, LogName.STDERR\n                                                  .toString()).length() - prevErrLength)\n                           + \"\\n\" + LogName.SYSLOG.toString() + \":\");\n            dos.writeBytes(Long.toString(prevLogLength) + \" \");\n            dos.writeBytes(Long.toString(new File(logLocation, LogName.SYSLOG\n                                                  .toString()).length() - prevLogLength)\n                           + \"\\n\");\n            dos.close();\n            dos = null;\n        } finally {\n            IOUtils.cleanup(LOG, dos);\n        }\n        \n        File indexFile = getIndexFile(currentTaskid, isCleanup);\n        Path indexFilePath = new Path(indexFile.getAbsolutePath());\n        Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath());\n        \n        if (localFS == null) {// set localFS once\n            localFS = FileSystem.getLocal(new Configuration());\n        }\n        localFS.rename (tmpIndexFilePath, indexFilePath);\n    }\n    private static void resetPrevLengths(String logLocation) {\n        prevOutLength = new File(logLocation, LogName.STDOUT.toString()).length();\n        prevErrLength = new File(logLocation, LogName.STDERR.toString()).length();\n        prevLogLength = new File(logLocation, LogName.SYSLOG.toString()).length();\n    }\n    private volatile static TaskAttemptID currentTaskid = null;\n    \n    @SuppressWarnings(\"unchecked\")\n    public synchronized static void syncLogs(String logLocation,\n                                             TaskAttemptID taskid,\n                                             boolean 
isCleanup)\n    throws IOException {\n        System.out.flush();\n        System.err.flush();\n        Enumeration<Logger> allLoggers = LogManager.getCurrentLoggers();\n        while (allLoggers.hasMoreElements()) {\n            Logger l = allLoggers.nextElement();\n            Enumeration<Appender> allAppenders = l.getAllAppenders();\n            while (allAppenders.hasMoreElements()) {\n                Appender a = allAppenders.nextElement();\n                if (a instanceof TaskLogAppender) {\n                    ((TaskLogAppender)a).flush();\n                }\n            }\n        }\n        if (currentTaskid != taskid) {\n            currentTaskid = taskid;\n            resetPrevLengths(logLocation);\n        }\n        writeToIndexFile(logLocation, isCleanup);\n    }\n    \n    public static synchronized void syncLogsShutdown(\n                                                     ScheduledExecutorService scheduler)\n    {\n        // flush standard streams\n        //\n        System.out.flush();\n        System.err.flush();\n        \n        if (scheduler != null) {\n            scheduler.shutdownNow();\n        }\n        \n        // flush & close all appenders\n        LogManager.shutdown();\n    }\n    \n    @SuppressWarnings(\"unchecked\")\n    public static synchronized void syncLogs() {\n        // flush standard streams\n        //\n        System.out.flush();\n        System.err.flush();\n        \n        // flush flushable appenders\n        //\n        final Logger rootLogger = Logger.getRootLogger();\n        flushAppenders(rootLogger);\n        final Enumeration<Logger> allLoggers = rootLogger.getLoggerRepository().\n        getCurrentLoggers();\n        while (allLoggers.hasMoreElements()) {\n            final Logger l = allLoggers.nextElement();\n            flushAppenders(l);\n        }\n    }\n    \n    @SuppressWarnings(\"unchecked\")\n    private static void flushAppenders(Logger l) {\n        final Enumeration<Appender> 
allAppenders = l.getAllAppenders();\n        while (allAppenders.hasMoreElements()) {\n            final Appender a = allAppenders.nextElement();\n            if (a instanceof Flushable) {\n                try {\n                    ((Flushable) a).flush();\n                } catch (IOException ioe) {\n                    System.err.println(a + \": Failed to flush!\"\n                                       + StringUtils.stringifyException(ioe));\n                }\n            }\n        }\n    }\n    \n    public static ScheduledExecutorService createLogSyncer() {\n        final ScheduledExecutorService scheduler =\n        Executors.newSingleThreadScheduledExecutor(\n                                                   new ThreadFactory() {\n            @Override\n            public Thread newThread(Runnable r) {\n                final Thread t = Executors.defaultThreadFactory().newThread(r);\n                t.setDaemon(true);\n                t.setName(\"Thread for syncLogs\");\n                return t;\n            }\n        });\n        ShutdownHookManager.get().addShutdownHook(new Runnable() {\n            @Override\n            public void run() {\n                TaskLog.syncLogsShutdown(scheduler);\n            }\n        }, 50);\n        scheduler.scheduleWithFixedDelay(\n                                         new Runnable() {\n            @Override\n            public void run() {\n                TaskLog.syncLogs();\n            }\n        }, 0L, 5L, TimeUnit.SECONDS);\n        return scheduler;\n    }\n    \n    /**\n     * The filter for userlogs.\n     */\n    @InterfaceAudience.Private\n    public static enum LogName {\n        /** Log on the stdout of the task. */\n        STDOUT (\"stdout\"),\n        \n        /** Log on the stderr of the task. */\n        STDERR (\"stderr\"),\n        \n        /** Log on the map-reduce system logs of the task. */\n        SYSLOG (\"syslog\"),\n        \n        /** The java profiler information. 
*/\n        PROFILE (\"profile.out\"),\n        \n        /** Log the debug script's stdout  */\n        DEBUGOUT (\"debugout\");\n        \n        private String prefix;\n        \n        private LogName(String prefix) {\n            this.prefix = prefix;\n        }\n        \n        @Override\n        public String toString() {\n            return prefix;\n        }\n    }\n    \n    public static class Reader extends InputStream {\n        private long bytesRemaining;\n        private FileInputStream file;\n        \n        /**\n         * Read a log file from start to end positions. The offsets may be negative,\n         * in which case they are relative to the end of the file. For example,\n         * Reader(taskid, kind, 0, -1) is the entire file and\n         * Reader(taskid, kind, -4197, -1) is the last 4196 bytes.\n         * @param taskid the id of the task to read the log file for\n         * @param kind the kind of log to read\n         * @param start the offset to read from (negative is relative to tail)\n         * @param end the offset to read upto (negative is relative to tail)\n         * @param isCleanup whether the attempt is cleanup attempt or not\n         * @throws IOException\n         */\n        public Reader(TaskAttemptID taskid, LogName kind,\n                      long start, long end, boolean isCleanup) throws IOException {\n            // find the right log file\n            LogFileDetail fileDetail = getLogFileDetail(taskid, kind, isCleanup);\n            // calculate the start and stop\n            long size = fileDetail.length;\n            if (start < 0) {\n                start += size + 1;\n            }\n            if (end < 0) {\n                end += size + 1;\n            }\n            start = Math.max(0, Math.min(start, size));\n            end = Math.max(0, Math.min(end, size));\n            start += fileDetail.start;\n            end += fileDetail.start;\n            bytesRemaining = end - start;\n            String 
owner = obtainLogDirOwner(taskid);\n            file = SecureIOUtils.openForRead(new File(fileDetail.location, kind.toString()),\n                                             owner, null);\n            // skip upto start\n            long pos = 0;\n            while (pos < start) {\n                long result = file.skip(start - pos);\n                if (result < 0) {\n                    bytesRemaining = 0;\n                    break;\n                }\n                pos += result;\n            }\n        }\n        \n        @Override\n        public int read() throws IOException {\n            int result = -1;\n            if (bytesRemaining > 0) {\n                bytesRemaining -= 1;\n                result = file.read();\n            }\n            return result;\n        }\n        \n        @Override\n        public int read(byte[] buffer, int offset, int length) throws IOException {\n            length = (int) Math.min(length, bytesRemaining);\n            int bytes = file.read(buffer, offset, length);\n            if (bytes > 0) {\n                bytesRemaining -= bytes;\n            }\n            return bytes;\n        }\n        \n        @Override\n        public int available() throws IOException {\n            return (int) Math.min(bytesRemaining, file.available());\n        }\n        \n        @Override\n        public void close() throws IOException {\n            file.close();\n        }\n    }\n    \n    private static final String bashCommand = \"bash\";\n    private static final String tailCommand = \"tail\";\n    \n    /**\n     * Get the desired maximum length of task's logs.\n     * @param conf the job to look in\n     * @return the number of bytes to cap the log files at\n     */\n    public static long getTaskLogLength(Configuration conf) {\n        return conf.getLong(MRJobConfig.TASK_USERLOG_LIMIT, 0) * 1024;\n    }\n    \n    \n    /**\n     * Wrap a command in a shell to capture stdout and stderr to files.\n     * Setup 
commands such as setting memory limit can be passed which\n     * will be executed before exec.\n     * If the tailLength is 0, the entire output will be saved.\n     * @param setup The setup commands for the execed process.\n     * @param cmd The command and the arguments that should be run\n     * @param stdoutFilename The filename that stdout should be saved to\n     * @param stderrFilename The filename that stderr should be saved to\n     * @param tailLength The length of the tail to be saved.\n     * @param useSetsid Should setsid be used in the command or not.\n     * @return the modified command that should be run\n     */\n    public static List<String> captureOutAndError(List<String> setup,\n                                                  List<String> cmd,\n                                                  File stdoutFilename,\n                                                  File stderrFilename,\n                                                  long tailLength,\n                                                  boolean useSetsid\n                                                  ) throws IOException {\n        List<String> result = new ArrayList<String>(3);\n        result.add(bashCommand);\n        result.add(\"-c\");\n        String mergedCmd = buildCommandLine(setup, cmd, stdoutFilename,\n                                            stderrFilename, tailLength,\n                                            useSetsid);\n        result.add(mergedCmd);\n        return result;\n    }\n    \n    /**\n     * Construct the command line for running the task JVM\n     * @param setup The setup commands for the execed process.\n     * @param cmd The command and the arguments that should be run\n     * @param stdoutFilename The filename that stdout should be saved to\n     * @param stderrFilename The filename that stderr should be saved to\n     * @param tailLength The length of the tail to be saved.\n     * @return the command line as a String\n     * @throws 
IOException\n     */\n    static String buildCommandLine(List<String> setup, List<String> cmd,\n                                   File stdoutFilename,\n                                   File stderrFilename,\n                                   long tailLength,\n                                   boolean useSetsid)\n    throws IOException {\n        \n        String stdout = FileUtil.makeShellPath(stdoutFilename);\n        String stderr = FileUtil.makeShellPath(stderrFilename);\n        StringBuffer mergedCmd = new StringBuffer();\n        \n        // Export the pid of taskJvm to env variable JVM_PID.\n        // Currently pid is not used on Windows\n        if (!Shell.WINDOWS) {\n            mergedCmd.append(\" export JVM_PID=`echo $$` ; \");\n        }\n        \n        if (setup != null && setup.size() > 0) {\n            mergedCmd.append(addCommand(setup, false));\n            mergedCmd.append(\";\");\n        }\n        if (tailLength > 0) {\n            mergedCmd.append(\"(\");\n        } else if(ProcessTree.isSetsidAvailable && useSetsid &&\n                  !Shell.WINDOWS) {\n            mergedCmd.append(\"exec setsid \");\n        } else {\n            mergedCmd.append(\"exec \");\n        }\n        mergedCmd.append(addCommand(cmd, true));\n        mergedCmd.append(\" < /dev/null \");\n        if (tailLength > 0) {\n            mergedCmd.append(\" | \");\n            mergedCmd.append(tailCommand);\n            mergedCmd.append(\" -c \");\n            mergedCmd.append(tailLength);\n            mergedCmd.append(\" >> \");\n            mergedCmd.append(stdout);\n            mergedCmd.append(\" ; exit $PIPESTATUS ) 2>&1 | \");\n            mergedCmd.append(tailCommand);\n            mergedCmd.append(\" -c \");\n            mergedCmd.append(tailLength);\n            mergedCmd.append(\" >> \");\n            mergedCmd.append(stderr);\n            mergedCmd.append(\" ; exit $PIPESTATUS\");\n        } else {\n            mergedCmd.append(\" 1>> \");\n           
 mergedCmd.append(stdout);\n            mergedCmd.append(\" 2>> \");\n            mergedCmd.append(stderr);\n        }\n        return mergedCmd.toString();\n    }\n    \n    /**\n     * Construct the command line for running the debug script\n     * @param cmd The command and the arguments that should be run\n     * @param stdoutFilename The filename that stdout should be saved to\n     * @param stderrFilename The filename that stderr should be saved to\n     * @param tailLength The length of the tail to be saved.\n     * @return the command line as a String\n     * @throws IOException\n     */\n    static String buildDebugScriptCommandLine(List<String> cmd, String debugout)\n    throws IOException {\n        StringBuilder mergedCmd = new StringBuilder();\n        mergedCmd.append(\"exec \");\n        boolean isExecutable = true;\n        for(String s: cmd) {\n            if (isExecutable) {\n                // the executable name needs to be expressed as a shell path for the  \n                // shell to find it.\n                mergedCmd.append(FileUtil.makeShellPath(new File(s)));\n                isExecutable = false; \n            } else {\n                mergedCmd.append(s);\n            }\n            mergedCmd.append(\" \");\n        }\n        mergedCmd.append(\" < /dev/null \");\n        mergedCmd.append(\" >\");\n        mergedCmd.append(debugout);\n        mergedCmd.append(\" 2>&1 \");\n        return mergedCmd.toString();\n    }\n    /**\n     * Add quotes to each of the command strings and\n     * return as a single string \n     * @param cmd The command to be quoted\n     * @param isExecutable makes shell path if the first \n     * argument is executable\n     * @return returns The quoted string. 
\n     * @throws IOException\n     */\n    public static String addCommand(List<String> cmd, boolean isExecutable) \n    throws IOException {\n        StringBuffer command = new StringBuffer();\n        for(String s: cmd) {\n            command.append('\\'');\n            if (isExecutable) {\n                // the executable name needs to be expressed as a shell path for the  \n                // shell to find it.\n                command.append(FileUtil.makeShellPath(new File(s)));\n                isExecutable = false; \n            } else {\n                command.append(s);\n            }\n            command.append('\\'');\n            command.append(\" \");\n        }\n        return command.toString();\n    }\n    \n    \n    /**\n     * Method to return the location of user log directory.\n     * \n     * @return base log directory\n     */\n    static File getUserLogDir() {\n        if (!LOG_DIR.exists()) {\n            boolean b = LOG_DIR.mkdirs();\n            if (!b) {\n                LOG.debug(\"mkdirs failed. Ignoring.\");\n            }\n        }\n        return LOG_DIR;\n    }\n    \n    /**\n     * Get the user log directory for the job jobid.\n     * \n     * @param jobid\n     * @return user log directory for the job\n     */\n    public static File getJobDir(JobID jobid) {\n        return new File(getUserLogDir(), jobid.toString());\n    }\n    \n} // TaskLog\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/TaskLogAppender.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.Flushable;\nimport java.util.LinkedList;\nimport java.util.Queue;\n\n\nimport org.apache.log4j.FileAppender;\nimport org.apache.log4j.spi.LoggingEvent;\n\nimport org.apache.hadoop.classification.InterfaceStability;\nimport org.apache.hadoop.mapreduce.TaskAttemptID;\n\n\n/**\n * A simple log4j-appender for the task child's \n * map-reduce system logs.\n * \n */\n@InterfaceStability.Unstable\npublic class TaskLogAppender extends FileAppender implements Flushable {\n  private String taskId; //taskId should be managed as String rather than TaskID object\n  //so that log4j can configure it from the configuration(log4j.properties). 
\n  private Integer maxEvents;\n  private Queue<LoggingEvent> tail = null;\n  private Boolean isCleanup;\n\n  // System properties passed in from JVM runner\n  static final String ISCLEANUP_PROPERTY = \"hadoop.tasklog.iscleanup\";\n  static final String LOGSIZE_PROPERTY = \"hadoop.tasklog.totalLogFileSize\";\n  static final String TASKID_PROPERTY = \"hadoop.tasklog.taskid\";\n\n  @Override\n  public void activateOptions() {\n    synchronized (this) {\n      setOptionsFromSystemProperties();\n\n      if (maxEvents > 0) {\n        tail = new LinkedList<LoggingEvent>();\n      }\n      setFile(TaskLog.getTaskLogFile(TaskAttemptID.forName(taskId),\n          isCleanup, TaskLog.LogName.SYSLOG).toString());\n      setAppend(true);\n      super.activateOptions();\n    }\n  }\n\n  /**\n   * The Task Runner passes in the options as system properties. Set\n   * the options if the setters haven't already been called.\n   */\n  private synchronized void setOptionsFromSystemProperties() {\n    if (isCleanup == null) {\n      String propValue = System.getProperty(ISCLEANUP_PROPERTY, \"false\");\n      isCleanup = Boolean.valueOf(propValue);\n    }\n\n    if (taskId == null) {\n      taskId = System.getProperty(TASKID_PROPERTY);\n    }\n\n    if (maxEvents == null) {\n      String propValue = System.getProperty(LOGSIZE_PROPERTY, \"0\");\n      setTotalLogFileSize(Long.valueOf(propValue));\n    }\n  }\n  \n  @Override\n  public void append(LoggingEvent event) {\n    synchronized (this) {\n      if (tail == null) {\n        super.append(event);\n      } else {\n        if (tail.size() >= maxEvents) {\n          tail.remove();\n        }\n        tail.add(event);\n      }\n    }\n  }\n  \n  @Override\n  public void flush() {\n    if (qw != null) {\n      qw.flush();\n    }\n  }\n\n  @Override\n  public synchronized void close() {\n    if (tail != null) {\n      for(LoggingEvent event: tail) {\n        super.append(event);\n      }\n    }\n    super.close();\n  }\n\n  /**\n   * 
Getter/Setter methods for log4j.\n   */\n  \n  public synchronized String getTaskId() {\n    return taskId;\n  }\n\n  public synchronized void setTaskId(String taskId) {\n    this.taskId = taskId;\n  }\n\n  private static final int EVENT_SIZE = 100;\n  \n  public synchronized long getTotalLogFileSize() {\n    return maxEvents * EVENT_SIZE;\n  }\n\n  public synchronized void setTotalLogFileSize(long logSize) {\n    maxEvents = (int) logSize / EVENT_SIZE;\n  }\n\n  /**\n   * Set whether the task is a cleanup attempt or not.\n   * \n   * @param isCleanup\n   *          true if the task is cleanup attempt, false otherwise.\n   */\n  public synchronized void setIsCleanup(boolean isCleanup) {\n    this.isCleanup = isCleanup;\n  }\n\n  /**\n   * Get whether task is cleanup attempt or not.\n   * \n   * @return true if the task is cleanup attempt, false otherwise.\n   */\n  public synchronized boolean getIsCleanup() {\n    return isCleanup;\n  }\n}\n"
  },
  {
    "path": "src/it/crs4/pydoop/mapreduce/pipes/UpwardProtocol.java",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\npackage it.crs4.pydoop.mapreduce.pipes;\n\n\nimport java.io.IOException;\nimport org.apache.hadoop.io.Writable;\nimport org.apache.hadoop.io.WritableComparable;\n\n/**\n * The interface for the messages that can come up from the child. 
All of these\n * calls are asynchronous and return before the message has been processed.\n */\ninterface UpwardProtocol<K extends WritableComparable, V extends Writable> {\n    /**\n     * Output a record from the child.\n     * @param key the record's key\n     * @param value the record's value\n     * @throws IOException\n     */\n    void output(K key, V value) throws IOException, InterruptedException;\n  \n    /**\n     * Map functions where the application has defined a partition function\n     * output records along with their partition.\n     * @param reduce the reduce to send this record to\n     * @param key the record's key\n     * @param value the record's value\n     * @throws IOException\n     */\n    void partitionedOutput(int reduce, K key, \n                           V value) throws IOException, InterruptedException;\n  \n    /**\n     * Update the task's status message\n     * @param msg the string to display to the user\n     * @throws IOException\n     */\n    void status(String msg) throws IOException, InterruptedException;\n  \n    /**\n     * Report making progress (and the current progress)\n     * @param progress the current progress (0.0 to 1.0)\n     * @throws IOException\n     */\n    void progress(float progress) throws IOException, InterruptedException;\n  \n    /**\n     * Report that the application has finished processing all inputs \n     * successfully.\n     * @throws IOException\n     */\n    void done() throws IOException, InterruptedException;\n  \n    /**\n     * Report that the application or more likely communication failed.\n     * @param e\n     */\n    void failed(Throwable e);\n  \n    /**\n     * Register a counter with the given id and group/name.\n     * @param group counter group\n     * @param name counter name\n     * @throws IOException\n     */\n    void registerCounter(int id, String group, String name) throws IOException;\n  \n    /**\n     * Increment the value of a registered counter.\n     * @param id 
counter id of the registered counter\n     * @param amount increment for the counter value\n     * @throws IOException\n     */\n    void incrementCounter(int id, long amount) throws IOException;\n\n    /**\n     * Handles authentication response from client.\n     * It must notify the threads waiting for authentication response.\n     * @param digest\n     * @return true if authentication is successful\n     * @throws IOException\n     */\n    boolean authenticate(String digest) throws IOException;\n\n}\n"
  },
  {
    "path": "src/libhdfs/common/htable.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"common/htable.h\"\n\n#include <errno.h>\n#include <inttypes.h>\n#include <stdlib.h>\n#include <string.h>\n#include <unistd.h>\n\nstruct htable_pair {\n    void *key;\n    void *val;\n};\n\n/**\n * A hash table which uses linear probing.\n */\nstruct htable {\n    uint32_t capacity;\n    uint32_t used;\n    htable_hash_fn_t hash_fun;\n    htable_eq_fn_t eq_fun;\n    struct htable_pair *elem;\n};\n\n/**\n * An internal function for inserting a value into the hash table.\n *\n * Note: this function assumes that you have made enough space in the table.\n *\n * @param nelem         The new element to insert.\n * @param capacity      The capacity of the hash table.\n * @param hash_fun      The hash function to use.\n * @param key           The key to insert.\n * @param val           The value to insert.\n */\nstatic void htable_insert_internal(struct htable_pair *nelem, \n        uint32_t capacity, htable_hash_fn_t hash_fun, void *key,\n        void *val)\n{\n    uint32_t i;\n\n    i = hash_fun(key, capacity);\n    while (1) {\n        if (!nelem[i].key) {\n            nelem[i].key = key;\n            nelem[i].val = val;\n         
   return;\n        }\n        i++;\n        if (i == capacity) {\n            i = 0;\n        }\n    }\n}\n\nstatic int htable_realloc(struct htable *htable, uint32_t new_capacity)\n{\n    struct htable_pair *nelem;\n    uint32_t i, old_capacity = htable->capacity;\n    htable_hash_fn_t hash_fun = htable->hash_fun;\n\n    nelem = calloc(new_capacity, sizeof(struct htable_pair));\n    if (!nelem) {\n        return ENOMEM;\n    }\n    for (i = 0; i < old_capacity; i++) {\n        struct htable_pair *pair = htable->elem + i;\n        if (pair->key) {\n            htable_insert_internal(nelem, new_capacity, hash_fun,\n                                   pair->key, pair->val);\n        }\n    }\n    free(htable->elem);\n    htable->elem = nelem;\n    htable->capacity = new_capacity;\n    return 0;\n}\n\nstatic uint32_t round_up_to_power_of_2(uint32_t i)\n{\n    if (i == 0) {\n        return 1;\n    }\n    i--;\n    i |= i >> 1;\n    i |= i >> 2;\n    i |= i >> 4;\n    i |= i >> 8;\n    i |= i >> 16;\n    i++;\n    return i;\n}\n\nstruct htable *htable_alloc(uint32_t size,\n                htable_hash_fn_t hash_fun, htable_eq_fn_t eq_fun)\n{\n    struct htable *htable;\n\n    htable = calloc(1, sizeof(*htable));\n    if (!htable) {\n        return NULL;\n    }\n    size = round_up_to_power_of_2(size);\n    if (size < HTABLE_MIN_SIZE) {\n        size = HTABLE_MIN_SIZE;\n    }\n    htable->hash_fun = hash_fun;\n    htable->eq_fun = eq_fun;\n    htable->used = 0;\n    if (htable_realloc(htable, size)) {\n        free(htable);\n        return NULL;\n    }\n    return htable;\n}\n\nvoid htable_visit(struct htable *htable, visitor_fn_t fun, void *ctx)\n{\n    uint32_t i;\n\n    for (i = 0; i != htable->capacity; ++i) {\n        struct htable_pair *elem = htable->elem + i;\n        if (elem->key) {\n            fun(ctx, elem->key, elem->val);\n        }\n    }\n}\n\nvoid htable_free(struct htable *htable)\n{\n    if (htable) {\n        free(htable->elem);\n        
free(htable);\n    }\n}\n\nint htable_put(struct htable *htable, void *key, void *val)\n{\n    int ret;\n    uint32_t nused;\n\n    // NULL is not a valid key value.\n    // This helps us implement htable_get_internal efficiently, since we know\n    // that we can stop when we encounter the first NULL key.\n    if (!key) {\n        return EINVAL;\n    }\n    // NULL is not a valid value.  Otherwise the results of htable_get would\n    // be confusing (does a NULL return mean entry not found, or that the\n    // entry was found and was NULL?) \n    if (!val) {\n        return EINVAL;\n    }\n    // Re-hash if we have used more than half of the hash table\n    nused = htable->used + 1;\n    if (nused >= (htable->capacity / 2)) {\n        ret = htable_realloc(htable, htable->capacity * 2);\n        if (ret)\n            return ret;\n    }\n    htable_insert_internal(htable->elem, htable->capacity,\n                                htable->hash_fun, key, val);\n    htable->used++;\n    return 0;\n}\n\nstatic int htable_get_internal(const struct htable *htable,\n                               const void *key, uint32_t *out)\n{\n    uint32_t start_idx, idx;\n\n    start_idx = htable->hash_fun(key, htable->capacity);\n    idx = start_idx;\n    while (1) {\n        struct htable_pair *pair = htable->elem + idx;\n        if (!pair->key) {\n            // We always maintain the invariant that the entries corresponding\n            // to a given key are stored in a contiguous block, not separated\n            // by any NULLs.  
So if we encounter a NULL, our search is over.\n            return ENOENT;\n        } else if (htable->eq_fun(pair->key, key)) {\n            *out = idx;\n            return 0;\n        }\n        idx++;\n        if (idx == htable->capacity) {\n            idx = 0;\n        }\n        if (idx == start_idx) {\n            return ENOENT;\n        }\n    }\n}\n\nvoid *htable_get(const struct htable *htable, const void *key)\n{\n    uint32_t idx;\n\n    if (htable_get_internal(htable, key, &idx)) {\n        return NULL;\n    }\n    return htable->elem[idx].val;\n}\n\nvoid htable_pop(struct htable *htable, const void *key,\n                void **found_key, void **found_val)\n{\n    uint32_t hole, i;\n    const void *nkey;\n\n    if (htable_get_internal(htable, key, &hole)) {\n        *found_key = NULL;\n        *found_val = NULL;\n        return;\n    }\n    i = hole;\n    htable->used--;\n    // We need to maintain the compactness invariant used in\n    // htable_get_internal.  This invariant specifies that the entries for any\n    // given key are never separated by NULLs (although they may be separated\n    // by entries for other keys.)\n    while (1) {\n        i++;\n        if (i == htable->capacity) {\n            i = 0;\n        }\n        nkey = htable->elem[i].key;\n        if (!nkey) {\n            *found_key = htable->elem[hole].key;\n            *found_val = htable->elem[hole].val;\n            htable->elem[hole].key = NULL;\n            htable->elem[hole].val = NULL;\n            return;\n        } else if (htable->eq_fun(key, nkey)) {\n            htable->elem[hole].key = htable->elem[i].key;\n            htable->elem[hole].val = htable->elem[i].val;\n            hole = i;\n        }\n    }\n}\n\nuint32_t htable_used(const struct htable *htable)\n{\n    return htable->used;\n}\n\nuint32_t htable_capacity(const struct htable *htable)\n{\n    return htable->capacity;\n}\n\nuint32_t ht_hash_string(const void *str, uint32_t max)\n{\n    const char *s = 
str;\n    uint32_t hash = 0;\n\n    while (*s) {\n        hash = (hash * 31) + *s;\n        s++;\n    }\n    return hash % max;\n}\n\nint ht_compare_string(const void *a, const void *b)\n{\n    return strcmp(a, b) == 0;\n}\n\n// vim: ts=4:sw=4:tw=79:et\n"
  },
  {
    "path": "src/libhdfs/common/htable.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef HADOOP_CORE_COMMON_HASH_TABLE\n#define HADOOP_CORE_COMMON_HASH_TABLE\n\n#include <inttypes.h>\n#include <stdio.h>\n#include <stdint.h>\n\n#define HTABLE_MIN_SIZE 4\n\nstruct htable;\n\n/**\n * An HTable hash function.\n *\n * @param key       The key.\n * @param capacity  The total capacity.\n *\n * @return          The hash slot.  Must be less than the capacity.\n */\ntypedef uint32_t (*htable_hash_fn_t)(const void *key, uint32_t capacity);\n\n/**\n * An HTable equality function.  
Compares two keys.\n *\n * @param a         First key.\n * @param b         Second key.\n *\n * @return          nonzero if the keys are equal.\n */\ntypedef int (*htable_eq_fn_t)(const void *a, const void *b);\n\n/**\n * Allocate a new hash table.\n *\n * @param capacity  The minimum suggested starting capacity.\n * @param hash_fun  The hash function to use in this hash table.\n * @param eq_fun    The equals function to use in this hash table.\n *\n * @return          The new hash table on success; NULL on OOM.\n */\nstruct htable *htable_alloc(uint32_t capacity, htable_hash_fn_t hash_fun,\n                            htable_eq_fn_t eq_fun);\n\ntypedef void (*visitor_fn_t)(void *ctx, void *key, void *val);\n\n/**\n * Visit all of the entries in the hash table.\n *\n * @param htable    The hash table.\n * @param fun       The callback function to invoke on each key and value.\n * @param ctx       Context pointer to pass to the callback.\n */\nvoid htable_visit(struct htable *htable, visitor_fn_t fun, void *ctx);\n\n/**\n * Free the hash table.\n *\n * It is up the calling code to ensure that the keys and values inside the\n * table are de-allocated, if that is necessary.\n *\n * @param htable    The hash table.\n */\nvoid htable_free(struct htable *htable);\n\n/**\n * Add an entry to the hash table.\n *\n * @param htable    The hash table.\n * @param key       The key to add.  This cannot be NULL.\n * @param fun       The value to add.  
This cannot be NULL.\n *\n * @return          0 on success;\n *                  EEXIST if the value already exists in the table;\n *                  ENOMEM if there is not enough memory to add the element.\n *                  EFBIG if the hash table has too many entries to fit in 32\n *                      bits.\n */\nint htable_put(struct htable *htable, void *key, void *val);\n\n/**\n * Get an entry from the hash table.\n *\n * @param htable    The hash table.\n * @param key       The key to find.\n *\n * @return          NULL if there is no such entry; the entry otherwise.\n */\nvoid *htable_get(const struct htable *htable, const void *key);\n\n/**\n * Get an entry from the hash table and remove it.\n *\n * @param htable    The hash table.\n * @param key       The key for the entry find and remove.\n * @param found_key (out param) NULL if the entry was not found; the found key\n *                      otherwise.\n * @param found_val (out param) NULL if the entry was not found; the found\n *                      value otherwise.\n */\nvoid htable_pop(struct htable *htable, const void *key,\n                void **found_key, void **found_val);\n\n/**\n * Get the number of entries used in the hash table.\n *\n * @param htable    The hash table.\n *\n * @return          The number of entries used in the hash table.\n */\nuint32_t htable_used(const struct htable *htable);\n\n/**\n * Get the capacity of the hash table.\n *\n * @param htable    The hash table.\n *\n * @return          The capacity of the hash table.\n */\nuint32_t htable_capacity(const struct htable *htable);\n\n/**\n * Hash a string.\n *\n * @param str       The string.\n * @param max       Maximum hash value\n *\n * @return          A number less than max.\n */\nuint32_t ht_hash_string(const void *str, uint32_t max);\n\n/**\n * Compare two strings.\n *\n * @param a         The first string.\n * @param b         The second string.\n *\n * @return          1 if the strings are identical; 0 
otherwise.\n */\nint ht_compare_string(const void *a, const void *b);\n\n#endif\n\n// vim: ts=4:sw=4:tw=79:et\n"
  },
  {
    "path": "src/libhdfs/config.h",
    "content": "#ifndef CONFIG_H\n#define CONFIG_H\n#endif\n"
  },
  {
    "path": "src/libhdfs/exception.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"exception.h\"\n#include \"hdfs/hdfs.h\"\n#include \"jni_helper.h\"\n#include \"platform.h\"\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#define EXCEPTION_INFO_LEN (sizeof(gExceptionInfo)/sizeof(gExceptionInfo[0]))\n\nstruct ExceptionInfo {\n    const char * const name;\n    int noPrintFlag;\n    int excErrno;\n};\n\nstatic const struct ExceptionInfo gExceptionInfo[] = {\n    {\n        \"java.io.FileNotFoundException\",\n        NOPRINT_EXC_FILE_NOT_FOUND,\n        ENOENT,\n    },\n    {\n        \"org.apache.hadoop.security.AccessControlException\",\n        NOPRINT_EXC_ACCESS_CONTROL,\n        EACCES,\n    },\n    {\n        \"org.apache.hadoop.fs.UnresolvedLinkException\",\n        NOPRINT_EXC_UNRESOLVED_LINK,\n        ENOLINK,\n    },\n    {\n        \"org.apache.hadoop.fs.ParentNotDirectoryException\",\n        NOPRINT_EXC_PARENT_NOT_DIRECTORY,\n        ENOTDIR,\n    },\n    {\n        \"java.lang.IllegalArgumentException\",\n        NOPRINT_EXC_ILLEGAL_ARGUMENT,\n        EINVAL,\n    },\n    {\n        \"java.lang.OutOfMemoryError\",\n        0,\n        ENOMEM,\n    },\n    {\n        
\"org.apache.hadoop.hdfs.server.namenode.SafeModeException\",\n        0,\n        EROFS,\n    },\n    {\n        \"org.apache.hadoop.fs.FileAlreadyExistsException\",\n        0,\n        EEXIST,\n    },\n    {\n        \"org.apache.hadoop.hdfs.protocol.QuotaExceededException\",\n        0,\n        EDQUOT,\n    },\n    {\n        \"java.lang.UnsupportedOperationException\",\n        0,\n        ENOTSUP,\n    },\n    {\n        \"org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException\",\n        0,\n        ESTALE,\n    },\n};\n\nvoid getExceptionInfo(const char *excName, int noPrintFlags,\n                      int *excErrno, int *shouldPrint)\n{\n    int i;\n\n    for (i = 0; i < EXCEPTION_INFO_LEN; i++) {\n        if (strstr(gExceptionInfo[i].name, excName)) {\n            break;\n        }\n    }\n    if (i < EXCEPTION_INFO_LEN) {\n        *shouldPrint = !(gExceptionInfo[i].noPrintFlag & noPrintFlags);\n        *excErrno = gExceptionInfo[i].excErrno;\n    } else {\n        *shouldPrint = 1;\n        *excErrno = EINTERNAL;\n    }\n}\n\nint printExceptionAndFreeV(JNIEnv *env, jthrowable exc, int noPrintFlags,\n        const char *fmt, va_list ap)\n{\n    int i, noPrint, excErrno;\n    char *className = NULL;\n    jstring jStr = NULL;\n    jvalue jVal;\n    jthrowable jthr;\n    const char *stackTrace;\n\n    jthr = classNameOfObject(exc, env, &className);\n    if (jthr) {\n        fprintf(stderr, \"PrintExceptionAndFree: error determining class name \"\n            \"of exception.\\n\");\n        className = strdup(\"(unknown)\");\n        destroyLocalReference(env, jthr);\n    }\n    for (i = 0; i < EXCEPTION_INFO_LEN; i++) {\n        if (!strcmp(gExceptionInfo[i].name, className)) {\n            break;\n        }\n    }\n    if (i < EXCEPTION_INFO_LEN) {\n        noPrint = (gExceptionInfo[i].noPrintFlag & noPrintFlags);\n        excErrno = gExceptionInfo[i].excErrno;\n    } else {\n        noPrint = 0;\n        excErrno = EINTERNAL;\n    }\n    if 
(!noPrint) {\n        vfprintf(stderr, fmt, ap);\n        fprintf(stderr, \" error:\\n\");\n\n        // We don't want to  use ExceptionDescribe here, because that requires a\n        // pending exception.  Instead, use ExceptionUtils.\n        jthr = invokeMethod(env, &jVal, STATIC, NULL, \n            \"org/apache/commons/lang/exception/ExceptionUtils\",\n            \"getStackTrace\", \"(Ljava/lang/Throwable;)Ljava/lang/String;\", exc);\n        if (jthr) {\n            fprintf(stderr, \"(unable to get stack trace for %s exception: \"\n                    \"ExceptionUtils::getStackTrace error.)\\n\", className);\n            destroyLocalReference(env, jthr);\n        } else {\n            jStr = jVal.l;\n            stackTrace = (*env)->GetStringUTFChars(env, jStr, NULL);\n            if (!stackTrace) {\n                fprintf(stderr, \"(unable to get stack trace for %s exception: \"\n                        \"GetStringUTFChars error.)\\n\", className);\n            } else {\n                fprintf(stderr, \"%s\", stackTrace);\n                (*env)->ReleaseStringUTFChars(env, jStr, stackTrace);\n            }\n        }\n    }\n    destroyLocalReference(env, jStr);\n    destroyLocalReference(env, exc);\n    free(className);\n    return excErrno;\n}\n\nint printExceptionAndFree(JNIEnv *env, jthrowable exc, int noPrintFlags,\n        const char *fmt, ...)\n{\n    va_list ap;\n    int ret;\n\n    va_start(ap, fmt);\n    ret = printExceptionAndFreeV(env, exc, noPrintFlags, fmt, ap);\n    va_end(ap);\n    return ret;\n}\n\nint printPendingExceptionAndFree(JNIEnv *env, int noPrintFlags,\n        const char *fmt, ...)\n{\n    va_list ap;\n    int ret;\n    jthrowable exc;\n\n    exc = (*env)->ExceptionOccurred(env);\n    if (!exc) {\n        va_start(ap, fmt);\n        vfprintf(stderr, fmt, ap);\n        va_end(ap);\n        fprintf(stderr, \" error: (no exception)\");\n        ret = 0;\n    } else {\n        (*env)->ExceptionClear(env);\n        va_start(ap, 
fmt);\n        ret = printExceptionAndFreeV(env, exc, noPrintFlags, fmt, ap);\n        va_end(ap);\n    }\n    return ret;\n}\n\njthrowable getPendingExceptionAndClear(JNIEnv *env)\n{\n    jthrowable jthr = (*env)->ExceptionOccurred(env);\n    if (!jthr)\n        return NULL;\n    (*env)->ExceptionClear(env);\n    return jthr;\n}\n\njthrowable newRuntimeError(JNIEnv *env, const char *fmt, ...)\n{\n    char buf[512];\n    jobject out, exc;\n    jstring jstr;\n    va_list ap;\n\n    va_start(ap, fmt);\n    vsnprintf(buf, sizeof(buf), fmt, ap);\n    va_end(ap);\n    jstr = (*env)->NewStringUTF(env, buf);\n    if (!jstr) {\n        // We got an out of memory exception rather than a RuntimeException.\n        // Too bad...\n        return getPendingExceptionAndClear(env);\n    }\n    exc = constructNewObjectOfClass(env, &out, \"RuntimeException\",\n        \"(java/lang/String;)V\", jstr);\n    (*env)->DeleteLocalRef(env, jstr);\n    // Again, we'll either get an out of memory exception or the\n    // RuntimeException we wanted.\n    return (exc) ? exc : out;\n}\n"
  },
  {
    "path": "src/libhdfs/exception.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_EXCEPTION_H\n#define LIBHDFS_EXCEPTION_H\n\n/**\n * Exception handling routines for libhdfs.\n *\n * The convention we follow here is to clear pending exceptions as soon as they\n * are raised.  Never assume that the caller of your function will clean up\n * after you-- do it yourself.  Unhandled exceptions can lead to memory leaks\n * and other undefined behavior.\n *\n * If you encounter an exception, return a local reference to it.  The caller is\n * responsible for freeing the local reference, by calling a function like\n * PrintExceptionAndFree.  (You can also free exceptions directly by calling\n * DeleteLocalRef.  However, that would not produce an error message, so it's\n * usually not what you want.)\n */\n\n#include \"platform.h\"\n\n#include <jni.h>\n#include <stdio.h>\n\n#include <stdlib.h>\n#include <stdarg.h>\n#include <search.h>\n#include <errno.h>\n\n/**\n * Exception noprint flags\n *\n * Theses flags determine which exceptions should NOT be printed to stderr by\n * the exception printing routines.  
For example, if you expect to see\n * FileNotFound, you might use NOPRINT_EXC_FILE_NOT_FOUND, to avoid filling the\n * logs with messages about routine events.\n *\n * On the other hand, if you don't expect any failures, you might pass\n * PRINT_EXC_ALL.\n *\n * You can OR these flags together to avoid printing multiple classes of\n * exceptions.\n */\n#define PRINT_EXC_ALL                           0x00\n#define NOPRINT_EXC_FILE_NOT_FOUND              0x01\n#define NOPRINT_EXC_ACCESS_CONTROL              0x02\n#define NOPRINT_EXC_UNRESOLVED_LINK             0x04\n#define NOPRINT_EXC_PARENT_NOT_DIRECTORY        0x08\n#define NOPRINT_EXC_ILLEGAL_ARGUMENT            0x10\n\n/**\n * Get information about an exception.\n *\n * @param excName         The Exception name.\n *                        This is a Java class name in JNI format.\n * @param noPrintFlags    Flags which determine which exceptions we should NOT\n *                        print.\n * @param excErrno        (out param) The POSIX error number associated with the\n *                        exception.\n * @param shouldPrint     (out param) Nonzero if we should print this exception,\n *                        based on the noPrintFlags and its name. 
\n */\nvoid getExceptionInfo(const char *excName, int noPrintFlags,\n                      int *excErrno, int *shouldPrint);\n\n/**\n * Print out information about an exception and free it.\n *\n * @param env             The JNI environment\n * @param exc             The exception to print and free\n * @param noPrintFlags    Flags which determine which exceptions we should NOT\n *                        print.\n * @param fmt             Printf-style format list\n * @param ap              Printf-style varargs\n *\n * @return                The POSIX error number associated with the exception\n *                        object.\n */\nint printExceptionAndFreeV(JNIEnv *env, jthrowable exc, int noPrintFlags,\n        const char *fmt, va_list ap);\n\n/**\n * Print out information about an exception and free it.\n *\n * @param env             The JNI environment\n * @param exc             The exception to print and free\n * @param noPrintFlags    Flags which determine which exceptions we should NOT\n *                        print.\n * @param fmt             Printf-style format list\n * @param ...             Printf-style varargs\n *\n * @return                The POSIX error number associated with the exception\n *                        object.\n */\nint printExceptionAndFree(JNIEnv *env, jthrowable exc, int noPrintFlags,\n        const char *fmt, ...) TYPE_CHECKED_PRINTF_FORMAT(4, 5);\n\n/**\n * Print out information about the pending exception and free it.\n *\n * @param env             The JNI environment\n * @param noPrintFlags    Flags which determine which exceptions we should NOT\n *                        print.\n * @param fmt             Printf-style format list\n * @param ...             Printf-style varargs\n *\n * @return                The POSIX error number associated with the exception\n *                        object.\n */\nint printPendingExceptionAndFree(JNIEnv *env, int noPrintFlags,\n        const char *fmt, ...) 
TYPE_CHECKED_PRINTF_FORMAT(3, 4);\n\n/**\n * Get a local reference to the pending exception and clear it.\n *\n * Once it is cleared, the exception will no longer be pending.  The caller will\n * have to decide what to do with the exception object.\n *\n * @param env             The JNI environment\n *\n * @return                The exception, or NULL if there was no exception\n */\njthrowable getPendingExceptionAndClear(JNIEnv *env);\n\n/**\n * Create a new runtime error.\n *\n * This creates (but does not throw) a new RuntimeError.\n *\n * @param env             The JNI environment\n * @param fmt             Printf-style format list\n * @param ...             Printf-style varargs\n *\n * @return                A local reference to a RuntimeError\n */\njthrowable newRuntimeError(JNIEnv *env, const char *fmt, ...)\n        TYPE_CHECKED_PRINTF_FORMAT(2, 3);\n\n#undef TYPE_CHECKED_PRINTF_FORMAT\n#endif\n"
  },
  {
    "path": "src/libhdfs/hdfs.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"exception.h\"\n#include \"hdfs/hdfs.h\"\n#include \"jni_helper.h\"\n#include \"platform.h\"\n\n#include <fcntl.h>\n#include <inttypes.h>\n#include <stdio.h>\n#include <string.h>\n\n/* Some frequently used Java paths */\n#define HADOOP_CONF     \"org/apache/hadoop/conf/Configuration\"\n#define HADOOP_PATH     \"org/apache/hadoop/fs/Path\"\n#define HADOOP_LOCALFS  \"org/apache/hadoop/fs/LocalFileSystem\"\n#define HADOOP_FS       \"org/apache/hadoop/fs/FileSystem\"\n#define HADOOP_FSSTATUS \"org/apache/hadoop/fs/FsStatus\"\n#define HADOOP_BLK_LOC  \"org/apache/hadoop/fs/BlockLocation\"\n#define HADOOP_DFS      \"org/apache/hadoop/hdfs/DistributedFileSystem\"\n#define HADOOP_ISTRM    \"org/apache/hadoop/fs/FSDataInputStream\"\n#define HADOOP_OSTRM    \"org/apache/hadoop/fs/FSDataOutputStream\"\n#define HADOOP_STAT     \"org/apache/hadoop/fs/FileStatus\"\n#define HADOOP_FSPERM   \"org/apache/hadoop/fs/permission/FsPermission\"\n#define JAVA_NET_ISA    \"java/net/InetSocketAddress\"\n#define JAVA_NET_URI    \"java/net/URI\"\n#define JAVA_STRING     \"java/lang/String\"\n#define READ_OPTION     
\"org/apache/hadoop/fs/ReadOption\"\n\n#define JAVA_VOID       \"V\"\n\n/* Macros for constructing method signatures */\n#define JPARAM(X)           \"L\" X \";\"\n#define JARRPARAM(X)        \"[L\" X \";\"\n#define JMETHOD1(X, R)      \"(\" X \")\" R\n#define JMETHOD2(X, Y, R)   \"(\" X Y \")\" R\n#define JMETHOD3(X, Y, Z, R)   \"(\" X Y Z\")\" R\n\n#define KERBEROS_TICKET_CACHE_PATH \"hadoop.security.kerberos.ticket.cache.path\"\n\n// Bit fields for hdfsFile_internal flags\n#define HDFS_FILE_SUPPORTS_DIRECT_READ (1<<0)\n\ntSize readDirect(hdfsFS fs, hdfsFile f, void* buffer, tSize length);\nstatic void hdfsFreeFileInfoEntry(hdfsFileInfo *hdfsFileInfo);\n\n/**\n * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .\n */\nenum hdfsStreamType\n{\n    HDFS_STREAM_UNINITIALIZED = 0,\n    HDFS_STREAM_INPUT = 1,\n    HDFS_STREAM_OUTPUT = 2,\n};\n\n/**\n * The 'file-handle' to a file in hdfs.\n */\nstruct hdfsFile_internal {\n    void* file;\n    enum hdfsStreamType type;\n    int flags;\n};\n\n#define HDFS_EXTENDED_FILE_INFO_ENCRYPTED 0x1\n\n/**\n * Extended file information.\n */\nstruct hdfsExtendedFileInfo {\n    int flags;\n};\n\nint hdfsFileIsOpenForRead(hdfsFile file)\n{\n    return (file->type == HDFS_STREAM_INPUT);\n}\n\nint hdfsFileGetReadStatistics(hdfsFile file,\n                              struct hdfsReadStatistics **stats)\n{\n    jthrowable jthr;\n    jobject readStats = NULL;\n    jvalue jVal;\n    struct hdfsReadStatistics *s = NULL;\n    int ret;\n    JNIEnv* env = getJNIEnv();\n\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return -1;\n    }\n    if (file->type != HDFS_STREAM_INPUT) {\n        ret = EINVAL;\n        goto done;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, file->file, \n                  \"org/apache/hadoop/hdfs/client/HdfsDataInputStream\",\n                  \"getReadStatistics\",\n                  \"()Lorg/apache/hadoop/hdfs/DFSInputStream$ReadStatistics;\");\n    if (jthr) {\n        ret 
= printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileGetReadStatistics: getReadStatistics failed\");\n        goto done;\n    }\n    readStats = jVal.l;\n    s = malloc(sizeof(struct hdfsReadStatistics));\n    if (!s) {\n        ret = ENOMEM;\n        goto done;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, readStats,\n                  \"org/apache/hadoop/hdfs/DFSInputStream$ReadStatistics\",\n                  \"getTotalBytesRead\", \"()J\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileGetReadStatistics: getTotalBytesRead failed\");\n        goto done;\n    }\n    s->totalBytesRead = jVal.j;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, readStats,\n                  \"org/apache/hadoop/hdfs/DFSInputStream$ReadStatistics\",\n                  \"getTotalLocalBytesRead\", \"()J\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileGetReadStatistics: getTotalLocalBytesRead failed\");\n        goto done;\n    }\n    s->totalLocalBytesRead = jVal.j;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, readStats,\n                  \"org/apache/hadoop/hdfs/DFSInputStream$ReadStatistics\",\n                  \"getTotalShortCircuitBytesRead\", \"()J\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileGetReadStatistics: getTotalShortCircuitBytesRead failed\");\n        goto done;\n    }\n    s->totalShortCircuitBytesRead = jVal.j;\n    jthr = invokeMethod(env, &jVal, INSTANCE, readStats,\n                  \"org/apache/hadoop/hdfs/DFSInputStream$ReadStatistics\",\n                  \"getTotalZeroCopyBytesRead\", \"()J\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileGetReadStatistics: getTotalZeroCopyBytesRead failed\");\n        goto done;\n    }\n    s->totalZeroCopyBytesRead = jVal.j;\n    *stats = s;\n    s = NULL;\n    
ret = 0;\n\ndone:\n    destroyLocalReference(env, readStats);\n    free(s);\n    if (ret) {\n      errno = ret;\n      return -1;\n    }\n    return 0;\n}\n\nint64_t hdfsReadStatisticsGetRemoteBytesRead(\n                            const struct hdfsReadStatistics *stats)\n{\n    return stats->totalBytesRead - stats->totalLocalBytesRead;\n}\n\nint hdfsFileClearReadStatistics(hdfsFile file)\n{\n    jthrowable jthr;\n    int ret;\n    JNIEnv* env = getJNIEnv();\n\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return EINTERNAL;\n    }\n    if (file->type != HDFS_STREAM_INPUT) {\n        ret = EINVAL;\n        goto done;\n    }\n    jthr = invokeMethod(env, NULL, INSTANCE, file->file,\n                  \"org/apache/hadoop/hdfs/client/HdfsDataInputStream\",\n                  \"clearReadStatistics\", \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFileClearReadStatistics: clearReadStatistics failed\");\n        goto done;\n    }\n    ret = 0;\ndone:\n    if (ret) {\n        errno = ret;\n        return ret;\n    }\n    return 0;\n}\n\nvoid hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats)\n{\n    free(stats);\n}\n\nint hdfsFileIsOpenForWrite(hdfsFile file)\n{\n    return (file->type == HDFS_STREAM_OUTPUT);\n}\n\nint hdfsFileUsesDirectRead(hdfsFile file)\n{\n    return !!(file->flags & HDFS_FILE_SUPPORTS_DIRECT_READ);\n}\n\nvoid hdfsFileDisableDirectRead(hdfsFile file)\n{\n    file->flags &= ~HDFS_FILE_SUPPORTS_DIRECT_READ;\n}\n\nint hdfsDisableDomainSocketSecurity(void)\n{\n    jthrowable jthr;\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n    jthr = invokeMethod(env, NULL, STATIC, NULL,\n            \"org/apache/hadoop/net/unix/DomainSocket\",\n            \"disableBindPathValidation\", \"()V\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            
\"DomainSocket#disableBindPathValidation\");\n        return -1;\n    }\n    return 0;\n}\n\n/**\n * hdfsJniEnv: A wrapper struct to be used as 'value'\n * while saving thread -> JNIEnv* mappings\n */\ntypedef struct\n{\n    JNIEnv* env;\n} hdfsJniEnv;\n\n/**\n * Helper function to create a org.apache.hadoop.fs.Path object.\n * @param env: The JNIEnv pointer. \n * @param path: The file-path for which to construct org.apache.hadoop.fs.Path\n * object.\n * @return Returns a jobject on success and NULL on error.\n */\nstatic jthrowable constructNewObjectOfPath(JNIEnv *env, const char *path,\n                                           jobject *out)\n{\n    jthrowable jthr;\n    jstring jPathString;\n    jobject jPath;\n\n    //Construct a java.lang.String object\n    jthr = newJavaStr(env, path, &jPathString);\n    if (jthr)\n        return jthr;\n    //Construct the org.apache.hadoop.fs.Path object\n    jthr = constructNewObjectOfClass(env, &jPath, \"org/apache/hadoop/fs/Path\",\n                                     \"(Ljava/lang/String;)V\", jPathString);\n    destroyLocalReference(env, jPathString);\n    if (jthr)\n        return jthr;\n    *out = jPath;\n    return NULL;\n}\n\nstatic jthrowable hadoopConfGetStr(JNIEnv *env, jobject jConfiguration,\n        const char *key, char **val)\n{\n    jthrowable jthr;\n    jvalue jVal;\n    jstring jkey = NULL, jRet = NULL;\n\n    jthr = newJavaStr(env, key, &jkey);\n    if (jthr)\n        goto done;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jConfiguration,\n            HADOOP_CONF, \"get\", JMETHOD1(JPARAM(JAVA_STRING),\n                                         JPARAM(JAVA_STRING)), jkey);\n    if (jthr)\n        goto done;\n    jRet = jVal.l;\n    jthr = newCStr(env, jRet, val);\ndone:\n    destroyLocalReference(env, jkey);\n    destroyLocalReference(env, jRet);\n    return jthr;\n}\n\nint hdfsConfGetStr(const char *key, char **val)\n{\n    JNIEnv *env;\n    int ret;\n    jthrowable jthr;\n    jobject jConfiguration 
= NULL;\n\n    env = getJNIEnv();\n    if (env == NULL) {\n        ret = EINTERNAL;\n        goto done;\n    }\n    jthr = constructNewObjectOfClass(env, &jConfiguration, HADOOP_CONF, \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsConfGetStr(%s): new Configuration\", key);\n        goto done;\n    }\n    jthr = hadoopConfGetStr(env, jConfiguration, key, val);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsConfGetStr(%s): hadoopConfGetStr\", key);\n        goto done;\n    }\n    ret = 0;\ndone:\n    destroyLocalReference(env, jConfiguration);\n    if (ret)\n        errno = ret;\n    return ret;\n}\n\nvoid hdfsConfStrFree(char *val)\n{\n    free(val);\n}\n\nstatic jthrowable hadoopConfGetInt(JNIEnv *env, jobject jConfiguration,\n        const char *key, int32_t *val)\n{\n    jthrowable jthr = NULL;\n    jvalue jVal;\n    jstring jkey = NULL;\n\n    jthr = newJavaStr(env, key, &jkey);\n    if (jthr)\n        return jthr;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jConfiguration,\n            HADOOP_CONF, \"getInt\", JMETHOD2(JPARAM(JAVA_STRING), \"I\", \"I\"),\n            jkey, (jint)(*val));\n    destroyLocalReference(env, jkey);\n    if (jthr)\n        return jthr;\n    *val = jVal.i;\n    return NULL;\n}\n\nint hdfsConfGetInt(const char *key, int32_t *val)\n{\n    JNIEnv *env;\n    int ret;\n    jobject jConfiguration = NULL;\n    jthrowable jthr;\n\n    env = getJNIEnv();\n    if (env == NULL) {\n      ret = EINTERNAL;\n      goto done;\n    }\n    jthr = constructNewObjectOfClass(env, &jConfiguration, HADOOP_CONF, \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsConfGetInt(%s): new Configuration\", key);\n        goto done;\n    }\n    jthr = hadoopConfGetInt(env, jConfiguration, key, val);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            
\"hdfsConfGetInt(%s): hadoopConfGetInt\", key);\n        goto done;\n    }\n    ret = 0;\ndone:\n    destroyLocalReference(env, jConfiguration);\n    if (ret)\n        errno = ret;\n    return ret;\n}\n\nstruct hdfsBuilderConfOpt {\n    struct hdfsBuilderConfOpt *next;\n    const char *key;\n    const char *val;\n};\n\nstruct hdfsBuilder {\n    int forceNewInstance;\n    const char *nn;\n    tPort port;\n    const char *kerbTicketCachePath;\n    const char *userName;\n    struct hdfsBuilderConfOpt *opts;\n};\n\nstruct hdfsBuilder *hdfsNewBuilder(void)\n{\n    struct hdfsBuilder *bld = calloc(1, sizeof(struct hdfsBuilder));\n    if (!bld) {\n        errno = ENOMEM;\n        return NULL;\n    }\n    return bld;\n}\n\nint hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key,\n                          const char *val)\n{\n    struct hdfsBuilderConfOpt *opt, *next;\n    \n    opt = calloc(1, sizeof(struct hdfsBuilderConfOpt));\n    if (!opt)\n        return -ENOMEM;\n    next = bld->opts;\n    bld->opts = opt;\n    opt->next = next;\n    opt->key = key;\n    opt->val = val;\n    return 0;\n}\n\nvoid hdfsFreeBuilder(struct hdfsBuilder *bld)\n{\n    struct hdfsBuilderConfOpt *cur, *next;\n\n    cur = bld->opts;\n    for (cur = bld->opts; cur; ) {\n        next = cur->next;\n        free(cur);\n        cur = next;\n    }\n    free(bld);\n}\n\nvoid hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld)\n{\n    bld->forceNewInstance = 1;\n}\n\nvoid hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn)\n{\n    bld->nn = nn;\n}\n\nvoid hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port)\n{\n    bld->port = port;\n}\n\nvoid hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName)\n{\n    bld->userName = userName;\n}\n\nvoid hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld,\n                                       const char *kerbTicketCachePath)\n{\n    bld->kerbTicketCachePath = kerbTicketCachePath;\n}\n\nhdfsFS 
hdfsConnect(const char *host, tPort port)\n{\n    struct hdfsBuilder *bld = hdfsNewBuilder();\n    if (!bld)\n        return NULL;\n    hdfsBuilderSetNameNode(bld, host);\n    hdfsBuilderSetNameNodePort(bld, port);\n    return hdfsBuilderConnect(bld);\n}\n\n/** Always return a new FileSystem handle */\nhdfsFS hdfsConnectNewInstance(const char *host, tPort port)\n{\n    struct hdfsBuilder *bld = hdfsNewBuilder();\n    if (!bld)\n        return NULL;\n    hdfsBuilderSetNameNode(bld, host);\n    hdfsBuilderSetNameNodePort(bld, port);\n    hdfsBuilderSetForceNewInstance(bld);\n    return hdfsBuilderConnect(bld);\n}\n\nhdfsFS hdfsConnectAsUser(const char *host, tPort port, const char *user)\n{\n    struct hdfsBuilder *bld = hdfsNewBuilder();\n    if (!bld)\n        return NULL;\n    hdfsBuilderSetNameNode(bld, host);\n    hdfsBuilderSetNameNodePort(bld, port);\n    hdfsBuilderSetUserName(bld, user);\n    return hdfsBuilderConnect(bld);\n}\n\n/** Always return a new FileSystem handle */\nhdfsFS hdfsConnectAsUserNewInstance(const char *host, tPort port,\n        const char *user)\n{\n    struct hdfsBuilder *bld = hdfsNewBuilder();\n    if (!bld)\n        return NULL;\n    hdfsBuilderSetNameNode(bld, host);\n    hdfsBuilderSetNameNodePort(bld, port);\n    hdfsBuilderSetForceNewInstance(bld);\n    hdfsBuilderSetUserName(bld, user);\n    return hdfsBuilderConnect(bld);\n}\n\n\n/**\n * Calculate the effective URI to use, given a builder configuration.\n *\n * If there is not already a URI scheme, we prepend 'hdfs://'.\n *\n * If there is not already a port specified, and a port was given to the\n * builder, we suffix that port.  
If there is a port specified but also one in\n * the URI, that is an error.\n *\n * @param bld       The hdfs builder object\n * @param uri       (out param) dynamically allocated string representing the\n *                  effective URI\n *\n * @return          0 on success; error code otherwise\n */\nstatic int calcEffectiveURI(struct hdfsBuilder *bld, char ** uri)\n{\n    const char *scheme;\n    char suffix[64];\n    const char *lastColon;\n    char *u;\n    size_t uriLen;\n\n    if (!bld->nn)\n        return EINVAL;\n    scheme = (strstr(bld->nn, \"://\")) ? \"\" : \"hdfs://\";\n    if (bld->port == 0) {\n        suffix[0] = '\\0';\n    } else {\n        lastColon = strrchr(bld->nn, ':');\n        if (lastColon && (strspn(lastColon + 1, \"0123456789\") ==\n                          strlen(lastColon + 1))) {\n            fprintf(stderr, \"port %d was given, but URI '%s' already \"\n                \"contains a port!\\n\", bld->port, bld->nn);\n            return EINVAL;\n        }\n        snprintf(suffix, sizeof(suffix), \":%d\", bld->port);\n    }\n\n    uriLen = strlen(scheme) + strlen(bld->nn) + strlen(suffix);\n    u = malloc((uriLen + 1) * (sizeof(char)));\n    if (!u) {\n        fprintf(stderr, \"calcEffectiveURI: out of memory\");\n        return ENOMEM;\n    }\n    snprintf(u, uriLen + 1, \"%s%s%s\", scheme, bld->nn, suffix);\n    *uri = u;\n    return 0;\n}\n\nstatic const char *maybeNull(const char *str)\n{\n    return str ? 
str : \"(NULL)\";\n}\n\nstatic const char *hdfsBuilderToStr(const struct hdfsBuilder *bld,\n                                    char *buf, size_t bufLen)\n{\n    snprintf(buf, bufLen, \"forceNewInstance=%d, nn=%s, port=%d, \"\n             \"kerbTicketCachePath=%s, userName=%s\",\n             bld->forceNewInstance, maybeNull(bld->nn), bld->port,\n             maybeNull(bld->kerbTicketCachePath), maybeNull(bld->userName));\n    return buf;\n}\n\nhdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld)\n{\n    JNIEnv *env = 0;\n    jobject jConfiguration = NULL, jFS = NULL, jURI = NULL, jCachePath = NULL;\n    jstring jURIString = NULL, jUserString = NULL;\n    jvalue  jVal;\n    jthrowable jthr = NULL;\n    char *cURI = 0, buf[512];\n    int ret;\n    jobject jRet = NULL;\n    struct hdfsBuilderConfOpt *opt;\n\n    //Get the JNIEnv* corresponding to current thread\n    env = getJNIEnv();\n    if (env == NULL) {\n        ret = EINTERNAL;\n        goto done;\n    }\n\n    //  jConfiguration = new Configuration();\n    jthr = constructNewObjectOfClass(env, &jConfiguration, HADOOP_CONF, \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsBuilderConnect(%s)\", hdfsBuilderToStr(bld, buf, sizeof(buf)));\n        goto done;\n    }\n    // set configuration values\n    for (opt = bld->opts; opt; opt = opt->next) {\n        jthr = hadoopConfSetStr(env, jConfiguration, opt->key, opt->val);\n        if (jthr) {\n            ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hdfsBuilderConnect(%s): error setting conf '%s' to '%s'\",\n                hdfsBuilderToStr(bld, buf, sizeof(buf)), opt->key, opt->val);\n            goto done;\n        }\n    }\n \n    //Check what type of FileSystem the caller wants...\n    if (bld->nn == NULL) {\n        // Get a local filesystem.\n        if (bld->forceNewInstance) {\n            // fs = FileSytem#newInstanceLocal(conf);\n            jthr = invokeMethod(env, 
&jVal, STATIC, NULL, HADOOP_FS,\n                    \"newInstanceLocal\", JMETHOD1(JPARAM(HADOOP_CONF),\n                    JPARAM(HADOOP_LOCALFS)), jConfiguration);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jFS = jVal.l;\n        } else {\n            // fs = FileSytem#getLocal(conf);\n            jthr = invokeMethod(env, &jVal, STATIC, NULL, HADOOP_FS, \"getLocal\",\n                             JMETHOD1(JPARAM(HADOOP_CONF),\n                                      JPARAM(HADOOP_LOCALFS)),\n                             jConfiguration);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jFS = jVal.l;\n        }\n    } else {\n        if (!strcmp(bld->nn, \"default\")) {\n            // jURI = FileSystem.getDefaultUri(conf)\n            jthr = invokeMethod(env, &jVal, STATIC, NULL, HADOOP_FS,\n                          \"getDefaultUri\",\n                          \"(Lorg/apache/hadoop/conf/Configuration;)Ljava/net/URI;\",\n                          jConfiguration);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jURI = jVal.l;\n        } else {\n            // fs = FileSystem#get(URI, conf, ugi);\n            ret = calcEffectiveURI(bld, &cURI);\n            if (ret)\n                goto done;\n            jthr = newJavaStr(env, cURI, &jURIString);\n            if (jthr) {\n                ret = printExceptionAndFree(env, 
jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jthr = invokeMethod(env, &jVal, STATIC, NULL, JAVA_NET_URI,\n                             \"create\", \"(Ljava/lang/String;)Ljava/net/URI;\",\n                             jURIString);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jURI = jVal.l;\n        }\n\n        if (bld->kerbTicketCachePath) {\n            jthr = hadoopConfSetStr(env, jConfiguration,\n                KERBEROS_TICKET_CACHE_PATH, bld->kerbTicketCachePath);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n        }\n        jthr = newJavaStr(env, bld->userName, &jUserString);\n        if (jthr) {\n            ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hdfsBuilderConnect(%s)\",\n                hdfsBuilderToStr(bld, buf, sizeof(buf)));\n            goto done;\n        }\n        if (bld->forceNewInstance) {\n            jthr = invokeMethod(env, &jVal, STATIC, NULL, HADOOP_FS,\n                    \"newInstance\", JMETHOD3(JPARAM(JAVA_NET_URI), \n                        JPARAM(HADOOP_CONF), JPARAM(JAVA_STRING),\n                        JPARAM(HADOOP_FS)),\n                    jURI, jConfiguration, jUserString);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            
jFS = jVal.l;\n        } else {\n            jthr = invokeMethod(env, &jVal, STATIC, NULL, HADOOP_FS, \"get\",\n                    JMETHOD3(JPARAM(JAVA_NET_URI), JPARAM(HADOOP_CONF),\n                        JPARAM(JAVA_STRING), JPARAM(HADOOP_FS)),\n                        jURI, jConfiguration, jUserString);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n                goto done;\n            }\n            jFS = jVal.l;\n        }\n    }\n    jRet = (*env)->NewGlobalRef(env, jFS);\n    if (!jRet) {\n        ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                    \"hdfsBuilderConnect(%s)\",\n                    hdfsBuilderToStr(bld, buf, sizeof(buf)));\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    // Release unnecessary local references\n    destroyLocalReference(env, jConfiguration);\n    destroyLocalReference(env, jFS);\n    destroyLocalReference(env, jURI);\n    destroyLocalReference(env, jCachePath);\n    destroyLocalReference(env, jURIString);\n    destroyLocalReference(env, jUserString);\n    free(cURI);\n    hdfsFreeBuilder(bld);\n\n    if (ret) {\n        errno = ret;\n        return NULL;\n    }\n    return (hdfsFS)jRet;\n}\n\nint hdfsDisconnect(hdfsFS fs)\n{\n    // JAVA EQUIVALENT:\n    //  fs.close()\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    int ret;\n    jobject jFS;\n    jthrowable jthr;\n\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Parameters\n    jFS = (jobject)fs;\n\n    //Sanity check\n    if (fs == NULL) {\n        errno = EBADF;\n        return -1;\n    }\n\n    jthr = invokeMethod(env, NULL, INSTANCE, jFS, HADOOP_FS,\n                     \"close\", \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            
\"hdfsDisconnect: FileSystem#close\");\n    } else {\n        ret = 0;\n    }\n    (*env)->DeleteGlobalRef(env, jFS);\n    if (ret) {\n        errno = ret;\n        return -1;\n    }\n    return 0;\n}\n\n/**\n * Get the default block size of a FileSystem object.\n *\n * @param env       The Java env\n * @param jFS       The FileSystem object\n * @param jPath     The path to find the default blocksize at\n * @param out       (out param) the default block size\n *\n * @return          NULL on success; or the exception\n */\nstatic jthrowable getDefaultBlockSize(JNIEnv *env, jobject jFS,\n                                      jobject jPath, jlong *out)\n{\n    jthrowable jthr;\n    jvalue jVal;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                 \"getDefaultBlockSize\", JMETHOD1(JPARAM(HADOOP_PATH), \"J\"), jPath);\n    if (jthr)\n        return jthr;\n    *out = jVal.j;\n    return NULL;\n}\n\nhdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags, \n                      int bufferSize, short replication, tSize blockSize)\n{\n    /*\n      JAVA EQUIVALENT:\n       File f = new File(path);\n       FSData{Input|Output}Stream f{is|os} = fs.create(f);\n       return f{is|os};\n    */\n    int accmode = flags & O_ACCMODE;\n    jstring jStrBufferSize = NULL, jStrReplication = NULL;\n    jobject jConfiguration = NULL, jPath = NULL, jFile = NULL;\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jvalue jVal;\n    hdfsFile file = NULL;\n    int ret;\n    jint jBufferSize = bufferSize;\n    jshort jReplication = replication;\n\n    /* The hadoop java api/signature */\n    const char *method = NULL;\n    const char *signature = NULL;\n\n    /* Get the JNIEnv* corresponding to current thread */\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return NULL;\n    }\n\n\n    if (accmode == O_RDONLY || accmode == O_WRONLY) {\n\t/* yay */\n    } else if (accmode == O_RDWR) {\n      fprintf(stderr, 
\"ERROR: cannot open an hdfs file in O_RDWR mode\\n\");\n      errno = ENOTSUP;\n      return NULL;\n    } else {\n      fprintf(stderr, \"ERROR: cannot open an hdfs file in mode 0x%x\\n\", accmode);\n      errno = EINVAL;\n      return NULL;\n    }\n\n    if ((flags & O_CREAT) && (flags & O_EXCL)) {\n      fprintf(stderr, \"WARN: hdfs does not truly support O_CREATE && O_EXCL\\n\");\n    }\n\n    if (accmode == O_RDONLY) {\n\tmethod = \"open\";\n        signature = JMETHOD2(JPARAM(HADOOP_PATH), \"I\", JPARAM(HADOOP_ISTRM));\n    } else if (flags & O_APPEND) {\n\tmethod = \"append\";\n\tsignature = JMETHOD1(JPARAM(HADOOP_PATH), JPARAM(HADOOP_OSTRM));\n    } else {\n\tmethod = \"create\";\n\tsignature = JMETHOD2(JPARAM(HADOOP_PATH), \"ZISJ\", JPARAM(HADOOP_OSTRM));\n    }\n\n    /* Create an object of org.apache.hadoop.fs.Path */\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsOpenFile(%s): constructNewObjectOfPath\", path);\n        goto done;\n    }\n\n    /* Get the Configuration object from the FileSystem object */\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"getConf\", JMETHOD1(\"\", JPARAM(HADOOP_CONF)));\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsOpenFile(%s): FileSystem#getConf\", path);\n        goto done;\n    }\n    jConfiguration = jVal.l;\n\n    jStrBufferSize = (*env)->NewStringUTF(env, \"io.file.buffer.size\"); \n    if (!jStrBufferSize) {\n        ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL, \"OOM\");\n        goto done;\n    }\n    jStrReplication = (*env)->NewStringUTF(env, \"dfs.replication\");\n    if (!jStrReplication) {\n        ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL, \"OOM\");\n        goto done;\n    }\n\n    if (!bufferSize) {\n        jthr = invokeMethod(env, &jVal, INSTANCE, jConfiguration, \n                     
    HADOOP_CONF, \"getInt\", \"(Ljava/lang/String;I)I\",\n                         jStrBufferSize, 4096);\n        if (jthr) {\n            ret = printExceptionAndFree(env, jthr, NOPRINT_EXC_FILE_NOT_FOUND |\n                NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_UNRESOLVED_LINK,\n                \"hdfsOpenFile(%s): Configuration#getInt(io.file.buffer.size)\",\n                path);\n            goto done;\n        }\n        jBufferSize = jVal.i;\n    }\n\n    if ((accmode == O_WRONLY) && (flags & O_APPEND) == 0) {\n        if (!replication) {\n            jthr = invokeMethod(env, &jVal, INSTANCE, jConfiguration, \n                             HADOOP_CONF, \"getInt\", \"(Ljava/lang/String;I)I\",\n                             jStrReplication, 1);\n            if (jthr) {\n                ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                    \"hdfsOpenFile(%s): Configuration#getInt(dfs.replication)\",\n                    path);\n                goto done;\n            }\n            jReplication = (jshort)jVal.i;\n        }\n    }\n \n    /* Create and return either the FSDataInputStream or\n       FSDataOutputStream references jobject jStream */\n\n    // READ?\n    if (accmode == O_RDONLY) {\n        jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                       method, signature, jPath, jBufferSize);\n    }  else if ((accmode == O_WRONLY) && (flags & O_APPEND)) {\n        // WRITE/APPEND?\n       jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                       method, signature, jPath);\n    } else {\n        // WRITE/CREATE\n        jboolean jOverWrite = 1;\n        jlong jBlockSize = blockSize;\n\n        if (jBlockSize == 0) {\n            jthr = getDefaultBlockSize(env, jFS, jPath, &jBlockSize);\n            if (jthr) {\n                ret = EIO;\n                goto done;\n            }\n        }\n        jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                         method, 
signature, jPath, jOverWrite,\n                         jBufferSize, jReplication, jBlockSize);\n    }\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsOpenFile(%s): FileSystem#%s(%s)\", path, method, signature);\n        goto done;\n    }\n    jFile = jVal.l;\n\n    file = calloc(1, sizeof(struct hdfsFile_internal));\n    if (!file) {\n        fprintf(stderr, \"hdfsOpenFile(%s): OOM create hdfsFile\\n\", path);\n        ret = ENOMEM;\n        goto done;\n    }\n    file->file = (*env)->NewGlobalRef(env, jFile);\n    if (!file->file) {\n        ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsOpenFile(%s): NewGlobalRef\", path); \n        goto done;\n    }\n    file->type = (((flags & O_WRONLY) == 0) ? HDFS_STREAM_INPUT :\n        HDFS_STREAM_OUTPUT);\n    file->flags = 0;\n    if ((flags & O_WRONLY) == 0) {\n\tfile->flags |= HDFS_FILE_SUPPORTS_DIRECT_READ;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jStrBufferSize);\n    destroyLocalReference(env, jStrReplication);\n    destroyLocalReference(env, jConfiguration); \n    destroyLocalReference(env, jPath); \n    destroyLocalReference(env, jFile); \n    if (ret) {\n        if (file) {\n            if (file->file) {\n                (*env)->DeleteGlobalRef(env, file->file);\n            }\n            free(file);\n        }\n        errno = ret;\n        return NULL;\n    }\n    return file;\n}\n\nint hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength)\n{\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jvalue jVal;\n    jobject jPath = NULL;\n\n    JNIEnv *env = getJNIEnv();\n\n    if (!env) {\n        errno = EINTERNAL;\n        return -1;\n    }\n\n    /* Create an object of org.apache.hadoop.fs.Path */\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsTruncateFile(%s): 
constructNewObjectOfPath\", path);\n        return -1;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                        \"truncate\", JMETHOD2(JPARAM(HADOOP_PATH), \"J\", \"Z\"),\n                        jPath, newlength);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsTruncateFile(%s): FileSystem#truncate\", path);\n        return -1;\n    }\n    if (jVal.z == JNI_TRUE) {\n        return 1;\n    }\n    return 0;\n}\n\nint hdfsUnbufferFile(hdfsFile file)\n{\n    int ret;\n    jthrowable jthr;\n    JNIEnv *env = getJNIEnv();\n\n    if (!env) {\n        ret = EINTERNAL;\n        goto done;\n    }\n    if (file->type != HDFS_STREAM_INPUT) {\n        ret = ENOTSUP;\n        goto done;\n    }\n    jthr = invokeMethod(env, NULL, INSTANCE, file->file, HADOOP_ISTRM,\n                     \"unbuffer\", \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                HADOOP_ISTRM \"#unbuffer failed:\");\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    errno = ret;\n    return ret;\n}\n\nint hdfsCloseFile(hdfsFS fs, hdfsFile file)\n{\n    int ret;\n    // JAVA EQUIVALENT:\n    //  file.close \n\n    //The interface whose 'close' method to be called\n    const char *interface;\n    const char *interfaceShortName;\n\n    //Caught exception\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return -1;\n    }\n\n    //Sanity check\n    if (!file || file->type == HDFS_STREAM_UNINITIALIZED) {\n        errno = EBADF;\n        return -1;\n    }\n\n    interface = (file->type == HDFS_STREAM_INPUT) ?\n        HADOOP_ISTRM : HADOOP_OSTRM;\n  \n    jthr = invokeMethod(env, NULL, INSTANCE, file->file, interface,\n                     \"close\", \"()V\");\n    if (jthr) {\n        interfaceShortName 
= (file->type == HDFS_STREAM_INPUT) ? \n            \"FSDataInputStream\" : \"FSDataOutputStream\";\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"%s#close\", interfaceShortName);\n    } else {\n        ret = 0;\n    }\n\n    //De-allocate memory\n    (*env)->DeleteGlobalRef(env, file->file);\n    free(file);\n\n    if (ret) {\n        errno = ret;\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsExists(hdfsFS fs, const char *path)\n{\n    JNIEnv *env = getJNIEnv();\n    jobject jPath;\n    jvalue  jVal;\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return -1;\n    }\n    \n    if (path == NULL) {\n        errno = EINVAL;\n        return -1;\n    }\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsExists: constructNewObjectOfPath\");\n        return -1;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n            \"exists\", JMETHOD1(JPARAM(HADOOP_PATH), \"Z\"), jPath);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsExists: invokeMethod(%s)\",\n            JMETHOD1(JPARAM(HADOOP_PATH), \"Z\"));\n        return -1;\n    }\n    if (jVal.z) {\n        return 0;\n    } else {\n        errno = ENOENT;\n        return -1;\n    }\n}\n\n// Checks input file for readiness for reading.\nstatic int readPrepare(JNIEnv* env, hdfsFS fs, hdfsFile f,\n                       jobject* jInputStream)\n{\n    *jInputStream = (jobject)(f ? f->file : NULL);\n\n    //Sanity check\n    if (!f || f->type == HDFS_STREAM_UNINITIALIZED) {\n      errno = EBADF;\n      return -1;\n    }\n\n    //Error checking... 
make sure that this file is 'readable'\n    if (f->type != HDFS_STREAM_INPUT) {\n      fprintf(stderr, \"Cannot read from a non-InputStream object!\\n\");\n      errno = EINVAL;\n      return -1;\n    }\n\n    return 0;\n}\n\ntSize hdfsRead(hdfsFS fs, hdfsFile f, void* buffer, tSize length)\n{\n    jobject jInputStream;\n    jbyteArray jbRarray;\n    jint noReadBytes = length;\n    jvalue jVal;\n    jthrowable jthr;\n    JNIEnv* env;\n    tSize ret;\n\n    if (length == 0) {\n        return 0;\n    } else if (length < 0) {\n        errno = EINVAL;\n        return -1;\n    }\n    if (f->flags & HDFS_FILE_SUPPORTS_DIRECT_READ) {\n      if ((ret = readDirect(fs, f, buffer, length)) < 0) {\n\t  if (errno != ENOTSUP) {\n\t      return -1;\n\t  }\n\t  hdfsFileDisableDirectRead(f);\n      } else {\n\t  return ret;\n      }\n    }\n\n    // JAVA EQUIVALENT:\n    //  byte [] bR = new byte[length];\n    //  fis.read(bR);\n\n    //Get the JNIEnv* corresponding to current thread\n    env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Parameters\n    if (readPrepare(env, fs, f, &jInputStream) == -1) {\n      return -1;\n    }\n\n    //Read the requisite bytes\n    jbRarray = (*env)->NewByteArray(env, length);\n    if (!jbRarray) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsRead: NewByteArray\");\n        return -1;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jInputStream, HADOOP_ISTRM,\n                               \"read\", \"([B)I\", jbRarray);\n    if (jthr) {\n        destroyLocalReference(env, jbRarray);\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsRead: FSDataInputStream#read\");\n        return -1;\n    }\n    if (jVal.i < 0) {\n        // EOF\n        destroyLocalReference(env, jbRarray);\n        return 0;\n    } else if (jVal.i == 0) {\n        destroyLocalReference(env, jbRarray);\n        errno = EINTR;\n        return 
-1;\n    }\n    (*env)->GetByteArrayRegion(env, jbRarray, 0, noReadBytes, buffer);\n    destroyLocalReference(env, jbRarray);\n    if ((*env)->ExceptionCheck(env)) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsRead: GetByteArrayRegion\");\n        return -1;\n    }\n    return jVal.i;\n}\n\n// Reads using the read(ByteBuffer) API, which does fewer copies\ntSize readDirect(hdfsFS fs, hdfsFile f, void* buffer, tSize length)\n{\n    // JAVA EQUIVALENT:\n    //  ByteBuffer bbuffer = ByteBuffer.allocateDirect(length) // wraps C buffer\n    //  fis.read(bbuffer);\n\n    jobject jInputStream;\n    jvalue jVal;\n    jthrowable jthr;\n    jobject bb;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    if (readPrepare(env, fs, f, &jInputStream) == -1) {\n      return -1;\n    }\n\n    //Read the requisite bytes\n    bb = (*env)->NewDirectByteBuffer(env, buffer, length);\n    if (bb == NULL) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"readDirect: NewDirectByteBuffer\");\n        return -1;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jInputStream,\n        HADOOP_ISTRM, \"read\", \"(Ljava/nio/ByteBuffer;)I\", bb);\n    destroyLocalReference(env, bb);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"readDirect: FSDataInputStream#read\");\n        return -1;\n    }\n    return (jVal.i < 0) ? 
0 : jVal.i;\n}\n\ntSize hdfsPread(hdfsFS fs, hdfsFile f, tOffset position,\n                void* buffer, tSize length)\n{\n    JNIEnv* env;\n    jbyteArray jbRarray;\n    jvalue jVal;\n    jthrowable jthr;\n\n    if (length == 0) {\n        return 0;\n    } else if (length < 0) {\n        errno = EINVAL;\n        return -1;\n    }\n    if (!f || f->type == HDFS_STREAM_UNINITIALIZED) {\n        errno = EBADF;\n        return -1;\n    }\n\n    env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Error checking... make sure that this file is 'readable'\n    if (f->type != HDFS_STREAM_INPUT) {\n        fprintf(stderr, \"Cannot read from a non-InputStream object!\\n\");\n        errno = EINVAL;\n        return -1;\n    }\n\n    // JAVA EQUIVALENT:\n    //  byte [] bR = new byte[length];\n    //  fis.read(pos, bR, 0, length);\n    jbRarray = (*env)->NewByteArray(env, length);\n    if (!jbRarray) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsPread: NewByteArray\");\n        return -1;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, f->file, HADOOP_ISTRM,\n                     \"read\", \"(J[BII)I\", position, jbRarray, 0, length);\n    if (jthr) {\n        destroyLocalReference(env, jbRarray);\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsPread: FSDataInputStream#read\");\n        return -1;\n    }\n    if (jVal.i < 0) {\n        // EOF\n        destroyLocalReference(env, jbRarray);\n        return 0;\n    } else if (jVal.i == 0) {\n        destroyLocalReference(env, jbRarray);\n        errno = EINTR;\n        return -1;\n    }\n    (*env)->GetByteArrayRegion(env, jbRarray, 0, jVal.i, buffer);\n    destroyLocalReference(env, jbRarray);\n    if ((*env)->ExceptionCheck(env)) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsPread: GetByteArrayRegion\");\n        return -1;\n    }\n    return 
jVal.i;\n}\n\ntSize hdfsWrite(hdfsFS fs, hdfsFile f, const void* buffer, tSize length)\n{\n    // JAVA EQUIVALENT\n    // byte b[] = str.getBytes();\n    // fso.write(b);\n\n    jobject jOutputStream;\n    jbyteArray jbWarray;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type == HDFS_STREAM_UNINITIALIZED) {\n        errno = EBADF;\n        return -1;\n    }\n\n    jOutputStream = f->file;\n    \n    if (length < 0) {\n    \terrno = EINVAL;\n    \treturn -1;\n    }\n\n    //Error checking... make sure that this file is 'writable'\n    if (f->type != HDFS_STREAM_OUTPUT) {\n        fprintf(stderr, \"Cannot write into a non-OutputStream object!\\n\");\n        errno = EINVAL;\n        return -1;\n    }\n\n    if (length < 0) {\n        errno = EINVAL;\n        return -1;\n    }\n    if (length == 0) {\n        return 0;\n    }\n    //Write the requisite bytes into the file\n    jbWarray = (*env)->NewByteArray(env, length);\n    if (!jbWarray) {\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsWrite: NewByteArray\");\n        return -1;\n    }\n    (*env)->SetByteArrayRegion(env, jbWarray, 0, length, buffer);\n    if ((*env)->ExceptionCheck(env)) {\n        destroyLocalReference(env, jbWarray);\n        errno = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsWrite(length = %d): SetByteArrayRegion\", length);\n        return -1;\n    }\n    jthr = invokeMethod(env, NULL, INSTANCE, jOutputStream,\n            HADOOP_OSTRM, \"write\", \"([B)V\", jbWarray);\n    destroyLocalReference(env, jbWarray);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsWrite: FSDataOutputStream#write\");\n        return -1;\n    }\n    // Unlike most Java streams, FSDataOutputStream never does partial writes.\n  
  // If we succeeded, all the data was written.\n    return length;\n}\n\nint hdfsSeek(hdfsFS fs, hdfsFile f, tOffset desiredPos) \n{\n    // JAVA EQUIVALENT\n    //  fis.seek(pos);\n\n    jobject jInputStream;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type != HDFS_STREAM_INPUT) {\n        errno = EBADF;\n        return -1;\n    }\n\n    jInputStream = f->file;\n    jthr = invokeMethod(env, NULL, INSTANCE, jInputStream,\n            HADOOP_ISTRM, \"seek\", \"(J)V\", desiredPos);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsSeek(desiredPos=%\" PRId64 \")\"\n            \": FSDataInputStream#seek\", desiredPos);\n        return -1;\n    }\n    return 0;\n}\n\n\n\ntOffset hdfsTell(hdfsFS fs, hdfsFile f)\n{\n    // JAVA EQUIVALENT\n    //  pos = f.getPos();\n\n    jobject jStream;\n    const char *interface;\n    jvalue jVal;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type == HDFS_STREAM_UNINITIALIZED) {\n        errno = EBADF;\n        return -1;\n    }\n\n    //Parameters\n    jStream = f->file;\n    interface = (f->type == HDFS_STREAM_INPUT) ?\n        HADOOP_ISTRM : HADOOP_OSTRM;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStream,\n                     interface, \"getPos\", \"()J\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsTell: %s#getPos\",\n            ((f->type == HDFS_STREAM_INPUT) ? 
\"FSDataInputStream\" :\n                                 \"FSDataOutputStream\"));\n        return -1;\n    }\n    return jVal.j;\n}\n\nint hdfsFlush(hdfsFS fs, hdfsFile f) \n{\n    // JAVA EQUIVALENT\n    //  fos.flush();\n\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type != HDFS_STREAM_OUTPUT) {\n        errno = EBADF;\n        return -1;\n    }\n    jthr = invokeMethod(env, NULL, INSTANCE, f->file,\n                     HADOOP_OSTRM, \"flush\", \"()V\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsFlush: FSDataInputStream#flush\");\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsHFlush(hdfsFS fs, hdfsFile f)\n{\n    jobject jOutputStream;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type != HDFS_STREAM_OUTPUT) {\n        errno = EBADF;\n        return -1;\n    }\n\n    jOutputStream = f->file;\n    jthr = invokeMethod(env, NULL, INSTANCE, jOutputStream,\n                     HADOOP_OSTRM, \"hflush\", \"()V\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsHFlush: FSDataOutputStream#hflush\");\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsHSync(hdfsFS fs, hdfsFile f)\n{\n    jobject jOutputStream;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type != HDFS_STREAM_OUTPUT) {\n        errno = EBADF;\n        return -1;\n    }\n\n    jOutputStream = f->file;\n    jthr = invokeMethod(env, NULL, INSTANCE, 
jOutputStream,\n                     HADOOP_OSTRM, \"hsync\", \"()V\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsHSync: FSDataOutputStream#hsync\");\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsAvailable(hdfsFS fs, hdfsFile f)\n{\n    // JAVA EQUIVALENT\n    //  fis.available();\n\n    jobject jInputStream;\n    jvalue jVal;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Sanity check\n    if (!f || f->type != HDFS_STREAM_INPUT) {\n        errno = EBADF;\n        return -1;\n    }\n\n    //Parameters\n    jInputStream = f->file;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jInputStream,\n                     HADOOP_ISTRM, \"available\", \"()I\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsAvailable: FSDataInputStream#available\");\n        return -1;\n    }\n    return jVal.i;\n}\n\nstatic int hdfsCopyImpl(hdfsFS srcFS, const char *src, hdfsFS dstFS,\n        const char *dst, jboolean deleteSource)\n{\n    //JAVA EQUIVALENT\n    //  FileUtil#copy(srcFS, srcPath, dstFS, dstPath,\n    //                 deleteSource = false, conf)\n\n    //Parameters\n    jobject jSrcFS = (jobject)srcFS;\n    jobject jDstFS = (jobject)dstFS;\n    jobject jConfiguration = NULL, jSrcPath = NULL, jDstPath = NULL;\n    jthrowable jthr;\n    jvalue jVal;\n    int ret;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    jthr = constructNewObjectOfPath(env, src, &jSrcPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsCopyImpl(src=%s): constructNewObjectOfPath\", src);\n        goto done;\n    }\n    jthr = constructNewObjectOfPath(env, dst, 
&jDstPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsCopyImpl(dst=%s): constructNewObjectOfPath\", dst);\n        goto done;\n    }\n\n    //Create the org.apache.hadoop.conf.Configuration object\n    jthr = constructNewObjectOfClass(env, &jConfiguration,\n                                     HADOOP_CONF, \"()V\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsCopyImpl: Configuration constructor\");\n        goto done;\n    }\n\n    //FileUtil#copy\n    jthr = invokeMethod(env, &jVal, STATIC,\n            NULL, \"org/apache/hadoop/fs/FileUtil\", \"copy\",\n            \"(Lorg/apache/hadoop/fs/FileSystem;Lorg/apache/hadoop/fs/Path;\"\n            \"Lorg/apache/hadoop/fs/FileSystem;Lorg/apache/hadoop/fs/Path;\"\n            \"ZLorg/apache/hadoop/conf/Configuration;)Z\",\n            jSrcFS, jSrcPath, jDstFS, jDstPath, deleteSource, \n            jConfiguration);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsCopyImpl(src=%s, dst=%s, deleteSource=%d): \"\n            \"FileUtil#copy\", src, dst, deleteSource);\n        goto done;\n    }\n    if (!jVal.z) {\n        ret = EIO;\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jConfiguration);\n    destroyLocalReference(env, jSrcPath);\n    destroyLocalReference(env, jDstPath);\n  \n    if (ret) {\n        errno = ret;\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsCopy(hdfsFS srcFS, const char *src, hdfsFS dstFS, const char *dst)\n{\n    return hdfsCopyImpl(srcFS, src, dstFS, dst, 0);\n}\n\nint hdfsMove(hdfsFS srcFS, const char *src, hdfsFS dstFS, const char *dst)\n{\n    return hdfsCopyImpl(srcFS, src, dstFS, dst, 1);\n}\n\nint hdfsDelete(hdfsFS fs, const char *path, int recursive)\n{\n    // JAVA EQUIVALENT:\n    //  Path p = new Path(path);\n    //  bool retval = fs.delete(p, recursive);\n\n    jobject jFS = 
(jobject)fs;\n    jthrowable jthr;\n    jobject jPath;\n    jvalue jVal;\n    jboolean jRecursive;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsDelete(path=%s): constructNewObjectOfPath\", path);\n        return -1;\n    }\n    jRecursive = recursive ? JNI_TRUE : JNI_FALSE;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"delete\", \"(Lorg/apache/hadoop/fs/Path;Z)Z\",\n                     jPath, jRecursive);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsDelete(path=%s, recursive=%d): \"\n            \"FileSystem#delete\", path, recursive);\n        return -1;\n    }\n    if (!jVal.z) {\n        errno = EIO;\n        return -1;\n    }\n    return 0;\n}\n\n\n\nint hdfsRename(hdfsFS fs, const char *oldPath, const char *newPath)\n{\n    // JAVA EQUIVALENT:\n    //  Path old = new Path(oldPath);\n    //  Path new = new Path(newPath);\n    //  fs.rename(old, new);\n\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jobject jOldPath = NULL, jNewPath = NULL;\n    int ret = -1;\n    jvalue jVal;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    jthr = constructNewObjectOfPath(env, oldPath, &jOldPath );\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsRename: constructNewObjectOfPath(%s)\", oldPath);\n        goto done;\n    }\n    jthr = constructNewObjectOfPath(env, newPath, &jNewPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            
\"hdfsRename: constructNewObjectOfPath(%s)\", newPath);\n        goto done;\n    }\n\n    // Rename the file\n    // TODO: use rename2 here?  (See HDFS-3592)\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS, \"rename\",\n                     JMETHOD2(JPARAM(HADOOP_PATH), JPARAM(HADOOP_PATH), \"Z\"),\n                     jOldPath, jNewPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsRename(oldPath=%s, newPath=%s): FileSystem#rename\",\n            oldPath, newPath);\n        goto done;\n    }\n    if (!jVal.z) {\n        errno = EIO;\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jOldPath);\n    destroyLocalReference(env, jNewPath);\n    return ret;\n}\n\n\n\nchar* hdfsGetWorkingDirectory(hdfsFS fs, char* buffer, size_t bufferSize)\n{\n    // JAVA EQUIVALENT:\n    //  Path p = fs.getWorkingDirectory(); \n    //  return p.toString()\n\n    jobject jPath = NULL;\n    jstring jPathString = NULL;\n    jobject jFS = (jobject)fs;\n    jvalue jVal;\n    jthrowable jthr;\n    int ret;\n    const char *jPathChars = NULL;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return NULL;\n    }\n\n    //FileSystem#getWorkingDirectory()\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS,\n                     HADOOP_FS, \"getWorkingDirectory\",\n                     \"()Lorg/apache/hadoop/fs/Path;\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetWorkingDirectory: FileSystem#getWorkingDirectory\");\n        goto done;\n    }\n    jPath = jVal.l;\n    if (!jPath) {\n        fprintf(stderr, \"hdfsGetWorkingDirectory: \"\n            \"FileSystem#getWorkingDirectory returned NULL\");\n        ret = -EIO;\n        goto done;\n    }\n\n    //Path#toString()\n    jthr = invokeMethod(env, &jVal, INSTANCE, jPath, \n                   
  \"org/apache/hadoop/fs/Path\", \"toString\",\n                     \"()Ljava/lang/String;\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetWorkingDirectory: Path#toString\");\n        goto done;\n    }\n    jPathString = jVal.l;\n    jPathChars = (*env)->GetStringUTFChars(env, jPathString, NULL);\n    if (!jPathChars) {\n        ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"hdfsGetWorkingDirectory: GetStringUTFChars\");\n        goto done;\n    }\n\n    //Copy to user-provided buffer\n    ret = snprintf(buffer, bufferSize, \"%s\", jPathChars);\n    if (ret >= bufferSize) {\n        ret = ENAMETOOLONG;\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    if (jPathChars) {\n        (*env)->ReleaseStringUTFChars(env, jPathString, jPathChars);\n    }\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jPathString);\n\n    if (ret) {\n        errno = ret;\n        return NULL;\n    }\n    return buffer;\n}\n\n\n\nint hdfsSetWorkingDirectory(hdfsFS fs, const char *path)\n{\n    // JAVA EQUIVALENT:\n    //  fs.setWorkingDirectory(Path(path)); \n\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jobject jPath;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsSetWorkingDirectory(%s): constructNewObjectOfPath\",\n            path);\n        return -1;\n    }\n\n    //FileSystem#setWorkingDirectory()\n    jthr = invokeMethod(env, NULL, INSTANCE, jFS, HADOOP_FS,\n                     \"setWorkingDirectory\", \n                     \"(Lorg/apache/hadoop/fs/Path;)V\", jPath);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        
errno = printExceptionAndFree(env, jthr, NOPRINT_EXC_ILLEGAL_ARGUMENT,\n            \"hdfsSetWorkingDirectory(%s): FileSystem#setWorkingDirectory\",\n            path);\n        return -1;\n    }\n    return 0;\n}\n\n\n\nint hdfsCreateDirectory(hdfsFS fs, const char *path)\n{\n    // JAVA EQUIVALENT:\n    //  fs.mkdirs(new Path(path));\n\n    jobject jFS = (jobject)fs;\n    jobject jPath;\n    jthrowable jthr;\n    jvalue jVal;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsCreateDirectory(%s): constructNewObjectOfPath\", path);\n        return -1;\n    }\n\n    //Create the directory\n    jVal.z = 0;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"mkdirs\", \"(Lorg/apache/hadoop/fs/Path;)Z\",\n                     jPath);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            NOPRINT_EXC_UNRESOLVED_LINK | NOPRINT_EXC_PARENT_NOT_DIRECTORY,\n            \"hdfsCreateDirectory(%s): FileSystem#mkdirs\", path);\n        return -1;\n    }\n    if (!jVal.z) {\n        // It's unclear under exactly which conditions FileSystem#mkdirs\n        // is supposed to return false (as opposed to throwing an exception.)\n        // It seems like the current code never actually returns false.\n        // So we're going to translate this to EIO, since there seems to be\n        // nothing more specific we can do with it.\n        errno = EIO;\n        return -1;\n    }\n    return 0;\n}\n\n\nint hdfsSetReplication(hdfsFS fs, const char *path, int16_t replication)\n{\n    // JAVA 
EQUIVALENT:\n    //  fs.setReplication(new Path(path), replication);\n\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jobject jPath;\n    jvalue jVal;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsSetReplication(path=%s): constructNewObjectOfPath\", path);\n        return -1;\n    }\n\n    //Create the directory\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"setReplication\", \"(Lorg/apache/hadoop/fs/Path;S)Z\",\n                     jPath, replication);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsSetReplication(path=%s, replication=%d): \"\n            \"FileSystem#setReplication\", path, replication);\n        return -1;\n    }\n    if (!jVal.z) {\n        // setReplication returns false \"if file does not exist or is a\n        // directory.\"  So the nearest translation to that is ENOENT.\n        errno = ENOENT;\n        return -1;\n    }\n\n    return 0;\n}\n\nint hdfsChown(hdfsFS fs, const char *path, const char *owner, const char *group)\n{\n    // JAVA EQUIVALENT:\n    //  fs.setOwner(path, owner, group)\n\n    jobject jFS = (jobject)fs;\n    jobject jPath = NULL;\n    jstring jOwner = NULL, jGroup = NULL;\n    jthrowable jthr;\n    int ret;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    if (owner == NULL && group == NULL) {\n      return 0;\n    }\n\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, 
jthr, PRINT_EXC_ALL,\n            \"hdfsChown(path=%s): constructNewObjectOfPath\", path);\n        goto done;\n    }\n\n    jthr = newJavaStr(env, owner, &jOwner); \n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsChown(path=%s): newJavaStr(%s)\", path, owner);\n        goto done;\n    }\n    jthr = newJavaStr(env, group, &jGroup);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsChown(path=%s): newJavaStr(%s)\", path, group);\n        goto done;\n    }\n\n    //Create the directory\n    jthr = invokeMethod(env, NULL, INSTANCE, jFS, HADOOP_FS,\n            \"setOwner\", JMETHOD3(JPARAM(HADOOP_PATH), \n                    JPARAM(JAVA_STRING), JPARAM(JAVA_STRING), JAVA_VOID),\n            jPath, jOwner, jGroup);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            NOPRINT_EXC_UNRESOLVED_LINK,\n            \"hdfsChown(path=%s, owner=%s, group=%s): \"\n            \"FileSystem#setOwner\", path, owner, group);\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jOwner);\n    destroyLocalReference(env, jGroup);\n\n    if (ret) {\n        errno = ret;\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsChmod(hdfsFS fs, const char *path, short mode)\n{\n    int ret;\n    // JAVA EQUIVALENT:\n    //  fs.setPermission(path, FsPermission)\n\n    jthrowable jthr;\n    jobject jPath = NULL, jPermObj = NULL;\n    jobject jFS = (jobject)fs;\n    jshort jmode = mode;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    // construct jPerm = FsPermission.createImmutable(short mode);\n    jthr = constructNewObjectOfClass(env, &jPermObj,\n                HADOOP_FSPERM,\"(S)V\",jmode);\n    if (jthr) {\n 
       ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"constructNewObjectOfClass(%s)\", HADOOP_FSPERM);\n        return -1;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsChmod(%s): constructNewObjectOfPath\", path);\n        goto done;\n    }\n\n    //Create the directory\n    jthr = invokeMethod(env, NULL, INSTANCE, jFS, HADOOP_FS,\n            \"setPermission\",\n            JMETHOD2(JPARAM(HADOOP_PATH), JPARAM(HADOOP_FSPERM), JAVA_VOID),\n            jPath, jPermObj);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            NOPRINT_EXC_UNRESOLVED_LINK,\n            \"hdfsChmod(%s): FileSystem#setPermission\", path);\n        goto done;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jPermObj);\n\n    if (ret) {\n        errno = ret;\n        return -1;\n    }\n    return 0;\n}\n\nint hdfsUtime(hdfsFS fs, const char *path, tTime mtime, tTime atime)\n{\n    // JAVA EQUIVALENT:\n    //  fs.setTimes(src, mtime, atime)\n\n    jthrowable jthr;\n    jobject jFS = (jobject)fs;\n    jobject jPath;\n    static const tTime NO_CHANGE = -1;\n    jlong jmtime, jatime;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsUtime(path=%s): constructNewObjectOfPath\", path);\n        return -1;\n    }\n\n    jmtime = (mtime == NO_CHANGE) ? -1 : (mtime * (jlong)1000);\n    jatime = (atime == NO_CHANGE) ? 
-1 : (atime * (jlong)1000);\n\n    jthr = invokeMethod(env, NULL, INSTANCE, jFS, HADOOP_FS,\n            \"setTimes\", JMETHOD3(JPARAM(HADOOP_PATH), \"J\", \"J\", JAVA_VOID),\n            jPath, jmtime, jatime);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            NOPRINT_EXC_UNRESOLVED_LINK,\n            \"hdfsUtime(path=%s): FileSystem#setTimes\", path);\n        return -1;\n    }\n    return 0;\n}\n\n/**\n * Zero-copy options.\n *\n * We cache the EnumSet of ReadOptions which has to be passed into every\n * readZero call, to avoid reconstructing it each time.  This cache is cleared\n * whenever an element changes.\n */\nstruct hadoopRzOptions\n{\n    JNIEnv *env;\n    int skipChecksums;\n    jobject byteBufferPool;\n    jobject cachedEnumSet;\n};\n\nstruct hadoopRzOptions *hadoopRzOptionsAlloc(void)\n{\n    struct hadoopRzOptions *opts;\n    JNIEnv *env;\n\n    env = getJNIEnv();\n    if (!env) {\n        // Check to make sure the JNI environment is set up properly.\n        errno = EINTERNAL;\n        return NULL;\n    }\n    opts = calloc(1, sizeof(struct hadoopRzOptions));\n    if (!opts) {\n        errno = ENOMEM;\n        return NULL;\n    }\n    return opts;\n}\n\nstatic void hadoopRzOptionsClearCached(JNIEnv *env,\n        struct hadoopRzOptions *opts)\n{\n    if (!opts->cachedEnumSet) {\n        return;\n    }\n    (*env)->DeleteGlobalRef(env, opts->cachedEnumSet);\n    opts->cachedEnumSet = NULL;\n}\n\nint hadoopRzOptionsSetSkipChecksum(\n        struct hadoopRzOptions *opts, int skip)\n{\n    JNIEnv *env;\n    env = getJNIEnv();\n    if (!env) {\n        errno = EINTERNAL;\n        return -1;\n    }\n    hadoopRzOptionsClearCached(env, opts);\n    opts->skipChecksums = !!skip;\n    return 0;\n}\n\nint hadoopRzOptionsSetByteBufferPool(\n        struct hadoopRzOptions *opts, const char *className)\n{\n    JNIEnv 
*env;\n    jthrowable jthr;\n    jobject byteBufferPool = NULL;\n\n    env = getJNIEnv();\n    if (!env) {\n        errno = EINTERNAL;\n        return -1;\n    }\n\n    if (className) {\n      // Note: we don't have to call hadoopRzOptionsClearCached in this\n      // function, since the ByteBufferPool is passed separately from the\n      // EnumSet of ReadOptions.\n\n      jthr = constructNewObjectOfClass(env, &byteBufferPool, className, \"()V\");\n      if (jthr) {\n          printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n              \"hadoopRzOptionsSetByteBufferPool(className=%s): \", className);\n          errno = EINVAL;\n          return -1;\n      }\n    }\n    if (opts->byteBufferPool) {\n        // Delete any previous ByteBufferPool we had.\n        (*env)->DeleteGlobalRef(env, opts->byteBufferPool);\n    }\n    opts->byteBufferPool = byteBufferPool;\n    return 0;\n}\n\nvoid hadoopRzOptionsFree(struct hadoopRzOptions *opts)\n{\n    JNIEnv *env;\n    env = getJNIEnv();\n    if (!env) {\n        return;\n    }\n    hadoopRzOptionsClearCached(env, opts);\n    if (opts->byteBufferPool) {\n        (*env)->DeleteGlobalRef(env, opts->byteBufferPool);\n        opts->byteBufferPool = NULL;\n    }\n    free(opts);\n}\n\nstruct hadoopRzBuffer\n{\n    jobject byteBuffer;\n    uint8_t *ptr;\n    int32_t length;\n    int direct;\n};\n\nstatic jthrowable hadoopRzOptionsGetEnumSet(JNIEnv *env,\n        struct hadoopRzOptions *opts, jobject *enumSet)\n{\n    jthrowable jthr = NULL;\n    jobject enumInst = NULL, enumSetObj = NULL;\n    jvalue jVal;\n\n    if (opts->cachedEnumSet) {\n        // If we cached the value, return it now.\n        *enumSet = opts->cachedEnumSet;\n        goto done;\n    }\n    if (opts->skipChecksums) {\n        jthr = fetchEnumInstance(env, READ_OPTION,\n                  \"SKIP_CHECKSUMS\", &enumInst);\n        if (jthr) {\n            goto done;\n        }\n        jthr = invokeMethod(env, &jVal, STATIC, NULL,\n                
\"java/util/EnumSet\", \"of\",\n                \"(Ljava/lang/Enum;)Ljava/util/EnumSet;\", enumInst);\n        if (jthr) {\n            goto done;\n        }\n        enumSetObj = jVal.l;\n    } else {\n        jclass clazz = (*env)->FindClass(env, READ_OPTION);\n        if (!clazz) {\n            jthr = newRuntimeError(env, \"failed \"\n                    \"to find class for %s\", READ_OPTION);\n            goto done;\n        }\n        jthr = invokeMethod(env, &jVal, STATIC, NULL,\n                \"java/util/EnumSet\", \"noneOf\",\n                \"(Ljava/lang/Class;)Ljava/util/EnumSet;\", clazz);\n        enumSetObj = jVal.l;\n    }\n    // create global ref\n    opts->cachedEnumSet = (*env)->NewGlobalRef(env, enumSetObj);\n    if (!opts->cachedEnumSet) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    *enumSet = opts->cachedEnumSet;\n    jthr = NULL;\ndone:\n    (*env)->DeleteLocalRef(env, enumInst);\n    (*env)->DeleteLocalRef(env, enumSetObj);\n    return jthr;\n}\n\nstatic int hadoopReadZeroExtractBuffer(JNIEnv *env,\n        const struct hadoopRzOptions *opts, struct hadoopRzBuffer *buffer)\n{\n    int ret;\n    jthrowable jthr;\n    jvalue jVal;\n    uint8_t *directStart;\n    void *mallocBuf = NULL;\n    jint position;\n    jarray array = NULL;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,\n                     \"java/nio/ByteBuffer\", \"remaining\", \"()I\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hadoopReadZeroExtractBuffer: ByteBuffer#remaining failed: \");\n        goto done;\n    }\n    buffer->length = jVal.i;\n    jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,\n                     \"java/nio/ByteBuffer\", \"position\", \"()I\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hadoopReadZeroExtractBuffer: ByteBuffer#position failed: \");\n        goto done;\n    
}\n    position = jVal.i;\n    directStart = (*env)->GetDirectBufferAddress(env, buffer->byteBuffer);\n    if (directStart) {\n        // Handle direct buffers.\n        buffer->ptr = directStart + position;\n        buffer->direct = 1;\n        ret = 0;\n        goto done;\n    }\n    // Handle indirect buffers.\n    // The JNI docs don't say that GetDirectBufferAddress throws any exceptions\n    // when it fails.  However, they also don't clearly say that it doesn't.  It\n    // seems safest to clear any pending exceptions here, to prevent problems on\n    // various JVMs.\n    (*env)->ExceptionClear(env);\n    if (!opts->byteBufferPool) {\n        fputs(\"hadoopReadZeroExtractBuffer: we read through the \"\n                \"zero-copy path, but failed to get the address of the buffer via \"\n                \"GetDirectBufferAddress.  Please make sure your JVM supports \"\n                \"GetDirectBufferAddress.\\n\", stderr);\n        ret = ENOTSUP;\n        goto done;\n    }\n    // Get the backing array object of this buffer.\n    jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,\n                     \"java/nio/ByteBuffer\", \"array\", \"()[B\");\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hadoopReadZeroExtractBuffer: ByteBuffer#array failed: \");\n        goto done;\n    }\n    array = jVal.l;\n    if (!array) {\n        fputs(\"hadoopReadZeroExtractBuffer: ByteBuffer#array returned NULL.\",\n              stderr);\n        ret = EIO;\n        goto done;\n    }\n    mallocBuf = malloc(buffer->length);\n    if (!mallocBuf) {\n        fprintf(stderr, \"hadoopReadZeroExtractBuffer: failed to allocate %d bytes of memory\\n\",\n                buffer->length);\n        ret = ENOMEM;\n        goto done;\n    }\n    (*env)->GetByteArrayRegion(env, array, position, buffer->length, mallocBuf);\n    jthr = (*env)->ExceptionOccurred(env);\n    if (jthr) {\n        ret = printExceptionAndFree(env, 
jthr, PRINT_EXC_ALL,\n                \"hadoopReadZeroExtractBuffer: GetByteArrayRegion failed: \");\n        goto done;\n    }\n    buffer->ptr = mallocBuf;\n    buffer->direct = 0;\n    ret = 0;\n\ndone:\n    free(mallocBuf);\n    (*env)->DeleteLocalRef(env, array);\n    return ret;\n}\n\nstatic int translateZCRException(JNIEnv *env, jthrowable exc)\n{\n    int ret;\n    char *className = NULL;\n    jthrowable jthr = classNameOfObject(exc, env, &className);\n\n    if (jthr) {\n        fputs(\"hadoopReadZero: failed to get class name of \"\n                \"exception from read().\\n\", stderr);\n        destroyLocalReference(env, exc);\n        destroyLocalReference(env, jthr);\n        ret = EIO;\n        goto done;\n    }\n    if (!strcmp(className, \"java.lang.UnsupportedOperationException\")) {\n        ret = EPROTONOSUPPORT;\n        goto done;\n    }\n    ret = printExceptionAndFree(env, exc, PRINT_EXC_ALL,\n            \"hadoopZeroCopyRead: ZeroCopyCursor#read failed\");\ndone:\n    free(className);\n    return ret;\n}\n\nstruct hadoopRzBuffer* hadoopReadZero(hdfsFile file,\n            struct hadoopRzOptions *opts, int32_t maxLength)\n{\n    JNIEnv *env;\n    jthrowable jthr = NULL;\n    jvalue jVal;\n    jobject enumSet = NULL, byteBuffer = NULL;\n    struct hadoopRzBuffer* buffer = NULL;\n    int ret;\n\n    env = getJNIEnv();\n    if (!env) {\n        errno = EINTERNAL;\n        return NULL;\n    }\n    if (file->type != HDFS_STREAM_INPUT) {\n        fputs(\"Cannot read from a non-InputStream object!\\n\", stderr);\n        ret = EINVAL;\n        goto done;\n    }\n    buffer = calloc(1, sizeof(struct hadoopRzBuffer));\n    if (!buffer) {\n        ret = ENOMEM;\n        goto done;\n    }\n    jthr = hadoopRzOptionsGetEnumSet(env, opts, &enumSet);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hadoopReadZero: hadoopRzOptionsGetEnumSet failed: \");\n        goto done;\n    }\n    jthr = 
invokeMethod(env, &jVal, INSTANCE, file->file, HADOOP_ISTRM, \"read\",\n        \"(Lorg/apache/hadoop/io/ByteBufferPool;ILjava/util/EnumSet;)\"\n        \"Ljava/nio/ByteBuffer;\", opts->byteBufferPool, maxLength, enumSet);\n    if (jthr) {\n        ret = translateZCRException(env, jthr);\n        goto done;\n    }\n    byteBuffer = jVal.l;\n    if (!byteBuffer) {\n        buffer->byteBuffer = NULL;\n        buffer->length = 0;\n        buffer->ptr = NULL;\n    } else {\n        buffer->byteBuffer = (*env)->NewGlobalRef(env, byteBuffer);\n        if (!buffer->byteBuffer) {\n            ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                \"hadoopReadZero: failed to create global ref to ByteBuffer\");\n            goto done;\n        }\n        ret = hadoopReadZeroExtractBuffer(env, opts, buffer);\n        if (ret) {\n            goto done;\n        }\n    }\n    ret = 0;\ndone:\n    (*env)->DeleteLocalRef(env, byteBuffer);\n    if (ret) {\n        if (buffer) {\n            if (buffer->byteBuffer) {\n                (*env)->DeleteGlobalRef(env, buffer->byteBuffer);\n            }\n            free(buffer);\n        }\n        errno = ret;\n        return NULL;\n    } else {\n        errno = 0;\n    }\n    return buffer;\n}\n\nint32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer)\n{\n    return buffer->length;\n}\n\nconst void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer)\n{\n    return buffer->ptr;\n}\n\nvoid hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer)\n{\n    jvalue jVal;\n    jthrowable jthr;\n    JNIEnv* env;\n    \n    env = getJNIEnv();\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return;\n    }\n    if (buffer->byteBuffer) {\n        jthr = invokeMethod(env, &jVal, INSTANCE, file->file,\n                    HADOOP_ISTRM, \"releaseBuffer\",\n                    \"(Ljava/nio/ByteBuffer;)V\", buffer->byteBuffer);\n        if (jthr) {\n            printExceptionAndFree(env, jthr, 
PRINT_EXC_ALL,\n                    \"hadoopRzBufferFree: releaseBuffer failed: \");\n            // even on error, we have to delete the reference.\n        }\n        (*env)->DeleteGlobalRef(env, buffer->byteBuffer);\n    }\n    if (!buffer->direct) {\n        free(buffer->ptr);\n    }\n    memset(buffer, 0, sizeof(*buffer));\n    free(buffer);\n}\n\nchar***\nhdfsGetHosts(hdfsFS fs, const char *path, tOffset start, tOffset length)\n{\n    // JAVA EQUIVALENT:\n    //  fs.getFileBlockLoctions(new Path(path), start, length);\n\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jobject jPath = NULL;\n    jobject jFileStatus = NULL;\n    jvalue jFSVal, jVal;\n    jobjectArray jBlockLocations = NULL, jFileBlockHosts = NULL;\n    jstring jHost = NULL;\n    char*** blockHosts = NULL;\n    int i, j, ret;\n    jsize jNumFileBlocks = 0;\n    jobject jFileBlock;\n    jsize jNumBlockHosts;\n    const char *hostName;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return NULL;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetHosts(path=%s): constructNewObjectOfPath\", path);\n        goto done;\n    }\n    jthr = invokeMethod(env, &jFSVal, INSTANCE, jFS,\n            HADOOP_FS, \"getFileStatus\", \"(Lorg/apache/hadoop/fs/Path;)\"\n            \"Lorg/apache/hadoop/fs/FileStatus;\", jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, NOPRINT_EXC_FILE_NOT_FOUND,\n                \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"):\"\n                \"FileSystem#getFileStatus\", path, start, length);\n        destroyLocalReference(env, jPath);\n        goto done;\n    }\n    jFileStatus = jFSVal.l;\n\n    //org.apache.hadoop.fs.FileSystem#getFileBlockLocations\n    jthr = 
invokeMethod(env, &jVal, INSTANCE, jFS,\n                     HADOOP_FS, \"getFileBlockLocations\", \n                     \"(Lorg/apache/hadoop/fs/FileStatus;JJ)\"\n                     \"[Lorg/apache/hadoop/fs/BlockLocation;\",\n                     jFileStatus, start, length);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"):\"\n                \"FileSystem#getFileBlockLocations\", path, start, length);\n        goto done;\n    }\n    jBlockLocations = jVal.l;\n\n    //Figure out no of entries in jBlockLocations\n    //Allocate memory and add NULL at the end\n    jNumFileBlocks = (*env)->GetArrayLength(env, jBlockLocations);\n\n    blockHosts = calloc(jNumFileBlocks + 1, sizeof(char**));\n    if (blockHosts == NULL) {\n        ret = ENOMEM;\n        goto done;\n    }\n    if (jNumFileBlocks == 0) {\n        ret = 0;\n        goto done;\n    }\n\n    //Now parse each block to get hostnames\n    for (i = 0; i < jNumFileBlocks; ++i) {\n        jFileBlock =\n            (*env)->GetObjectArrayElement(env, jBlockLocations, i);\n        if (!jFileBlock) {\n            ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"):\"\n                \"GetObjectArrayElement(%d)\", path, start, length, i);\n            goto done;\n        }\n        \n        jthr = invokeMethod(env, &jVal, INSTANCE, jFileBlock, HADOOP_BLK_LOC,\n                         \"getHosts\", \"()[Ljava/lang/String;\");\n        if (jthr) {\n            ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"):\"\n                \"BlockLocation#getHosts\", path, start, length);\n            goto done;\n        }\n        jFileBlockHosts = jVal.l;\n        if (!jFileBlockHosts) {\n            fprintf(stderr,\n                
\"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"):\"\n                \"BlockLocation#getHosts returned NULL\", path, start, length);\n            ret = EINTERNAL;\n            goto done;\n        }\n        //Figure out no of hosts in jFileBlockHosts, and allocate the memory\n        jNumBlockHosts = (*env)->GetArrayLength(env, jFileBlockHosts);\n        blockHosts[i] = calloc(jNumBlockHosts + 1, sizeof(char*));\n        if (!blockHosts[i]) {\n            ret = ENOMEM;\n            goto done;\n        }\n\n        //Now parse each hostname\n        for (j = 0; j < jNumBlockHosts; ++j) {\n            jHost = (*env)->GetObjectArrayElement(env, jFileBlockHosts, j);\n            if (!jHost) {\n                ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                    \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\"): \"\n                    \"NewByteArray\", path, start, length);\n                goto done;\n            }\n            hostName =\n                (const char*)((*env)->GetStringUTFChars(env, jHost, NULL));\n            if (!hostName) {\n                ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                    \"hdfsGetHosts(path=%s, start=%\"PRId64\", length=%\"PRId64\", \"\n                    \"j=%d out of %d): GetStringUTFChars\",\n                    path, start, length, j, jNumBlockHosts);\n                goto done;\n            }\n            blockHosts[i][j] = strdup(hostName);\n            (*env)->ReleaseStringUTFChars(env, jHost, hostName);\n            if (!blockHosts[i][j]) {\n                ret = ENOMEM;\n                goto done;\n            }\n            destroyLocalReference(env, jHost);\n            jHost = NULL;\n        }\n\n        destroyLocalReference(env, jFileBlockHosts);\n        jFileBlockHosts = NULL;\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jFileStatus);\n    destroyLocalReference(env, 
jBlockLocations);\n    destroyLocalReference(env, jFileBlockHosts);\n    destroyLocalReference(env, jHost);\n    if (ret) {\n        if (blockHosts) {\n            hdfsFreeHosts(blockHosts);\n        }\n        return NULL;\n    }\n\n    return blockHosts;\n}\n\n\nvoid hdfsFreeHosts(char ***blockHosts)\n{\n    int i, j;\n    for (i=0; blockHosts[i]; i++) {\n        for (j=0; blockHosts[i][j]; j++) {\n            free(blockHosts[i][j]);\n        }\n        free(blockHosts[i]);\n    }\n    free(blockHosts);\n}\n\n\ntOffset hdfsGetDefaultBlockSize(hdfsFS fs)\n{\n    // JAVA EQUIVALENT:\n    //  fs.getDefaultBlockSize();\n\n    jobject jFS = (jobject)fs;\n    jvalue jVal;\n    jthrowable jthr;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //FileSystem#getDefaultBlockSize()\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"getDefaultBlockSize\", \"()J\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetDefaultBlockSize: FileSystem#getDefaultBlockSize\");\n        return -1;\n    }\n    return jVal.j;\n}\n\n\ntOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path)\n{\n    // JAVA EQUIVALENT:\n    //  fs.getDefaultBlockSize(path);\n\n    jthrowable jthr;\n    jobject jFS = (jobject)fs;\n    jobject jPath;\n    tOffset blockSize;\n    JNIEnv* env = getJNIEnv();\n\n    if (env == NULL) {\n        errno = EINTERNAL;\n        return -1;\n    }\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetDefaultBlockSize(path=%s): constructNewObjectOfPath\",\n            path);\n        return -1;\n    }\n    jthr = getDefaultBlockSize(env, jFS, jPath, &blockSize);\n    (*env)->DeleteLocalRef(env, jPath);\n    if (jthr) {\n        errno = 
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetDefaultBlockSize(path=%s): \"\n            \"FileSystem#getDefaultBlockSize\", path);\n        return -1;\n    }\n    return blockSize;\n}\n\n\ntOffset hdfsGetCapacity(hdfsFS fs)\n{\n    // JAVA EQUIVALENT:\n    //  FsStatus fss = fs.getStatus();\n    //  return Fss.getCapacity();\n\n    jobject jFS = (jobject)fs;\n    jvalue  jVal;\n    jthrowable jthr;\n    jobject fss;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //FileSystem#getStatus\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"getStatus\", \"()Lorg/apache/hadoop/fs/FsStatus;\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetCapacity: FileSystem#getStatus\");\n        return -1;\n    }\n    fss = (jobject)jVal.l;\n    jthr = invokeMethod(env, &jVal, INSTANCE, fss, HADOOP_FSSTATUS,\n                     \"getCapacity\", \"()J\");\n    destroyLocalReference(env, fss);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetCapacity: FsStatus#getCapacity\");\n        return -1;\n    }\n    return jVal.j;\n}\n\n\n  \ntOffset hdfsGetUsed(hdfsFS fs)\n{\n    // JAVA EQUIVALENT:\n    //  FsStatus fss = fs.getStatus();\n    //  return Fss.getUsed();\n\n    jobject jFS = (jobject)fs;\n    jvalue  jVal;\n    jthrowable jthr;\n    jobject fss;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return -1;\n    }\n\n    //FileSystem#getStatus\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"getStatus\", \"()Lorg/apache/hadoop/fs/FsStatus;\");\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetUsed: 
FileSystem#getStatus\");\n        return -1;\n    }\n    fss = (jobject)jVal.l;\n    jthr = invokeMethod(env, &jVal, INSTANCE, fss, HADOOP_FSSTATUS,\n                     \"getUsed\", \"()J\");\n    destroyLocalReference(env, fss);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetUsed: FsStatus#getUsed\");\n        return -1;\n    }\n    return jVal.j;\n}\n \n/**\n * We cannot add new fields to the hdfsFileInfo structure because it would break\n * binary compatibility.  The reason is because we return an array\n * of hdfsFileInfo structures from hdfsListDirectory.  So changing the size of\n * those structures would break all programs that relied on finding the second\n * element in the array at <base_offset> + sizeof(struct hdfsFileInfo).\n *\n * So instead, we add the new fields to the hdfsExtendedFileInfo structure.\n * This structure is contained in the mOwner string found inside the\n * hdfsFileInfo.  Specifically, the format of mOwner is:\n *\n * [owner-string] [null byte] [padding] [hdfsExtendedFileInfo structure]\n *\n * The padding is added so that the hdfsExtendedFileInfo structure starts on an\n * 8-byte boundary.\n *\n * @param str           The string to locate the extended info in.\n * @return              The offset of the hdfsExtendedFileInfo structure.\n */\nstatic size_t getExtendedFileInfoOffset(const char *str)\n{\n    int num_64_bit_words = ((strlen(str) + 1) + 7) / 8;\n    return num_64_bit_words * 8;\n}\n\nstatic struct hdfsExtendedFileInfo *getExtendedFileInfo(hdfsFileInfo *fileInfo)\n{\n    char *owner = fileInfo->mOwner;\n    return (struct hdfsExtendedFileInfo *)(owner +\n                getExtendedFileInfoOffset(owner));\n}\n\nstatic jthrowable\ngetFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo)\n{\n    jvalue jVal;\n    jthrowable jthr;\n    jobject jPath = NULL;\n    jstring jPathName = NULL;\n    jstring jUserName = NULL;\n    jstring jGroupName = NULL;\n    
jobject jPermission = NULL;\n    const char *cPathName;\n    const char *cUserName;\n    const char *cGroupName;\n    struct hdfsExtendedFileInfo *extInfo;\n    size_t extOffset;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                     HADOOP_STAT, \"isDir\", \"()Z\");\n    if (jthr)\n        goto done;\n    fileInfo->mKind = jVal.z ? kObjectKindDirectory : kObjectKindFile;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                     HADOOP_STAT, \"getReplication\", \"()S\");\n    if (jthr)\n        goto done;\n    fileInfo->mReplication = jVal.s;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                     HADOOP_STAT, \"getBlockSize\", \"()J\");\n    if (jthr)\n        goto done;\n    fileInfo->mBlockSize = jVal.j;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                     HADOOP_STAT, \"getModificationTime\", \"()J\");\n    if (jthr)\n        goto done;\n    fileInfo->mLastMod = jVal.j / 1000;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                     HADOOP_STAT, \"getAccessTime\", \"()J\");\n    if (jthr)\n        goto done;\n    fileInfo->mLastAccess = (tTime) (jVal.j / 1000);\n\n    if (fileInfo->mKind == kObjectKindFile) {\n        jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                         HADOOP_STAT, \"getLen\", \"()J\");\n        if (jthr)\n            goto done;\n        fileInfo->mSize = jVal.j;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,\n                     \"getPath\", \"()Lorg/apache/hadoop/fs/Path;\");\n    if (jthr)\n        goto done;\n    jPath = jVal.l;\n    if (jPath == NULL) {\n        jthr = newRuntimeError(env, \"org.apache.hadoop.fs.FileStatus#\"\n            \"getPath returned NULL!\");\n        goto done;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jPath, HADOOP_PATH,\n                     \"toString\", \"()Ljava/lang/String;\");\n    if (jthr)\n        goto done;\n    jPathName = 
jVal.l;\n    cPathName =\n        (const char*) ((*env)->GetStringUTFChars(env, jPathName, NULL));\n    if (!cPathName) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    fileInfo->mName = strdup(cPathName);\n    (*env)->ReleaseStringUTFChars(env, jPathName, cPathName);\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,\n                    \"getOwner\", \"()Ljava/lang/String;\");\n    if (jthr)\n        goto done;\n    jUserName = jVal.l;\n    cUserName =\n        (const char*) ((*env)->GetStringUTFChars(env, jUserName, NULL));\n    if (!cUserName) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    extOffset = getExtendedFileInfoOffset(cUserName);\n    fileInfo->mOwner = malloc(extOffset + sizeof(struct hdfsExtendedFileInfo));\n    if (!fileInfo->mOwner) {\n        jthr = newRuntimeError(env, \"getFileInfo: OOM allocating mOwner\");\n        goto done;\n    }\n    strcpy(fileInfo->mOwner, cUserName);\n    (*env)->ReleaseStringUTFChars(env, jUserName, cUserName);\n    extInfo = getExtendedFileInfo(fileInfo);\n    memset(extInfo, 0, sizeof(*extInfo));\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat,\n                    HADOOP_STAT, \"isEncrypted\", \"()Z\");\n    if (jthr) {\n        goto done;\n    }\n    if (jVal.z == JNI_TRUE) {\n        extInfo->flags |= HDFS_EXTENDED_FILE_INFO_ENCRYPTED;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,\n                    \"getGroup\", \"()Ljava/lang/String;\");\n    if (jthr)\n        goto done;\n    jGroupName = jVal.l;\n    cGroupName = (const char*) ((*env)->GetStringUTFChars(env, jGroupName, NULL));\n    if (!cGroupName) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    fileInfo->mGroup = strdup(cGroupName);\n    (*env)->ReleaseStringUTFChars(env, jGroupName, cGroupName);\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT,\n            \"getPermission\",\n      
      \"()Lorg/apache/hadoop/fs/permission/FsPermission;\");\n    if (jthr)\n        goto done;\n    if (jVal.l == NULL) {\n        jthr = newRuntimeError(env, \"%s#getPermission returned NULL!\",\n            HADOOP_STAT);\n        goto done;\n    }\n    jPermission = jVal.l;\n    jthr = invokeMethod(env, &jVal, INSTANCE, jPermission, HADOOP_FSPERM,\n                         \"toShort\", \"()S\");\n    if (jthr)\n        goto done;\n    fileInfo->mPermissions = jVal.s;\n    jthr = NULL;\n\ndone:\n    if (jthr)\n        hdfsFreeFileInfoEntry(fileInfo);\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jPathName);\n    destroyLocalReference(env, jUserName);\n    destroyLocalReference(env, jGroupName);\n    destroyLocalReference(env, jPermission);\n    destroyLocalReference(env, jPath);\n    return jthr;\n}\n\nstatic jthrowable\ngetFileInfo(JNIEnv *env, jobject jFS, jobject jPath, hdfsFileInfo **fileInfo)\n{\n    // JAVA EQUIVALENT:\n    //  fs.isDirectory(f)\n    //  fs.getModificationTime()\n    //  fs.getAccessTime()\n    //  fs.getLength(f)\n    //  f.getPath()\n    //  f.getOwner()\n    //  f.getGroup()\n    //  f.getPermission().toShort()\n    jobject jStat;\n    jvalue  jVal;\n    jthrowable jthr;\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_FS,\n                     \"exists\", JMETHOD1(JPARAM(HADOOP_PATH), \"Z\"),\n                     jPath);\n    if (jthr)\n        return jthr;\n    if (jVal.z == 0) {\n        *fileInfo = NULL;\n        return NULL;\n    }\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS,\n            HADOOP_FS, \"getFileStatus\",\n            JMETHOD1(JPARAM(HADOOP_PATH), JPARAM(HADOOP_STAT)), jPath);\n    if (jthr)\n        return jthr;\n    jStat = jVal.l;\n    *fileInfo = calloc(1, sizeof(hdfsFileInfo));\n    if (!*fileInfo) {\n        destroyLocalReference(env, jStat);\n        return newRuntimeError(env, \"getFileInfo: OOM allocating hdfsFileInfo\");\n    }\n    jthr = getFileInfoFromStat(env, 
jStat, *fileInfo); \n    destroyLocalReference(env, jStat);\n    return jthr;\n}\n\n\n\nhdfsFileInfo* hdfsListDirectory(hdfsFS fs, const char *path, int *numEntries)\n{\n    // JAVA EQUIVALENT:\n    //  Path p(path);\n    //  Path []pathList = fs.listPaths(p)\n    //  foreach path in pathList \n    //    getFileInfo(path)\n\n    jobject jFS = (jobject)fs;\n    jthrowable jthr;\n    jobject jPath = NULL;\n    hdfsFileInfo *pathList = NULL; \n    jobjectArray jPathList = NULL;\n    jvalue jVal;\n    jsize jPathListSize = 0;\n    int ret;\n    jsize i;\n    jobject tmpStat;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return NULL;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsListDirectory(%s): constructNewObjectOfPath\", path);\n        goto done;\n    }\n\n    jthr = invokeMethod(env, &jVal, INSTANCE, jFS, HADOOP_DFS, \"listStatus\",\n                     JMETHOD1(JPARAM(HADOOP_PATH), JARRPARAM(HADOOP_STAT)),\n                     jPath);\n    if (jthr) {\n        ret = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            NOPRINT_EXC_UNRESOLVED_LINK,\n            \"hdfsListDirectory(%s): FileSystem#listStatus\", path);\n        goto done;\n    }\n    jPathList = jVal.l;\n\n    //Figure out the number of entries in that directory\n    jPathListSize = (*env)->GetArrayLength(env, jPathList);\n    if (jPathListSize == 0) {\n        ret = 0;\n        goto done;\n    }\n\n    //Allocate memory\n    pathList = calloc(jPathListSize, sizeof(hdfsFileInfo));\n    if (pathList == NULL) {\n        ret = ENOMEM;\n        goto done;\n    }\n\n    //Save path information in pathList\n    for (i=0; i < jPathListSize; ++i) {\n        tmpStat = 
(*env)->GetObjectArrayElement(env, jPathList, i);\n        if (!tmpStat) {\n            ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n                \"hdfsListDirectory(%s): GetObjectArrayElement(%d out of %d)\",\n                path, i, jPathListSize);\n            goto done;\n        }\n        jthr = getFileInfoFromStat(env, tmpStat, &pathList[i]);\n        destroyLocalReference(env, tmpStat);\n        if (jthr) {\n            ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n                \"hdfsListDirectory(%s): getFileInfoFromStat(%d out of %d)\",\n                path, i, jPathListSize);\n            goto done;\n        }\n    }\n    ret = 0;\n\ndone:\n    destroyLocalReference(env, jPath);\n    destroyLocalReference(env, jPathList);\n\n    if (ret) {\n        hdfsFreeFileInfo(pathList, jPathListSize);\n        errno = ret;\n        return NULL;\n    }\n    *numEntries = jPathListSize;\n    errno = 0;\n    return pathList;\n}\n\n\n\nhdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char *path)\n{\n    // JAVA EQUIVALENT:\n    //  File f(path);\n    //  fs.isDirectory(f)\n    //  fs.lastModified() ??\n    //  fs.getLength(f)\n    //  f.getPath()\n\n    jobject jFS = (jobject)fs;\n    jobject jPath;\n    jthrowable jthr;\n    hdfsFileInfo *fileInfo;\n\n    //Get the JNIEnv* corresponding to current thread\n    JNIEnv* env = getJNIEnv();\n    if (env == NULL) {\n      errno = EINTERNAL;\n      return NULL;\n    }\n\n    //Create an object of org.apache.hadoop.fs.Path\n    jthr = constructNewObjectOfPath(env, path, &jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,\n            \"hdfsGetPathInfo(%s): constructNewObjectOfPath\", path);\n        return NULL;\n    }\n    jthr = getFileInfo(env, jFS, jPath, &fileInfo);\n    destroyLocalReference(env, jPath);\n    if (jthr) {\n        errno = printExceptionAndFree(env, jthr,\n            NOPRINT_EXC_ACCESS_CONTROL | NOPRINT_EXC_FILE_NOT_FOUND |\n            
NOPRINT_EXC_UNRESOLVED_LINK,\n            \"hdfsGetPathInfo(%s): getFileInfo\", path);\n        return NULL;\n    }\n    if (!fileInfo) {\n        errno = ENOENT;\n        return NULL;\n    }\n    return fileInfo;\n}\n\nstatic void hdfsFreeFileInfoEntry(hdfsFileInfo *hdfsFileInfo)\n{\n    free(hdfsFileInfo->mName);\n    free(hdfsFileInfo->mOwner);\n    free(hdfsFileInfo->mGroup);\n    memset(hdfsFileInfo, 0, sizeof(*hdfsFileInfo));\n}\n\nvoid hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries)\n{\n    //Free the mName, mOwner, and mGroup\n    int i;\n    for (i=0; i < numEntries; ++i) {\n        hdfsFreeFileInfoEntry(hdfsFileInfo + i);\n    }\n\n    //Free entire block\n    free(hdfsFileInfo);\n}\n\nint hdfsFileIsEncrypted(hdfsFileInfo *fileInfo)\n{\n    struct hdfsExtendedFileInfo *extInfo;\n\n    extInfo = getExtendedFileInfo(fileInfo);\n    return !!(extInfo->flags & HDFS_EXTENDED_FILE_INFO_ENCRYPTED);\n}\n\n\n\n/**\n * vim: ts=4: sw=4: et:\n */\n"
  },
  {
    "path": "src/libhdfs/include/hdfs/hdfs.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_HDFS_H\n#define LIBHDFS_HDFS_H\n\n#include <errno.h> /* for EINTERNAL, etc. */\n#include <fcntl.h> /* for O_RDONLY, O_WRONLY */\n#include <stdint.h> /* for uint64_t, etc. */\n#include <time.h> /* for time_t */\n\n/*\n * Support export of DLL symbols during libhdfs build, and import of DLL symbols\n * during client application build.  A client application may optionally define\n * symbol LIBHDFS_DLL_IMPORT in its build.  
This is not strictly required, but\n * the compiler can produce more efficient code with it.\n */\n#ifdef WIN32\n    #ifdef LIBHDFS_DLL_EXPORT\n        #define LIBHDFS_EXTERNAL __declspec(dllexport)\n    #elif LIBHDFS_DLL_IMPORT\n        #define LIBHDFS_EXTERNAL __declspec(dllimport)\n    #else\n        #define LIBHDFS_EXTERNAL\n    #endif\n#else\n    #ifdef LIBHDFS_DLL_EXPORT\n        #define LIBHDFS_EXTERNAL __attribute__((visibility(\"default\")))\n    #elif LIBHDFS_DLL_IMPORT\n        #define LIBHDFS_EXTERNAL __attribute__((visibility(\"default\")))\n    #else\n        #define LIBHDFS_EXTERNAL\n    #endif\n#endif\n\n#ifndef O_RDONLY\n#define O_RDONLY 1\n#endif\n\n#ifndef O_WRONLY \n#define O_WRONLY 2\n#endif\n\n#ifndef EINTERNAL\n#define EINTERNAL 255 \n#endif\n\n#define ELASTIC_BYTE_BUFFER_POOL_CLASS \\\n  \"org/apache/hadoop/io/ElasticByteBufferPool\"\n\n/** All APIs set errno to meaningful values */\n\n#ifdef __cplusplus\nextern  \"C\" {\n#endif\n    /**\n     * Some utility decls used in libhdfs.\n     */\n    struct hdfsBuilder;\n    typedef int32_t   tSize; /// size of data for read/write io ops \n    typedef time_t    tTime; /// time type in seconds\n    typedef int64_t   tOffset;/// offset within the file\n    typedef uint16_t  tPort; /// port\n    typedef enum tObjectKind {\n        kObjectKindFile = 'F',\n        kObjectKindDirectory = 'D',\n    } tObjectKind;\n\n\n    /**\n     * The C reflection of org.apache.org.hadoop.FileSystem .\n     */\n    struct hdfs_internal;\n    typedef struct hdfs_internal* hdfsFS;\n    \n    struct hdfsFile_internal;\n    typedef struct hdfsFile_internal* hdfsFile;\n\n    struct hadoopRzOptions;\n\n    struct hadoopRzBuffer;\n\n    /**\n     * Determine if a file is open for read.\n     *\n     * @param file     The HDFS file\n     * @return         1 if the file is open for read; 0 otherwise\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsFileIsOpenForRead(hdfsFile file);\n\n    /**\n     * Determine if a file is open for 
write.\n     *\n     * @param file     The HDFS file\n     * @return         1 if the file is open for write; 0 otherwise\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsFileIsOpenForWrite(hdfsFile file);\n\n    struct hdfsReadStatistics {\n      uint64_t totalBytesRead;\n      uint64_t totalLocalBytesRead;\n      uint64_t totalShortCircuitBytesRead;\n      uint64_t totalZeroCopyBytesRead;\n    };\n\n    /**\n     * Get read statistics about a file.  This is only applicable to files\n     * opened for reading.\n     *\n     * @param file     The HDFS file\n     * @param stats    (out parameter) on a successful return, the read\n     *                 statistics.  Unchanged otherwise.  You must free the\n     *                 returned statistics with hdfsFileFreeReadStatistics.\n     * @return         0 if the statistics were successfully returned,\n     *                 -1 otherwise.  On a failure, please check errno against\n     *                 ENOTSUP.  webhdfs, LocalFilesystem, and so forth may\n     *                 not support read statistics.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsFileGetReadStatistics(hdfsFile file,\n                                  struct hdfsReadStatistics **stats);\n\n    /**\n     * @param stats    HDFS read statistics for a file.\n     *\n     * @return the number of remote bytes read.\n     */\n    LIBHDFS_EXTERNAL\n    int64_t hdfsReadStatisticsGetRemoteBytesRead(\n                            const struct hdfsReadStatistics *stats);\n\n    /**\n     * Clear the read statistics for a file.\n     *\n     * @param file      The file to clear the read statistics of.\n     *\n     * @return          0 on success; the error code otherwise.\n     *                  EINVAL: the file is not open for reading.\n     *                  ENOTSUP: the file does not support clearing the read\n     *                  statistics.\n     *                  Errno will also be set to this code on failure.\n     */\n    LIBHDFS_EXTERNAL\n    int 
hdfsFileClearReadStatistics(hdfsFile file);\n\n    /**\n     * Free some HDFS read statistics.\n     *\n     * @param stats    The HDFS read statistics to free.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats);\n\n    /** \n     * hdfsConnectAsUser - Connect to a hdfs file system as a specific user\n     * Connect to the hdfs.\n     * @param nn   The NameNode.  See hdfsBuilderSetNameNode for details.\n     * @param port The port on which the server is listening.\n     * @param user the user name (this is hadoop domain user). Or NULL is equivelant to hhdfsConnect(host, port)\n     * @return Returns a handle to the filesystem or NULL on error.\n     * @deprecated Use hdfsBuilderConnect instead. \n     */\n     LIBHDFS_EXTERNAL\n     hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user);\n\n    /** \n     * hdfsConnect - Connect to a hdfs file system.\n     * Connect to the hdfs.\n     * @param nn   The NameNode.  See hdfsBuilderSetNameNode for details.\n     * @param port The port on which the server is listening.\n     * @return Returns a handle to the filesystem or NULL on error.\n     * @deprecated Use hdfsBuilderConnect instead. \n     */\n     LIBHDFS_EXTERNAL\n     hdfsFS hdfsConnect(const char* nn, tPort port);\n\n    /** \n     * hdfsConnect - Connect to an hdfs file system.\n     *\n     * Forces a new instance to be created\n     *\n     * @param nn     The NameNode.  See hdfsBuilderSetNameNode for details.\n     * @param port   The port on which the server is listening.\n     * @param user   The user name to use when connecting\n     * @return       Returns a handle to the filesystem or NULL on error.\n     * @deprecated   Use hdfsBuilderConnect instead. 
\n     */\n     LIBHDFS_EXTERNAL\n     hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user );\n\n    /** \n     * hdfsConnect - Connect to an hdfs file system.\n     *\n     * Forces a new instance to be created\n     *\n     * @param nn     The NameNode.  See hdfsBuilderSetNameNode for details.\n     * @param port   The port on which the server is listening.\n     * @return       Returns a handle to the filesystem or NULL on error.\n     * @deprecated   Use hdfsBuilderConnect instead. \n     */\n     LIBHDFS_EXTERNAL\n     hdfsFS hdfsConnectNewInstance(const char* nn, tPort port);\n\n    /** \n     * Connect to HDFS using the parameters defined by the builder.\n     *\n     * The HDFS builder will be freed, whether or not the connection was\n     * successful.\n     *\n     * Every successful call to hdfsBuilderConnect should be matched with a call\n     * to hdfsDisconnect, when the hdfsFS is no longer needed.\n     *\n     * @param bld    The HDFS builder\n     * @return       Returns a handle to the filesystem, or NULL on error.\n     */\n     LIBHDFS_EXTERNAL\n     hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld);\n\n    /**\n     * Create an HDFS builder.\n     *\n     * @return The HDFS builder, or NULL on error.\n     */\n    LIBHDFS_EXTERNAL\n    struct hdfsBuilder *hdfsNewBuilder(void);\n\n    /**\n     * Force the builder to always create a new instance of the FileSystem,\n     * rather than possibly finding one in the cache.\n     *\n     * @param bld The HDFS builder\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld);\n\n    /**\n     * Set the HDFS NameNode to connect to.\n     *\n     * @param bld  The HDFS builder\n     * @param nn   The NameNode to use.\n     *\n     *             If the string given is 'default', the default NameNode\n     *             configuration will be used (from the XML configuration files)\n     *\n     *             If NULL is given, a 
LocalFileSystem will be created.\n     *\n     *             If the string starts with a protocol type such as file:// or\n     *             hdfs://, this protocol type will be used.  If not, the\n     *             hdfs:// protocol type will be used.\n     *\n     *             You may specify a NameNode port in the usual way by \n     *             passing a string of the format hdfs://<hostname>:<port>.\n     *             Alternately, you may set the port with\n     *             hdfsBuilderSetNameNodePort.  However, you must not pass the\n     *             port in two different ways.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn);\n\n    /**\n     * Set the port of the HDFS NameNode to connect to.\n     *\n     * @param bld The HDFS builder\n     * @param port The port.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port);\n\n    /**\n     * Set the username to use when connecting to the HDFS cluster.\n     *\n     * @param bld The HDFS builder\n     * @param userName The user name.  The string will be shallow-copied.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName);\n\n    /**\n     * Set the path to the Kerberos ticket cache to use when connecting to\n     * the HDFS cluster.\n     *\n     * @param bld The HDFS builder\n     * @param kerbTicketCachePath The Kerberos ticket cache path.  
The string\n     *                            will be shallow-copied.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld,\n                                   const char *kerbTicketCachePath);\n\n    /**\n     * Free an HDFS builder.\n     *\n     * It is normally not necessary to call this function since\n     * hdfsBuilderConnect frees the builder.\n     *\n     * @param bld The HDFS builder\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsFreeBuilder(struct hdfsBuilder *bld);\n\n    /**\n     * Set a configuration string for an HdfsBuilder.\n     *\n     * @param key      The key to set.\n     * @param val      The value, or NULL to set no value.\n     *                 This will be shallow-copied.  You are responsible for\n     *                 ensuring that it remains valid until the builder is\n     *                 freed.\n     *\n     * @return         0 on success; nonzero error code otherwise.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key,\n                              const char *val);\n\n    /**\n     * Get a configuration string.\n     *\n     * @param key      The key to find\n     * @param val      (out param) The value.  This will be set to NULL if the\n     *                 key isn't found.  You must free this string with\n     *                 hdfsConfStrFree.\n     *\n     * @return         0 on success; nonzero error code otherwise.\n     *                 Failure to find the key is not an error.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsConfGetStr(const char *key, char **val);\n\n    /**\n     * Get a configuration integer.\n     *\n     * @param key      The key to find\n     * @param val      (out param) The value.  
This will NOT be changed if the\n     *                 key isn't found.\n     *\n     * @return         0 on success; nonzero error code otherwise.\n     *                 Failure to find the key is not an error.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsConfGetInt(const char *key, int32_t *val);\n\n    /**\n     * Free a configuration string found with hdfsConfGetStr. \n     *\n     * @param val      A configuration string obtained from hdfsConfGetStr\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsConfStrFree(char *val);\n\n    /** \n     * hdfsDisconnect - Disconnect from the hdfs file system.\n     * Disconnect from hdfs.\n     * @param fs The configured filesystem handle.\n     * @return Returns 0 on success, -1 on error.\n     *         Even if there is an error, the resources associated with the\n     *         hdfsFS will be freed.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsDisconnect(hdfsFS fs);\n        \n\n    /** \n     * hdfsOpenFile - Open a hdfs file in given mode.\n     * @param fs The configured filesystem handle.\n     * @param path The full path to the file.\n     * @param flags - an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), \n     * O_WRONLY|O_APPEND. 
Other flags are generally ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP.\n     * @param bufferSize Size of buffer for read/write - pass 0 if you want\n     * to use the default configured values.\n     * @param replication Block replication - pass 0 if you want to use\n     * the default configured values.\n     * @param blocksize Size of block - pass 0 if you want to use the\n     * default configured values.\n     * @return Returns the handle to the open file or NULL on error.\n     */\n    LIBHDFS_EXTERNAL\n    hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,\n                          int bufferSize, short replication, tSize blocksize);\n\n    /**\n     * hdfsTruncateFile - Truncate a hdfs file to given lenght.\n     * @param fs The configured filesystem handle.\n     * @param path The full path to the file.\n     * @param newlength The size the file is to be truncated to\n     * @return 1 if the file has been truncated to the desired newlength \n     *         and is immediately available to be reused for write operations \n     *         such as append.\n     *         0 if a background process of adjusting the length of the last \n     *         block has been started, and clients should wait for it to\n     *         complete before proceeding with further file updates.\n     *         -1 on error.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsTruncateFile(hdfsFS fs, const char* path, tOffset newlength);\n\n    /**\n     * hdfsUnbufferFile - Reduce the buffering done on a file.\n     *\n     * @param file  The file to unbuffer.\n     * @return      0 on success\n     *              ENOTSUP if the file does not support unbuffering\n     *              Errno will also be set to this value.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsUnbufferFile(hdfsFile file);\n\n    /** \n     * hdfsCloseFile - Close an open file. 
\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @return Returns 0 on success, -1 on error.  \n     *         On error, errno will be set appropriately.\n     *         If the hdfs file was valid, the memory associated with it will\n     *         be freed at the end of this call, even if there was an I/O\n     *         error.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsCloseFile(hdfsFS fs, hdfsFile file);\n\n\n    /** \n     * hdfsExists - Checks if a given path exsits on the filesystem \n     * @param fs The configured filesystem handle.\n     * @param path The path to look for\n     * @return Returns 0 on success, -1 on error.  \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsExists(hdfsFS fs, const char *path);\n\n\n    /** \n     * hdfsSeek - Seek to given offset in file. \n     * This works only for files opened in read-only mode. \n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @param desiredPos Offset into the file to seek into.\n     * @return Returns 0 on success, -1 on error.  \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); \n\n\n    /** \n     * hdfsTell - Get the current offset in the file, in bytes.\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @return Current offset, -1 on error.\n     */\n    LIBHDFS_EXTERNAL\n    tOffset hdfsTell(hdfsFS fs, hdfsFile file);\n\n\n    /** \n     * hdfsRead - Read data from an open file.\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @param buffer The buffer to copy read bytes into.\n     * @param length The length of the buffer.\n     * @return      On success, a positive number indicating how many bytes\n     *              were read.\n     *              On end-of-file, 0.\n     *              On error, -1.  
Errno will be set to the error code.\n     *              Just like the POSIX read function, hdfsRead will return -1\n     *              and set errno to EINTR if data is temporarily unavailable,\n     *              but we are not yet at the end of the file.\n     */\n    LIBHDFS_EXTERNAL\n    tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length);\n\n    /** \n     * hdfsPread - Positional read of data from an open file.\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @param position Position from which to read\n     * @param buffer The buffer to copy read bytes into.\n     * @param length The length of the buffer.\n     * @return      See hdfsRead\n     */\n    LIBHDFS_EXTERNAL\n    tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position,\n                    void* buffer, tSize length);\n\n\n    /** \n     * hdfsWrite - Write data into an open file.\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @param buffer The data.\n     * @param length The no. of bytes to write. \n     * @return Returns the number of bytes written, -1 on error.\n     */\n    LIBHDFS_EXTERNAL\n    tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer,\n                    tSize length);\n\n\n    /** \n     * hdfsWrite - Flush the data. \n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsFlush(hdfsFS fs, hdfsFile file);\n\n\n    /**\n     * hdfsHFlush - Flush out the data in client's user buffer. 
After the\n     * return of this call, new readers will see the data.\n     * @param fs configured filesystem handle\n     * @param file file handle\n     * @return 0 on success, -1 on error and sets errno\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsHFlush(hdfsFS fs, hdfsFile file);\n\n\n    /**\n     * hdfsHSync - Similar to posix fsync, Flush out the data in client's \n     * user buffer. all the way to the disk device (but the disk may have \n     * it in its cache).\n     * @param fs configured filesystem handle\n     * @param file file handle\n     * @return 0 on success, -1 on error and sets errno\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsHSync(hdfsFS fs, hdfsFile file);\n\n\n    /**\n     * hdfsAvailable - Number of bytes that can be read from this\n     * input stream without blocking.\n     * @param fs The configured filesystem handle.\n     * @param file The file handle.\n     * @return Returns available bytes; -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsAvailable(hdfsFS fs, hdfsFile file);\n\n\n    /**\n     * hdfsCopy - Copy file from one filesystem to another.\n     * @param srcFS The handle to source filesystem.\n     * @param src The path of source file. \n     * @param dstFS The handle to destination filesystem.\n     * @param dst The path of destination file. \n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);\n\n\n    /**\n     * hdfsMove - Move file from one filesystem to another.\n     * @param srcFS The handle to source filesystem.\n     * @param src The path of source file. \n     * @param dstFS The handle to destination filesystem.\n     * @param dst The path of destination file. \n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst);\n\n\n    /**\n     * hdfsDelete - Delete file. 
\n     * @param fs The configured filesystem handle.\n     * @param path The path of the file. \n     * @param recursive if path is a directory and set to \n     * non-zero, the directory is deleted else throws an exception. In\n     * case of a file the recursive argument is irrelevant.\n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsDelete(hdfsFS fs, const char* path, int recursive);\n\n    /**\n     * hdfsRename - Rename file. \n     * @param fs The configured filesystem handle.\n     * @param oldPath The path of the source file. \n     * @param newPath The path of the destination file. \n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath);\n\n\n    /** \n     * hdfsGetWorkingDirectory - Get the current working directory for\n     * the given filesystem.\n     * @param fs The configured filesystem handle.\n     * @param buffer The user-buffer to copy path of cwd into. \n     * @param bufferSize The length of user-buffer.\n     * @return Returns buffer, NULL on error.\n     */\n    LIBHDFS_EXTERNAL\n    char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize);\n\n\n    /** \n     * hdfsSetWorkingDirectory - Set the working directory. All relative\n     * paths will be resolved relative to it.\n     * @param fs The configured filesystem handle.\n     * @param path The path of the new 'cwd'. \n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsSetWorkingDirectory(hdfsFS fs, const char* path);\n\n\n    /** \n     * hdfsCreateDirectory - Make the given file and all non-existent\n     * parents into directories.\n     * @param fs The configured filesystem handle.\n     * @param path The path of the directory. \n     * @return Returns 0 on success, -1 on error. 
\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsCreateDirectory(hdfsFS fs, const char* path);\n\n\n    /** \n     * hdfsSetReplication - Set the replication of the specified\n     * file to the supplied value\n     * @param fs The configured filesystem handle.\n     * @param path The path of the file. \n     * @return Returns 0 on success, -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication);\n\n\n    /** \n     * hdfsFileInfo - Information about a file/directory.\n     */\n    typedef struct  {\n        tObjectKind mKind;   /* file or directory */\n        char *mName;         /* the name of the file */\n        tTime mLastMod;      /* the last modification time for the file in seconds */\n        tOffset mSize;       /* the size of the file in bytes */\n        short mReplication;    /* the count of replicas */\n        tOffset mBlockSize;  /* the block size for the file */\n        char *mOwner;        /* the owner of the file */\n        char *mGroup;        /* the group associated with the file */\n        short mPermissions;  /* the permissions associated with the file */\n        tTime mLastAccess;    /* the last access time for the file in seconds */\n    } hdfsFileInfo;\n\n\n    /** \n     * hdfsListDirectory - Get list of files/directories for a given\n     * directory-path. hdfsFreeFileInfo should be called to deallocate memory. \n     * @param fs The configured filesystem handle.\n     * @param path The path of the directory. 
\n     * @param numEntries Set to the number of files/directories in path.\n     * @return Returns a dynamically-allocated array of hdfsFileInfo\n     * objects; NULL on error or empty directory.\n     * errno is set to non-zero on error or zero on success.\n     */\n    LIBHDFS_EXTERNAL\n    hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path,\n                                    int *numEntries);\n\n\n    /** \n     * hdfsGetPathInfo - Get information about a path as a (dynamically\n     * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be\n     * called when the pointer is no longer needed.\n     * @param fs The configured filesystem handle.\n     * @param path The path of the file. \n     * @return Returns a dynamically-allocated hdfsFileInfo object;\n     * NULL on error.\n     */\n    LIBHDFS_EXTERNAL\n    hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path);\n\n\n    /** \n     * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) \n     * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo\n     * objects.\n     * @param numEntries The size of the array.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries);\n\n    /**\n     * hdfsFileIsEncrypted: determine if a file is encrypted based on its\n     * hdfsFileInfo.\n     * @return -1 if there was an error (errno will be set), 0 if the file is\n     *         not encrypted, 1 if the file is encrypted.\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsFileIsEncrypted(hdfsFileInfo *hdfsFileInfo);\n\n\n    /** \n     * hdfsGetHosts - Get hostnames where a particular block (determined by\n     * pos & blocksize) of a file is stored. The last element in the array\n     * is NULL. Due to replication, a single block could be present on\n     * multiple hosts.\n     * @param fs The configured filesystem handle.\n     * @param path The path of the file. 
\n     * @param start The start of the block.\n     * @param length The length of the block.\n     * @return Returns a dynamically-allocated 2-d array of blocks-hosts;\n     * NULL on error.\n     */\n    LIBHDFS_EXTERNAL\n    char*** hdfsGetHosts(hdfsFS fs, const char* path, \n            tOffset start, tOffset length);\n\n\n    /** \n     * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts\n     * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo\n     * objects.\n     * @param numEntries The size of the array.\n     */\n    LIBHDFS_EXTERNAL\n    void hdfsFreeHosts(char ***blockHosts);\n\n\n    /** \n     * hdfsGetDefaultBlockSize - Get the default blocksize.\n     *\n     * @param fs            The configured filesystem handle.\n     * @deprecated          Use hdfsGetDefaultBlockSizeAtPath instead.\n     *\n     * @return              Returns the default blocksize, or -1 on error.\n     */\n    LIBHDFS_EXTERNAL\n    tOffset hdfsGetDefaultBlockSize(hdfsFS fs);\n\n\n    /** \n     * hdfsGetDefaultBlockSizeAtPath - Get the default blocksize at the\n     * filesystem indicated by a given path.\n     *\n     * @param fs            The configured filesystem handle.\n     * @param path          The given path will be used to locate the actual\n     *                      filesystem.  The full path does not have to exist.\n     *\n     * @return              Returns the default blocksize, or -1 on error.\n     */\n    LIBHDFS_EXTERNAL\n    tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path);\n\n\n    /** \n     * hdfsGetCapacity - Return the raw capacity of the filesystem.  \n     * @param fs The configured filesystem handle.\n     * @return Returns the raw-capacity; -1 on error. 
\n     */\n    LIBHDFS_EXTERNAL\n    tOffset hdfsGetCapacity(hdfsFS fs);\n\n\n    /** \n     * hdfsGetUsed - Return the total raw size of all files in the filesystem.\n     * @param fs The configured filesystem handle.\n     * @return Returns the total-size; -1 on error. \n     */\n    LIBHDFS_EXTERNAL\n    tOffset hdfsGetUsed(hdfsFS fs);\n\n    /** \n     * Change the user and/or group of a file or directory.\n     *\n     * @param fs            The configured filesystem handle.\n     * @param path          the path to the file or directory\n     * @param owner         User string.  Set to NULL for 'no change'\n     * @param group         Group string.  Set to NULL for 'no change'\n     * @return              0 on success else -1\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsChown(hdfsFS fs, const char* path, const char *owner,\n                  const char *group);\n\n    /** \n     * hdfsChmod\n     * @param fs The configured filesystem handle.\n     * @param path the path to the file or directory\n     * @param mode the bitmask to set it to\n     * @return 0 on success else -1\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsChmod(hdfsFS fs, const char* path, short mode);\n\n    /** \n     * hdfsUtime\n     * @param fs The configured filesystem handle.\n     * @param path the path to the file or directory\n     * @param mtime new modification time or -1 for no change\n     * @param atime new access time or -1 for no change\n     * @return 0 on success else -1\n     */\n    LIBHDFS_EXTERNAL\n    int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime);\n\n    /**\n     * Allocate a zero-copy options structure.\n     *\n     * You must free all options structures allocated with this function using\n     * hadoopRzOptionsFree.\n     *\n     * @return            A zero-copy options structure, or NULL if one could\n     *                    not be allocated.  
If NULL is returned, errno will\n     *                    contain the error number.\n     */\n    LIBHDFS_EXTERNAL\n    struct hadoopRzOptions *hadoopRzOptionsAlloc(void);\n\n    /**\n     * Determine whether we should skip checksums in read0.\n     *\n     * @param opts        The options structure.\n     * @param skip        Nonzero to skip checksums sometimes; zero to always\n     *                    check them.\n     *\n     * @return            0 on success; -1 plus errno on failure.\n     */\n    LIBHDFS_EXTERNAL\n    int hadoopRzOptionsSetSkipChecksum(\n            struct hadoopRzOptions *opts, int skip);\n\n    /**\n     * Set the ByteBufferPool to use with read0.\n     *\n     * @param opts        The options structure.\n     * @param className   If this is NULL, we will not use any\n     *                    ByteBufferPool.  If this is non-NULL, it will be\n     *                    treated as the name of the pool class to use.\n     *                    For example, you can use\n     *                    ELASTIC_BYTE_BUFFER_POOL_CLASS.\n     *\n     * @return            0 if the ByteBufferPool class was found and\n     *                    instantiated;\n     *                    -1 plus errno otherwise.\n     */\n    LIBHDFS_EXTERNAL\n    int hadoopRzOptionsSetByteBufferPool(\n            struct hadoopRzOptions *opts, const char *className);\n\n    /**\n     * Free a hadoopRzOptionsFree structure.\n     *\n     * @param opts        The options structure to free.\n     *                    Any associated ByteBufferPool will also be freed.\n     */\n    LIBHDFS_EXTERNAL\n    void hadoopRzOptionsFree(struct hadoopRzOptions *opts);\n\n    /**\n     * Perform a byte buffer read.\n     * If possible, this will be a zero-copy (mmap) read.\n     *\n     * @param file       The file to read from.\n     * @param opts       An options structure created by hadoopRzOptionsAlloc.\n     * @param maxLength  The maximum length to read.  
We may read fewer bytes\n     *                   than this length.\n     *\n     * @return           On success, we will return a new hadoopRzBuffer.\n     *                   This buffer will continue to be valid and readable\n     *                   until it is released by readZeroBufferFree.  Failure to\n     *                   release a buffer will lead to a memory leak.\n     *                   You can access the data within the hadoopRzBuffer with\n     *                   hadoopRzBufferGet.  If you have reached EOF, the data\n     *                   within the hadoopRzBuffer will be NULL.  You must still\n     *                   free hadoopRzBuffer instances containing NULL.\n     *\n     *                   On failure, we will return NULL plus an errno code.\n     *                   errno = EOPNOTSUPP indicates that we could not do a\n     *                   zero-copy read, and there was no ByteBufferPool\n     *                   supplied.\n     */\n    LIBHDFS_EXTERNAL\n    struct hadoopRzBuffer* hadoopReadZero(hdfsFile file,\n            struct hadoopRzOptions *opts, int32_t maxLength);\n\n    /**\n     * Determine the length of the buffer returned from readZero.\n     *\n     * @param buffer     a buffer returned from readZero.\n     * @return           the length of the buffer.\n     */\n    LIBHDFS_EXTERNAL\n    int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer);\n\n    /**\n     * Get a pointer to the raw buffer returned from readZero.\n     *\n     * To find out how many bytes this buffer contains, call\n     * hadoopRzBufferLength.\n     *\n     * @param buffer     a buffer returned from readZero.\n     * @return           a pointer to the start of the buffer.  
This will be\n     *                   NULL when end-of-file has been reached.\n     */\n    LIBHDFS_EXTERNAL\n    const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer);\n\n    /**\n     * Release a buffer obtained through readZero.\n     *\n     * @param file       The hdfs stream that created this buffer.  This must be\n     *                   the same stream you called hadoopReadZero on.\n     * @param buffer     The buffer to release.\n     */\n    LIBHDFS_EXTERNAL\n    void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer);\n\n#ifdef __cplusplus\n}\n#endif\n\n#undef LIBHDFS_EXTERNAL\n#endif /*LIBHDFS_HDFS_H*/\n\n/**\n * vim: ts=4: sw=4: et\n */\n"
  },
  {
    "path": "src/libhdfs/jni_helper.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"config.h\"\n#include \"exception.h\"\n#include \"jni_helper.h\"\n#include \"platform.h\"\n#include \"common/htable.h\"\n#include \"os/mutexes.h\"\n#include \"os/thread_local_storage.h\"\n\n#include <stdio.h> \n#include <string.h> \n\nstatic struct htable *gClassRefHTable = NULL;\n\n/** The Native return types that methods could return */\n#define JVOID         'V'\n#define JOBJECT       'L'\n#define JARRAYOBJECT  '['\n#define JBOOLEAN      'Z'\n#define JBYTE         'B'\n#define JCHAR         'C'\n#define JSHORT        'S'\n#define JINT          'I'\n#define JLONG         'J'\n#define JFLOAT        'F'\n#define JDOUBLE       'D'\n\n\n/**\n * MAX_HASH_TABLE_ELEM: The maximum no. of entries in the hashtable.\n * It's set to 4096 to account for (classNames + No. of threads)\n */\n#define MAX_HASH_TABLE_ELEM 4096\n\n/**\n * Length of buffer for retrieving created JVMs.  
(We only ever create one.)\n */\n#define VM_BUF_LENGTH 1\n\nvoid destroyLocalReference(JNIEnv *env, jobject jObject)\n{\n  if (jObject)\n    (*env)->DeleteLocalRef(env, jObject);\n}\n\nstatic jthrowable validateMethodType(JNIEnv *env, MethType methType)\n{\n    if (methType != STATIC && methType != INSTANCE) {\n        return newRuntimeError(env, \"validateMethodType(methType=%d): \"\n            \"illegal method type.\\n\", methType);\n    }\n    return NULL;\n}\n\njthrowable newJavaStr(JNIEnv *env, const char *str, jstring *out)\n{\n    jstring jstr;\n\n    if (!str) {\n        /* Can't pass NULL to NewStringUTF: the result would be\n         * implementation-defined. */\n        *out = NULL;\n        return NULL;\n    }\n    jstr = (*env)->NewStringUTF(env, str);\n    if (!jstr) {\n        /* If NewStringUTF returns NULL, an exception has been thrown,\n         * which we need to handle.  Probaly an OOM. */\n        return getPendingExceptionAndClear(env);\n    }\n    *out = jstr;\n    return NULL;\n}\n\njthrowable newCStr(JNIEnv *env, jstring jstr, char **out)\n{\n    const char *tmp;\n\n    if (!jstr) {\n        *out = NULL;\n        return NULL;\n    }\n    tmp = (*env)->GetStringUTFChars(env, jstr, NULL);\n    if (!tmp) {\n        return getPendingExceptionAndClear(env);\n    }\n    *out = strdup(tmp);\n    (*env)->ReleaseStringUTFChars(env, jstr, tmp);\n    return NULL;\n}\n\njthrowable invokeMethod(JNIEnv *env, jvalue *retval, MethType methType,\n                 jobject instObj, const char *className,\n                 const char *methName, const char *methSignature, ...)\n{\n    va_list args;\n    jclass cls;\n    jmethodID mid;\n    jthrowable jthr;\n    const char *str; \n    char returnType;\n    \n    jthr = validateMethodType(env, methType);\n    if (jthr)\n        return jthr;\n    jthr = globalClassReference(className, env, &cls);\n    if (jthr)\n        return jthr;\n    jthr = methodIdFromClass(className, methName, methSignature, \n              
              methType, env, &mid);\n    if (jthr)\n        return jthr;\n    str = methSignature;\n    while (*str != ')') str++;\n    str++;\n    returnType = *str;\n    va_start(args, methSignature);\n    if (returnType == JOBJECT || returnType == JARRAYOBJECT) {\n        jobject jobj = NULL;\n        if (methType == STATIC) {\n            jobj = (*env)->CallStaticObjectMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            jobj = (*env)->CallObjectMethodV(env, instObj, mid, args);\n        }\n        retval->l = jobj;\n    }\n    else if (returnType == JVOID) {\n        if (methType == STATIC) {\n            (*env)->CallStaticVoidMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            (*env)->CallVoidMethodV(env, instObj, mid, args);\n        }\n    }\n    else if (returnType == JBOOLEAN) {\n        jboolean jbool = 0;\n        if (methType == STATIC) {\n            jbool = (*env)->CallStaticBooleanMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            jbool = (*env)->CallBooleanMethodV(env, instObj, mid, args);\n        }\n        retval->z = jbool;\n    }\n    else if (returnType == JSHORT) {\n        jshort js = 0;\n        if (methType == STATIC) {\n            js = (*env)->CallStaticShortMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            js = (*env)->CallShortMethodV(env, instObj, mid, args);\n        }\n        retval->s = js;\n    }\n    else if (returnType == JLONG) {\n        jlong jl = -1;\n        if (methType == STATIC) {\n            jl = (*env)->CallStaticLongMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            jl = (*env)->CallLongMethodV(env, instObj, mid, args);\n        }\n        retval->j = jl;\n    }\n    else if (returnType == JINT) {\n        jint ji = -1;\n        if (methType == STATIC) {\n            ji = 
(*env)->CallStaticIntMethodV(env, cls, mid, args);\n        }\n        else if (methType == INSTANCE) {\n            ji = (*env)->CallIntMethodV(env, instObj, mid, args);\n        }\n        retval->i = ji;\n    }\n    va_end(args);\n\n    jthr = (*env)->ExceptionOccurred(env);\n    if (jthr) {\n        (*env)->ExceptionClear(env);\n        return jthr;\n    }\n    return NULL;\n}\n\njthrowable constructNewObjectOfClass(JNIEnv *env, jobject *out, const char *className, \n                                  const char *ctorSignature, ...)\n{\n    va_list args;\n    jclass cls;\n    jmethodID mid; \n    jobject jobj;\n    jthrowable jthr;\n\n    jthr = globalClassReference(className, env, &cls);\n    if (jthr)\n        return jthr;\n    jthr = methodIdFromClass(className, \"<init>\", ctorSignature, \n                            INSTANCE, env, &mid);\n    if (jthr)\n        return jthr;\n    va_start(args, ctorSignature);\n    jobj = (*env)->NewObjectV(env, cls, mid, args);\n    va_end(args);\n    if (!jobj)\n        return getPendingExceptionAndClear(env);\n    *out = jobj;\n    return NULL;\n}\n\n\njthrowable methodIdFromClass(const char *className, const char *methName, \n                            const char *methSignature, MethType methType, \n                            JNIEnv *env, jmethodID *out)\n{\n    jclass cls;\n    jthrowable jthr;\n    jmethodID mid = 0;\n\n    jthr = globalClassReference(className, env, &cls);\n    if (jthr)\n        return jthr;\n    jthr = validateMethodType(env, methType);\n    if (jthr)\n        return jthr;\n    if (methType == STATIC) {\n        mid = (*env)->GetStaticMethodID(env, cls, methName, methSignature);\n    }\n    else if (methType == INSTANCE) {\n        mid = (*env)->GetMethodID(env, cls, methName, methSignature);\n    }\n    if (mid == NULL) {\n        fprintf(stderr, \"could not find method %s from class %s with \"\n            \"signature %s\\n\", methName, className, methSignature);\n        return 
getPendingExceptionAndClear(env);\n    }\n    *out = mid;\n    return NULL;\n}\n\njthrowable globalClassReference(const char *className, JNIEnv *env, jclass *out)\n{\n    jthrowable jthr = NULL;\n    jclass local_clazz = NULL;\n    jclass clazz = NULL;\n    int ret;\n\n    mutexLock(&hdfsHashMutex);\n    if (!gClassRefHTable) {\n        gClassRefHTable = htable_alloc(MAX_HASH_TABLE_ELEM, ht_hash_string,\n            ht_compare_string);\n        if (!gClassRefHTable) {\n            jthr = newRuntimeError(env, \"htable_alloc failed\\n\");\n            goto done;\n        }\n    }\n    clazz = htable_get(gClassRefHTable, className);\n    if (clazz) {\n        *out = clazz;\n        goto done;\n    }\n    local_clazz = (*env)->FindClass(env,className);\n    if (!local_clazz) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    clazz = (*env)->NewGlobalRef(env, local_clazz);\n    if (!clazz) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    ret = htable_put(gClassRefHTable, (void*)className, clazz);\n    if (ret) {\n        jthr = newRuntimeError(env, \"htable_put failed with error \"\n                               \"code %d\\n\", ret);\n        goto done;\n    }\n    *out = clazz;\n    jthr = NULL;\ndone:\n    mutexUnlock(&hdfsHashMutex);\n    (*env)->DeleteLocalRef(env, local_clazz);\n    if (jthr && clazz) {\n        (*env)->DeleteGlobalRef(env, clazz);\n    }\n    return jthr;\n}\n\njthrowable classNameOfObject(jobject jobj, JNIEnv *env, char **name)\n{\n    jthrowable jthr;\n    jclass cls, clsClass = NULL;\n    jmethodID mid;\n    jstring str = NULL;\n    const char *cstr = NULL;\n    char *newstr;\n\n    cls = (*env)->GetObjectClass(env, jobj);\n    if (cls == NULL) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    clsClass = (*env)->FindClass(env, \"java/lang/Class\");\n    if (clsClass == NULL) {\n        jthr = getPendingExceptionAndClear(env);\n        goto 
done;\n    }\n    mid = (*env)->GetMethodID(env, clsClass, \"getName\", \"()Ljava/lang/String;\");\n    if (mid == NULL) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    str = (*env)->CallObjectMethod(env, cls, mid);\n    if (str == NULL) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    cstr = (*env)->GetStringUTFChars(env, str, NULL);\n    if (!cstr) {\n        jthr = getPendingExceptionAndClear(env);\n        goto done;\n    }\n    newstr = strdup(cstr);\n    if (newstr == NULL) {\n        jthr = newRuntimeError(env, \"classNameOfObject: out of memory\");\n        goto done;\n    }\n    *name = newstr;\n    jthr = NULL;\n\ndone:\n    destroyLocalReference(env, cls);\n    destroyLocalReference(env, clsClass);\n    if (str) {\n        if (cstr)\n            (*env)->ReleaseStringUTFChars(env, str, cstr);\n        (*env)->DeleteLocalRef(env, str);\n    }\n    return jthr;\n}\n\n\n/**\n * Get the global JNI environemnt.\n *\n * We only have to create the JVM once.  After that, we can use it in\n * every thread.  
You must be holding the jvmMutex when you call this\n * function.\n *\n * @return          The JNIEnv on success; error code otherwise\n */\nstatic JNIEnv* getGlobalJNIEnv(void)\n{\n    JavaVM* vmBuf[VM_BUF_LENGTH]; \n    JNIEnv *env;\n    jint rv = 0; \n    jint noVMs = 0;\n    jthrowable jthr;\n    char *hadoopClassPath;\n    const char *hadoopClassPathVMArg = \"-Djava.class.path=\";\n    size_t optHadoopClassPathLen;\n    char *optHadoopClassPath;\n    int noArgs = 1;\n    char *hadoopJvmArgs;\n    char jvmArgDelims[] = \" \";\n    char *str, *token, *savePtr;\n    JavaVMInitArgs vm_args;\n    JavaVM *vm;\n    JavaVMOption *options;\n\n    rv = JNI_GetCreatedJavaVMs(&(vmBuf[0]), VM_BUF_LENGTH, &noVMs);\n    if (rv != 0) {\n        fprintf(stderr, \"JNI_GetCreatedJavaVMs failed with error: %d\\n\", rv);\n        return NULL;\n    }\n\n    if (noVMs == 0) {\n        //Get the environment variables for initializing the JVM\n        hadoopClassPath = getenv(\"CLASSPATH\");\n        if (hadoopClassPath == NULL) {\n            fprintf(stderr, \"Environment variable CLASSPATH not set!\\n\");\n            return NULL;\n        } \n        optHadoopClassPathLen = strlen(hadoopClassPath) + \n          strlen(hadoopClassPathVMArg) + 1;\n        optHadoopClassPath = malloc(sizeof(char)*optHadoopClassPathLen);\n        snprintf(optHadoopClassPath, optHadoopClassPathLen,\n                \"%s%s\", hadoopClassPathVMArg, hadoopClassPath);\n\n        // Determine the # of LIBHDFS_OPTS args\n        hadoopJvmArgs = getenv(\"LIBHDFS_OPTS\");\n        if (hadoopJvmArgs != NULL)  {\n          hadoopJvmArgs = strdup(hadoopJvmArgs);\n          for (noArgs = 1, str = hadoopJvmArgs; ; noArgs++, str = NULL) {\n            token = strtok_r(str, jvmArgDelims, &savePtr);\n            if (NULL == token) {\n              break;\n            }\n          }\n          free(hadoopJvmArgs);\n        }\n\n        // Now that we know the # args, populate the options array\n        options = 
calloc(noArgs, sizeof(JavaVMOption));\n        if (!options) {\n          fputs(\"Call to calloc failed\\n\", stderr);\n          free(optHadoopClassPath);\n          return NULL;\n        }\n        options[0].optionString = optHadoopClassPath;\n        hadoopJvmArgs = getenv(\"LIBHDFS_OPTS\");\n\tif (hadoopJvmArgs != NULL)  {\n          hadoopJvmArgs = strdup(hadoopJvmArgs);\n          for (noArgs = 1, str = hadoopJvmArgs; ; noArgs++, str = NULL) {\n            token = strtok_r(str, jvmArgDelims, &savePtr);\n            if (NULL == token) {\n              break;\n            }\n            options[noArgs].optionString = token;\n          }\n        }\n\n        //Create the VM\n        vm_args.version = JNI_VERSION_1_2;\n        vm_args.options = options;\n        vm_args.nOptions = noArgs; \n        vm_args.ignoreUnrecognized = 1;\n\n        rv = JNI_CreateJavaVM(&vm, (void*)&env, &vm_args);\n\n        if (hadoopJvmArgs != NULL)  {\n          free(hadoopJvmArgs);\n        }\n        free(optHadoopClassPath);\n        free(options);\n\n        if (rv != 0) {\n            fprintf(stderr, \"Call to JNI_CreateJavaVM failed \"\n                    \"with error: %d\\n\", rv);\n            return NULL;\n        }\n        jthr = invokeMethod(env, NULL, STATIC, NULL,\n                         \"org/apache/hadoop/fs/FileSystem\",\n                         \"loadFileSystems\", \"()V\");\n        if (jthr) {\n            printExceptionAndFree(env, jthr, PRINT_EXC_ALL, \"loadFileSystems\");\n        }\n    }\n    else {\n        //Attach this thread to the VM\n        vm = vmBuf[0];\n        rv = (*vm)->AttachCurrentThread(vm, (void*)&env, 0);\n        if (rv != 0) {\n            fprintf(stderr, \"Call to AttachCurrentThread \"\n                    \"failed with error: %d\\n\", rv);\n            return NULL;\n        }\n    }\n\n    return env;\n}\n\n/**\n * getJNIEnv: A helper function to get the JNIEnv* for the given thread.\n * If no JVM exists, then one will be created. 
JVM command line arguments\n * are obtained from the LIBHDFS_OPTS environment variable.\n *\n * Implementation note: we rely on POSIX thread-local storage (tls).\n * This allows us to associate a destructor function with each thread, that\n * will detach the thread from the Java VM when the thread terminates.  If we\n * failt to do this, it will cause a memory leak.\n *\n * However, POSIX TLS is not the most efficient way to do things.  It requires a\n * key to be initialized before it can be used.  Since we don't know if this key\n * is initialized at the start of this function, we have to lock a mutex first\n * and check.  Luckily, most operating systems support the more efficient\n * __thread construct, which is initialized by the linker.\n *\n * @param: None.\n * @return The JNIEnv* corresponding to the thread.\n */\nJNIEnv* getJNIEnv(void)\n{\n    JNIEnv *env;\n    THREAD_LOCAL_STORAGE_GET_QUICK();\n    mutexLock(&jvmMutex);\n    if (threadLocalStorageGet(&env)) {\n      mutexUnlock(&jvmMutex);\n      return NULL;\n    }\n    if (env) {\n      mutexUnlock(&jvmMutex);\n      return env;\n    }\n\n    env = getGlobalJNIEnv();\n    mutexUnlock(&jvmMutex);\n    if (!env) {\n      fprintf(stderr, \"getJNIEnv: getGlobalJNIEnv failed\\n\");\n      return NULL;\n    }\n    if (threadLocalStorageSet(env)) {\n      return NULL;\n    }\n    THREAD_LOCAL_STORAGE_SET_QUICK(env);\n    return env;\n}\n\nint javaObjectIsOfClass(JNIEnv *env, jobject obj, const char *name)\n{\n    jclass clazz;\n    int ret;\n\n    clazz = (*env)->FindClass(env, name);\n    if (!clazz) {\n        printPendingExceptionAndFree(env, PRINT_EXC_ALL,\n            \"javaObjectIsOfClass(%s)\", name);\n        return -1;\n    }\n    ret = (*env)->IsInstanceOf(env, obj, clazz);\n    (*env)->DeleteLocalRef(env, clazz);\n    return ret == JNI_TRUE ? 
1 : 0;\n}\n\njthrowable hadoopConfSetStr(JNIEnv *env, jobject jConfiguration,\n        const char *key, const char *value)\n{\n    jthrowable jthr;\n    jstring jkey = NULL, jvalue = NULL;\n\n    jthr = newJavaStr(env, key, &jkey);\n    if (jthr)\n        goto done;\n    jthr = newJavaStr(env, value, &jvalue);\n    if (jthr)\n        goto done;\n    jthr = invokeMethod(env, NULL, INSTANCE, jConfiguration,\n            \"org/apache/hadoop/conf/Configuration\", \"set\", \n            \"(Ljava/lang/String;Ljava/lang/String;)V\",\n            jkey, jvalue);\n    if (jthr)\n        goto done;\ndone:\n    (*env)->DeleteLocalRef(env, jkey);\n    (*env)->DeleteLocalRef(env, jvalue);\n    return jthr;\n}\n\njthrowable fetchEnumInstance(JNIEnv *env, const char *className,\n                         const char *valueName, jobject *out)\n{\n    jclass clazz;\n    jfieldID fieldId;\n    jobject jEnum;\n    char prettyClass[256];\n\n    clazz = (*env)->FindClass(env, className);\n    if (!clazz) {\n        return newRuntimeError(env, \"fetchEnum(%s, %s): failed to find class.\",\n                className, valueName);\n    }\n    if (snprintf(prettyClass, sizeof(prettyClass), \"L%s;\", className)\n          >= sizeof(prettyClass)) {\n        return newRuntimeError(env, \"fetchEnum(%s, %s): class name too long.\",\n                className, valueName);\n    }\n    fieldId = (*env)->GetStaticFieldID(env, clazz, valueName, prettyClass);\n    if (!fieldId) {\n        return getPendingExceptionAndClear(env);\n    }\n    jEnum = (*env)->GetStaticObjectField(env, clazz, fieldId);\n    if (!jEnum) {\n        return getPendingExceptionAndClear(env);\n    }\n    *out = jEnum;\n    return NULL;\n}\n\n"
  },
  {
    "path": "src/libhdfs/jni_helper.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_JNI_HELPER_H\n#define LIBHDFS_JNI_HELPER_H\n\n#include <jni.h>\n#include <stdio.h>\n\n#include <stdlib.h>\n#include <stdarg.h>\n#include <errno.h>\n\n#define PATH_SEPARATOR ':'\n\n\n/** Denote the method we want to invoke as STATIC or INSTANCE */\ntypedef enum {\n    STATIC,\n    INSTANCE\n} MethType;\n\n/**\n * Create a new malloc'ed C string from a Java string.\n *\n * @param env       The JNI environment\n * @param jstr      The Java string\n * @param out       (out param) the malloc'ed C string\n *\n * @return          NULL on success; the exception otherwise\n */\njthrowable newCStr(JNIEnv *env, jstring jstr, char **out);\n\n/**\n * Create a new Java string from a C string.\n *\n * @param env       The JNI environment\n * @param str       The C string\n * @param out       (out param) the java string\n *\n * @return          NULL on success; the exception otherwise\n */\njthrowable newJavaStr(JNIEnv *env, const char *str, jstring *out);\n\n/**\n * Helper function to destroy a local reference of java.lang.Object\n * @param env: The JNIEnv pointer. 
\n * @param jFile: The local reference of java.lang.Object object\n * @return None.\n */\nvoid destroyLocalReference(JNIEnv *env, jobject jObject);\n\n/** invokeMethod: Invoke a Static or Instance method.\n * className: Name of the class where the method can be found\n * methName: Name of the method\n * methSignature: the signature of the method \"(arg-types)ret-type\"\n * methType: The type of the method (STATIC or INSTANCE)\n * instObj: Required if the methType is INSTANCE. The object to invoke\n   the method on.\n * env: The JNIEnv pointer\n * retval: The pointer to a union type which will contain the result of the\n   method invocation, e.g. if the method returns an Object, retval will be\n   set to that, if the method returns boolean, retval will be set to the\n   value (JNI_TRUE or JNI_FALSE), etc.\n * exc: If the methods throws any exception, this will contain the reference\n * Arguments (the method arguments) must be passed after methSignature\n * RETURNS: -1 on error and 0 on success. 
If -1 is returned, exc will have \n   a valid exception reference, and the result stored at retval is undefined.\n */\njthrowable invokeMethod(JNIEnv *env, jvalue *retval, MethType methType,\n                 jobject instObj, const char *className, const char *methName, \n                 const char *methSignature, ...);\n\njthrowable constructNewObjectOfClass(JNIEnv *env, jobject *out, const char *className, \n                                  const char *ctorSignature, ...);\n\njthrowable methodIdFromClass(const char *className, const char *methName, \n                            const char *methSignature, MethType methType, \n                            JNIEnv *env, jmethodID *out);\n\njthrowable globalClassReference(const char *className, JNIEnv *env, jclass *out);\n\n/** classNameOfObject: Get an object's class name.\n * @param jobj: The object.\n * @param env: The JNIEnv pointer.\n * @param name: (out param) On success, will contain a string containing the\n * class name. This string must be freed by the caller.\n * @return NULL on success, or the exception\n */\njthrowable classNameOfObject(jobject jobj, JNIEnv *env, char **name);\n\n/** getJNIEnv: A helper function to get the JNIEnv* for the given thread.\n * If no JVM exists, then one will be created. 
JVM command line arguments\n * are obtained from the LIBHDFS_OPTS environment variable.\n * @param: None.\n * @return The JNIEnv* corresponding to the thread.\n * */\nJNIEnv* getJNIEnv(void);\n\n/**\n * Figure out if a Java object is an instance of a particular class.\n *\n * @param env  The Java environment.\n * @param obj  The object to check.\n * @param name The class name to check.\n *\n * @return     -1 if we failed to find the referenced class name.\n *             0 if the object is not of the given class.\n *             1 if the object is of the given class.\n */\nint javaObjectIsOfClass(JNIEnv *env, jobject obj, const char *name);\n\n/**\n * Set a value in a configuration object.\n *\n * @param env               The JNI environment\n * @param jConfiguration    The configuration object to modify\n * @param key               The key to modify\n * @param value             The value to set the key to\n *\n * @return                  NULL on success; exception otherwise\n */\njthrowable hadoopConfSetStr(JNIEnv *env, jobject jConfiguration,\n        const char *key, const char *value);\n\n/**\n * Fetch an instance of an Enum.\n *\n * @param env               The JNI environment.\n * @param className         The enum class name.\n * @param valueName         The name of the enum value\n * @param out               (out param) on success, a local reference to an\n *                          instance of the enum object.  (Since Java enums are\n *                          singletons, this is also the only instance.)\n *\n * @return                  NULL on success; exception otherwise\n */\njthrowable fetchEnumInstance(JNIEnv *env, const char *className,\n                             const char *valueName, jobject *out);\n\n#endif /*LIBHDFS_JNI_HELPER_H*/\n\n/**\n * vim: ts=4: sw=4: et:\n */\n\n"
  },
  {
    "path": "src/libhdfs/os/mutexes.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_MUTEXES_H\n#define LIBHDFS_MUTEXES_H\n\n/*\n * Defines abstraction over platform-specific mutexes.  libhdfs has no formal\n * initialization function that users would call from a single-threaded context\n * to initialize the library.  This creates a challenge for bootstrapping the\n * mutexes.  To address this, all required mutexes are pre-defined here with\n * external storage.  Platform-specific implementations must guarantee that the\n * mutexes are initialized via static initialization.\n */\n\n#include \"platform.h\"\n\n/** Mutex protecting the class reference hash table. */\nextern mutex hdfsHashMutex;\n\n/** Mutex protecting singleton JVM instance. */\nextern mutex jvmMutex;\n\n/**\n * Locks a mutex.\n *\n * @param m mutex\n * @return 0 if successful, non-zero otherwise\n */\nint mutexLock(mutex *m);\n\n/**\n * Unlocks a mutex.\n *\n * @param m mutex\n * @return 0 if successful, non-zero otherwise\n */\nint mutexUnlock(mutex *m);\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/posix/mutexes.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/mutexes.h\"\n\n#include <pthread.h>\n#include <stdio.h>\n\nmutex hdfsHashMutex = PTHREAD_MUTEX_INITIALIZER;\nmutex jvmMutex = PTHREAD_MUTEX_INITIALIZER;\n\nint mutexLock(mutex *m) {\n  int ret = pthread_mutex_lock(m);\n  if (ret) {\n    fprintf(stderr, \"mutexLock: pthread_mutex_lock failed with error %d\\n\",\n      ret);\n  }\n  return ret;\n}\n\nint mutexUnlock(mutex *m) {\n  int ret = pthread_mutex_unlock(m);\n  if (ret) {\n    fprintf(stderr, \"mutexUnlock: pthread_mutex_unlock failed with error %d\\n\",\n      ret);\n  }\n  return ret;\n}\n"
  },
  {
    "path": "src/libhdfs/os/posix/platform.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_PLATFORM_H\n#define LIBHDFS_PLATFORM_H\n\n#include <pthread.h>\n\n/* Use gcc type-checked format arguments. */\n#define TYPE_CHECKED_PRINTF_FORMAT(formatArg, varArgs) \\\n  __attribute__((format(printf, formatArg, varArgs)))\n\n/*\n * Mutex and thread data types defined by pthreads.\n */\ntypedef pthread_mutex_t mutex;\ntypedef pthread_t threadId;\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/posix/thread.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/thread.h\"\n\n#include <pthread.h>\n#include <stdio.h>\n\n/**\n * Defines a helper function that adapts function pointer provided by caller to\n * the type required by pthread_create.\n *\n * @param toRun thread to run\n * @return void* result of running thread (always NULL)\n */\nstatic void* runThread(void *toRun) {\n  const thread *t = toRun;\n  t->start(t->arg);\n  return NULL;\n}\n\nint threadCreate(thread *t) {\n  int ret;\n  ret = pthread_create(&t->id, NULL, runThread, t);\n  if (ret) {\n    fprintf(stderr, \"threadCreate: pthread_create failed with error %d\\n\", ret);\n  }\n  return ret;\n}\n\nint threadJoin(const thread *t) {\n  int ret = pthread_join(t->id, NULL);\n  if (ret) {\n    fprintf(stderr, \"threadJoin: pthread_join failed with error %d\\n\", ret);\n  }\n  return ret;\n}\n"
  },
  {
    "path": "src/libhdfs/os/posix/thread_local_storage.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/thread_local_storage.h\"\n\n#include <jni.h>\n#include <pthread.h>\n#include <stdio.h>\n\n/** Key that allows us to retrieve thread-local storage */\nstatic pthread_key_t gTlsKey;\n\n/** nonzero if we succeeded in initializing gTlsKey. 
Protected by the jvmMutex */\nstatic int gTlsKeyInitialized = 0;\n\n/**\n * The function that is called whenever a thread with libhdfs thread local data\n * is destroyed.\n *\n * @param v         The thread-local data\n */\nstatic void hdfsThreadDestructor(void *v)\n{\n  JavaVM *vm;\n  JNIEnv *env = v;\n  jint ret;\n\n  ret = (*env)->GetJavaVM(env, &vm);\n  if (ret) {\n    fprintf(stderr, \"hdfsThreadDestructor: GetJavaVM failed with error %d\\n\",\n      ret);\n    (*env)->ExceptionDescribe(env);\n  } else {\n    (*vm)->DetachCurrentThread(vm);\n  }\n}\n\nint threadLocalStorageGet(JNIEnv **env)\n{\n  int ret = 0;\n  if (!gTlsKeyInitialized) {\n    ret = pthread_key_create(&gTlsKey, hdfsThreadDestructor);\n    if (ret) {\n      fprintf(stderr,\n        \"threadLocalStorageGet: pthread_key_create failed with error %d\\n\",\n        ret);\n      return ret;\n    }\n    gTlsKeyInitialized = 1;\n  }\n  *env = pthread_getspecific(gTlsKey);\n  return ret;\n}\n\nint threadLocalStorageSet(JNIEnv *env)\n{\n  int ret = pthread_setspecific(gTlsKey, env);\n  if (ret) {\n    fprintf(stderr,\n      \"threadLocalStorageSet: pthread_setspecific failed with error %d\\n\",\n      ret);\n    hdfsThreadDestructor(env);\n  }\n  return ret;\n}\n"
  },
  {
    "path": "src/libhdfs/os/thread.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_THREAD_H\n#define LIBHDFS_THREAD_H\n\n/*\n * Defines abstraction over platform-specific threads.\n */\n\n#include \"platform.h\"\n\n/** Pointer to function to run in thread. */\ntypedef void (*threadProcedure)(void *);\n\n/** Structure containing a thread's ID, starting address and argument. */\ntypedef struct {\n  threadId id;\n  threadProcedure start;\n  void *arg;\n} thread;\n\n/**\n * Creates and immediately starts a new thread.\n *\n * @param t thread to create\n * @return 0 if successful, non-zero otherwise\n */\nint threadCreate(thread *t);\n\n/**\n * Joins to the given thread, blocking if necessary.\n *\n * @param t thread to join\n * @return 0 if successful, non-zero otherwise\n */\nint threadJoin(const thread *t);\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/thread_local_storage.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_THREAD_LOCAL_STORAGE_H\n#define LIBHDFS_THREAD_LOCAL_STORAGE_H\n\n/*\n * Defines abstraction over platform-specific thread-local storage.  libhdfs\n * currently only needs thread-local storage for a single piece of data: the\n * thread's JNIEnv.  For simplicity, this interface is defined in terms of\n * JNIEnv, not general-purpose thread-local storage of any arbitrary data.\n */\n\n#include <jni.h>\n\n/*\n * Most operating systems support the more efficient __thread construct, which\n * is initialized by the linker.  The following macros use this technique on the\n * operating systems that support it.\n */\n#ifdef HAVE_BETTER_TLS\n  #define THREAD_LOCAL_STORAGE_GET_QUICK() \\\n    static __thread JNIEnv *quickTlsEnv = NULL; \\\n    { \\\n      if (quickTlsEnv) { \\\n        return quickTlsEnv; \\\n      } \\\n    }\n\n  #define THREAD_LOCAL_STORAGE_SET_QUICK(env) \\\n    { \\\n      quickTlsEnv = (env); \\\n    }\n#else\n  #define THREAD_LOCAL_STORAGE_GET_QUICK()\n  #define THREAD_LOCAL_STORAGE_SET_QUICK(env)\n#endif\n\n/**\n * Gets the JNIEnv in thread-local storage for the current thread.  
If the call\n * succeeds, and there is a JNIEnv associated with this thread, then returns 0\n * and populates env.  If the call succeeds, but there is no JNIEnv associated\n * with this thread, then returns 0 and sets JNIEnv to NULL.  If the call fails,\n * then returns non-zero.  Only one thread at a time may execute this function.\n * The caller is responsible for enforcing mutual exclusion.\n *\n * @param env JNIEnv out parameter\n * @return 0 if successful, non-zero otherwise\n */\nint threadLocalStorageGet(JNIEnv **env);\n\n/**\n * Sets the JNIEnv in thread-local storage for the current thread.\n *\n * @param env JNIEnv to set\n * @return 0 if successful, non-zero otherwise\n */\nint threadLocalStorageSet(JNIEnv *env);\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/windows/inttypes.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_INTTYPES_H\n#define LIBHDFS_INTTYPES_H\n\n/* On Windows, inttypes.h does not exist, so manually define what we need. */\n\n#define PRId64 \"I64d\"\n#define PRIu64 \"I64u\"\ntypedef unsigned __int64 uint64_t;\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/windows/mutexes.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/mutexes.h\"\n\n#include <windows.h>\n\nmutex hdfsHashMutex;\nmutex jvmMutex;\n\n/**\n * Unfortunately, there is no simple static initializer for a critical section.\n * Instead, the API requires calling InitializeCriticalSection.  Since libhdfs\n * lacks an explicit initialization function, there is no obvious existing place\n * for the InitializeCriticalSection calls.  To work around this, we define an\n * initialization function and instruct the linker to set a pointer to that\n * function as a user-defined global initializer.  See discussion of CRT\n * Initialization:\n * http://msdn.microsoft.com/en-us/library/bb918180.aspx\n */\nstatic void __cdecl initializeMutexes(void) {\n  InitializeCriticalSection(&hdfsHashMutex);\n  InitializeCriticalSection(&jvmMutex);\n}\n#pragma section(\".CRT$XCU\", read)\n__declspec(allocate(\".CRT$XCU\"))\nconst void (__cdecl *pInitialize)(void) = initializeMutexes;\n\nint mutexLock(mutex *m) {\n  EnterCriticalSection(m);\n  return 0;\n}\n\nint mutexUnlock(mutex *m) {\n  LeaveCriticalSection(m);\n  return 0;\n}\n"
  },
  {
    "path": "src/libhdfs/os/windows/platform.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_PLATFORM_H\n#define LIBHDFS_PLATFORM_H\n\n#include <stdio.h>\n#include <windows.h>\n#include <winsock.h>\n\n/*\n * O_ACCMODE defined to match Linux definition.\n */\n#ifndef O_ACCMODE\n#define O_ACCMODE 0x0003\n#endif\n\n/*\n * Windows has a different name for its maximum path length constant.\n */\n#ifndef PATH_MAX\n#define PATH_MAX MAX_PATH\n#endif\n\n/*\n * Windows does not define EDQUOT and ESTALE in errno.h.  The closest equivalents\n * are these constants from winsock.h.\n */\n#ifndef EDQUOT\n#define EDQUOT WSAEDQUOT\n#endif\n\n#ifndef ESTALE\n#define ESTALE WSAESTALE\n#endif\n\n/*\n * gcc-style type-checked format arguments are not supported on Windows, so just\n * stub this macro.\n */\n#define TYPE_CHECKED_PRINTF_FORMAT(formatArg, varArgs)\n\n/*\n * Define macros for various string formatting functions not defined on Windows.\n * Where possible, we reroute to one of the secure CRT variants.  On Windows,\n * the preprocessor does support variadic macros, even though they weren't\n * defined until C99.\n */\n#define snprintf(str, size, format, ...) 
\\\n  _snprintf_s((str), (size), _TRUNCATE, (format), __VA_ARGS__)\n#define strncpy(dest, src, n) \\\n  strncpy_s((dest), (n), (src), _TRUNCATE)\n#define strtok_r(str, delim, saveptr) \\\n  strtok_s((str), (delim), (saveptr))\n#define vsnprintf(str, size, format, ...) \\\n  vsnprintf_s((str), (size), _TRUNCATE, (format), __VA_ARGS__)\n\n/*\n * Mutex data type defined as Windows CRITICAL_SECTION.   A critical section (not\n * Windows mutex) is used, because libhdfs only needs synchronization of multiple\n * threads within a single process, not synchronization across process\n * boundaries.\n */\ntypedef CRITICAL_SECTION mutex;\n\n/*\n * Thread data type defined as HANDLE to a Windows thread.\n */\ntypedef HANDLE threadId;\n\n#endif\n"
  },
  {
    "path": "src/libhdfs/os/windows/thread.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/thread.h\"\n\n#include <stdio.h>\n#include <windows.h>\n\n/**\n * Defines a helper function that adapts function pointer provided by caller to\n * the type required by CreateThread.\n *\n * @param toRun thread to run\n * @return DWORD result of running thread (always 0)\n */\nstatic DWORD WINAPI runThread(LPVOID toRun) {\n  const thread *t = toRun;\n  t->start(t->arg);\n  return 0;\n}\n\nint threadCreate(thread *t) {\n  DWORD ret = 0;\n  HANDLE h;\n  h = CreateThread(NULL, 0, runThread, t, 0, NULL);\n  if (h) {\n    t->id = h;\n  } else {\n    ret = GetLastError();\n    fprintf(stderr, \"threadCreate: CreateThread failed with error %d\\n\", ret);\n  }\n  return ret;\n}\n\nint threadJoin(const thread *t) {\n  DWORD ret = WaitForSingleObject(t->id, INFINITE);\n  switch (ret) {\n  case WAIT_OBJECT_0:\n    break;\n  case WAIT_FAILED:\n    ret = GetLastError();\n    fprintf(stderr, \"threadJoin: WaitForSingleObject failed with error %d\\n\",\n      ret);\n    break;\n  default:\n    fprintf(stderr, \"threadJoin: WaitForSingleObject unexpected error %d\\n\",\n      ret);\n    break;\n  }\n  return ret;\n}\n"
  },
  {
    "path": "src/libhdfs/os/windows/thread_local_storage.c",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"os/thread_local_storage.h\"\n\n#include <jni.h>\n#include <stdio.h>\n#include <windows.h>\n\n/** Key that allows us to retrieve thread-local storage */\nstatic DWORD gTlsIndex = TLS_OUT_OF_INDEXES;\n\n/**\n * If the current thread has a JNIEnv in thread-local storage, then detaches the\n * current thread from the JVM.\n */\nstatic void detachCurrentThreadFromJvm()\n{\n  JNIEnv *env = NULL;\n  JavaVM *vm;\n  jint ret;\n  if (threadLocalStorageGet(&env) || !env) {\n    return;\n  }\n  ret = (*env)->GetJavaVM(env, &vm);\n  if (ret) {\n    fprintf(stderr,\n      \"detachCurrentThreadFromJvm: GetJavaVM failed with error %d\\n\",\n      ret);\n    (*env)->ExceptionDescribe(env);\n  } else {\n    (*vm)->DetachCurrentThread(vm);\n  }\n}\n\n/**\n * Unlike pthreads, the Windows API does not seem to provide a convenient way to\n * hook a callback onto thread shutdown.  However, the Windows portable\n * executable format does define a concept of thread-local storage callbacks.\n * Here, we define a function and instruct the linker to set a pointer to that\n * function in the segment for thread-local storage callbacks.  
See page 85 of\n * Microsoft Portable Executable and Common Object File Format Specification:\n * http://msdn.microsoft.com/en-us/gg463119.aspx\n * This technique only works for implicit linking (OS loads DLL on demand), not\n * for explicit linking (user code calls LoadLibrary directly).  This effectively\n * means that we have a known limitation: libhdfs may not work correctly if a\n * Windows application attempts to use it via explicit linking.\n *\n * @param h module handle\n * @param reason the reason for calling the callback\n * @param pv reserved, unused\n */\nstatic void NTAPI tlsCallback(PVOID h, DWORD reason, PVOID pv)\n{\n  DWORD tlsIndex;\n  switch (reason) {\n  case DLL_THREAD_DETACH:\n    detachCurrentThreadFromJvm();\n    break;\n  case DLL_PROCESS_DETACH:\n    detachCurrentThreadFromJvm();\n    tlsIndex = gTlsIndex;\n    gTlsIndex = TLS_OUT_OF_INDEXES;\n    if (!TlsFree(tlsIndex)) {\n      fprintf(stderr, \"tlsCallback: TlsFree failed with error %d\\n\",\n        GetLastError());\n    }\n    break;\n  default:\n    break;\n  }\n}\n\n/*\n * A variable named _tls_used contains the TLS directory, which contains a list\n * of pointers to callback functions.  Normally, the linker won't retain this\n * variable unless the executable has implicit thread-local variables, defined\n * using the __declspec(thread) extended storage-class modifier.  libhdfs\n * doesn't use __declspec(thread), and we have no guarantee that the executable\n * linked to libhdfs will use __declspec(thread).  By forcing the linker to\n * reference _tls_used, we guarantee that the binary retains the TLS directory.\n * See Microsoft Visual Studio 10.0/VC/crt/src/tlssup.c .\n */\n#ifdef _WIN64\n#pragma comment(linker, \"/INCLUDE:_tls_used\")\n#else\n#pragma comment(linker, \"/INCLUDE:__tls_used\")\n#endif\n\n/*\n * We must retain a pointer to the callback function.  
Force the linker to keep\n * this symbol, even though it appears that nothing in our source code uses it.\n */\n#ifdef _WIN64\n#pragma comment(linker, \"/INCLUDE:pTlsCallback\")\n#else\n#pragma comment(linker, \"/INCLUDE:_pTlsCallback\")\n#endif\n\n/*\n * Define constant pointer to our callback, and tell the linker to pin it into\n * the TLS directory so that it receives thread callbacks.  Use external linkage\n * to protect against the linker discarding the seemingly unused symbol.\n */\n#pragma const_seg(\".CRT$XLB\")\nextern const PIMAGE_TLS_CALLBACK pTlsCallback;\nconst PIMAGE_TLS_CALLBACK pTlsCallback = tlsCallback;\n#pragma const_seg()\n\nint threadLocalStorageGet(JNIEnv **env)\n{\n  LPVOID tls;\n  DWORD ret;\n  if (TLS_OUT_OF_INDEXES == gTlsIndex) {\n    gTlsIndex = TlsAlloc();\n    if (TLS_OUT_OF_INDEXES == gTlsIndex) {\n      fprintf(stderr,\n        \"threadLocalStorageGet: TlsAlloc failed with error %d\\n\",\n        TLS_OUT_OF_INDEXES);\n      return TLS_OUT_OF_INDEXES;\n    }\n  }\n  tls = TlsGetValue(gTlsIndex);\n  if (tls) {\n    *env = tls;\n    return 0;\n  } else {\n    ret = GetLastError();\n    if (ERROR_SUCCESS == ret) {\n      /* Thread-local storage contains NULL, because we haven't set it yet. */\n      *env = NULL;\n      return 0;\n    } else {\n      /*\n       * The API call failed.  According to documentation, TlsGetValue cannot\n       * fail as long as the index is a valid index from a successful TlsAlloc\n       * call.  This error handling is purely defensive.\n       */\n      fprintf(stderr,\n        \"threadLocalStorageGet: TlsGetValue failed with error %d\\n\", ret);\n      return ret;\n    }\n  }\n}\n\nint threadLocalStorageSet(JNIEnv *env)\n{\n  DWORD ret = 0;\n  if (!TlsSetValue(gTlsIndex, (LPVOID)env)) {\n    ret = GetLastError();\n    fprintf(stderr,\n      \"threadLocalStorageSet: TlsSetValue failed with error %d\\n\",\n      ret);\n    detachCurrentThreadFromJvm(env);\n  }\n  return ret;\n}\n"
  },
  {
    "path": "src/libhdfs/os/windows/unistd.h",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef LIBHDFS_UNISTD_H\n#define LIBHDFS_UNISTD_H\n\n/* On Windows, unistd.h does not exist, so manually define what we need. */\n\n#include <process.h> /* Declares getpid(). */\n#include <windows.h>\n\n/* Re-route sleep to Sleep, converting units from seconds to milliseconds. */\n#define sleep(seconds) Sleep((seconds) * 1000)\n#endif\n"
  },
  {
    "path": "src/native_core_hdfs/hdfs_file.cc",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#include \"hdfs_file.h\"\n#include <stdio.h>\n\n#define PYDOOP_TEXT_ENCODING  \"utf-8\"\n\n\nPyObject* FileClass_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\n{\n    FileInfo *self = NULL;\n\n    self = (FileInfo *)type->tp_alloc(type, 0);\n    if (self != NULL) {\n        self->fs = NULL;\n        self->file = NULL;\n        if (NULL == (self->name = PyUnicode_FromString(\"\"))) {\n            Py_DECREF(self);\n            return NULL;\n        }\n        if (NULL == (self->mode = PyUnicode_FromString(\"\"))) {\n            Py_DECREF(self);\n            return NULL;\n        }\n        self->size = 0;\n        self->buff_size = 0;\n        self->replication = 1;\n        self->blocksize = 0;\n        self->closed = 0;\n    }\n    return (PyObject *)self;\n}\n\n\nvoid FileClass_dealloc(FileInfo* self)\n{\n    self->file = NULL;\n    Py_TYPE(self)->tp_free((PyObject*)self);\n}\n\n\nint FileClass_init(FileInfo *self, PyObject *args, PyObject *kwds)\n{\n    PyObject *name = NULL, *mode = NULL, *tmp = NULL;\n\n    if (!PyArg_ParseTuple(args, \"OOOO\",\n                          &(self->fs), &(self->file), &name, &mode)) {\n        return -1;\n    }\n\n    if (name) {\n\ttmp = self->name;\n\tPy_INCREF(name);\n\tself->name = name;\n\tPy_XDECREF(tmp);\n    }\n    if (mode) {\n\ttmp = 
self->mode;\n\tPy_INCREF(mode);\n\tself->mode = mode;\n\tPy_XDECREF(tmp);\n    }\n\n    return 0;\n}\n\n\nint FileClass_init_internal(FileInfo *self, hdfsFS fs, hdfsFile file)\n{\n    self->fs = fs;\n    self->file = file;\n\n    return 0;\n}\n\n\nPyObject* FileClass_close(FileInfo* self){\n    int result = hdfsCloseFile(self->fs, self->file);\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    } else {\n        self->closed = 1;\n        return PyBool_FromLong(1);\n    }\n}\n\n\nPyObject* FileClass_getclosed(FileInfo* self, void* closure) {\n  return PyBool_FromLong(self->closed);\n}\n\n\nPyObject* FileClass_getbuff_size(FileInfo* self, void* closure) {\n  return PyLong_FromLong(self->buff_size);\n}\n\n\nPyObject* FileClass_getname(FileInfo* self, void* closure) {\n    Py_INCREF(self->name);\n    return self->name;\n}\n\n\nPyObject* FileClass_getmode(FileInfo* self, void* closure) {\n    Py_INCREF(self->mode);\n    return self->mode;\n}\n\n\nPyObject* FileClass_readable(FileInfo* self) {\n  return PyBool_FromLong(hdfsFileIsOpenForRead(self->file));\n}\n\n\nPyObject* FileClass_writable(FileInfo* self) {\n  return PyBool_FromLong(hdfsFileIsOpenForWrite(self->file));\n}\n\n\nPyObject* FileClass_seekable(FileInfo* self) {\n  return PyBool_FromLong(hdfsFileIsOpenForRead(self->file));\n}\n\n\nPyObject* FileClass_available(FileInfo *self){\n    int available = hdfsAvailable(self->fs, self->file);\n    if (available < 0)\n        return PyErr_SetFromErrno(PyExc_IOError);\n    else\n        return PyLong_FromLong(available);\n}\n\nstatic int _ensure_open_for_reading(FileInfo* self) {\n    if (!hdfsFileIsOpenForRead(self->file)) {\n        PyErr_SetString(PyExc_IOError, \"File is not opened in READ ('r') mode\");\n        return 0; // False\n    }\n\n    return 1; // True\n}\n\nstatic Py_ssize_t _read_into_pybuf(FileInfo *self, char* buf, Py_ssize_t nbytes) {\n\n    if (nbytes < 0) {\n        PyErr_SetString(PyExc_ValueError, \"nbytes must be >= 
0\");\n        return -1;\n    }\n\n    tSize bytes_read;\n    Py_BEGIN_ALLOW_THREADS;\n        bytes_read = hdfsRead(self->fs, self->file, buf, nbytes);\n    Py_END_ALLOW_THREADS;\n\n    if (bytes_read < 0) { // error\n        PyErr_SetFromErrno(PyExc_IOError);\n        return -1;\n    }\n\n    return bytes_read;\n}\n\nstatic PyObject* _read_new_pybuf(FileInfo* self, Py_ssize_t nbytes) {\n\n    if (nbytes < 0) {\n        PyErr_SetString(PyExc_ValueError, \"nbytes must be >= 0\");\n        return NULL;\n    }\n\n    // Allocate an uninitialized buffer object.\n    // We then access and directly modify the buffer's internal memory. This is\n    // ok until we release this string \"into the wild\".\n\n    PyObject* retval = _PyBuf_FromStringAndSize(NULL, nbytes);    \n    if (!retval) return PyErr_NoMemory();\n\n    Py_ssize_t bytes_read = _read_into_pybuf(self, _PyBuf_AS_STRING(retval),\n                                             nbytes);\n\n    if (bytes_read >= 0) {\n        // If bytes_read >= 0, read worked properly. But, if bytes_read < nbytes\n        // we got fewer bytes than requested (maybe we reached EOF?).  We need\n        // to shrink the string to the correct length.  In case of error the\n        // call to _PyString_Resize frees the original string, sets the\n        // appropriate python exception and returns -1.\n        if (bytes_read >= nbytes || _PyBuf_Resize(&retval, bytes_read) >= 0)  \n            return retval; // all good\n    }\n\n    // If we get here something's gone wrong.  The exception should already be set.\n    Py_DECREF(retval);\n    return NULL;\n}\n\n/*\n * Seek to `pos` and read `nbytes` bytes into a the provided buffer.\n *\n * \\return: Number of bytes read. 
In case of error this function sets\n * the appropriate Python exception and returns -1.\n */\nstatic Py_ssize_t _pread_into_pybuf(FileInfo *self, char* buffer, Py_ssize_t pos,\n                                    Py_ssize_t nbytes) {\n\n    Py_ssize_t orig_position = hdfsTell(self->fs, self->file);\n    if (orig_position < 0) {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return -1;\n    }\n\n    if (hdfsSeek(self->fs, self->file, pos) < 0) {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return -1;\n    }\n\n    tSize bytes_read = _read_into_pybuf(self, buffer, nbytes);\n\n    if (bytes_read < 0) {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return -1;\n    }\n\n    if (hdfsSeek(self->fs, self->file, orig_position) < 0) {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return -1;\n    }\n\n    return bytes_read;\n}\n\nstatic PyObject* _pread_new_pybuf(FileInfo* self, Py_ssize_t pos, Py_ssize_t nbytes) {\n\n    if (nbytes < 0) {\n        PyErr_SetString(PyExc_ValueError, \"nbytes must be >= 0\");\n        return NULL;\n    }\n\n    // Allocate an uninitialized string object.\n    PyObject* retval = _PyBuf_FromStringAndSize(NULL, nbytes);    \n    if (!retval) return PyErr_NoMemory();\n\n    Py_ssize_t bytes_read = _pread_into_pybuf(self, _PyBuf_AS_STRING(retval),\n                                              pos, nbytes);\n\n    if (bytes_read >= 0) {\n        // If bytes_read >= 0, read worked properly. But, if bytes_read < nbytes\n        // we got fewer bytes than requested (maybe we reached EOF?).  We need\n        // to shrink the string to the correct length.  In case of error the\n        // call to _PyString_Resize frees the original string, sets the\n        // appropriate python exception and returns -1.\n        if (bytes_read >= nbytes || _PyBuf_Resize(&retval, bytes_read) >= 0)\n            return retval; // all good\n    }\n\n    // If we get here something's gone wrong.  
The exception should already be set.\n    Py_DECREF(retval);\n    return NULL;\n}\n\n\nPyObject* FileClass_read(FileInfo *self, PyObject *args, PyObject *kwds){\n\n    Py_ssize_t nbytes = 0;\n\n    if (!_ensure_open_for_reading(self))\n        return NULL;\n\n    if (! PyArg_ParseTuple(args, \"n\", &(nbytes)))\n        return NULL;\n\n    if (nbytes < 0) {\n        PyErr_SetString(PyExc_ValueError, \"nbytes must be >= 0\");\n        return NULL;\n    }\n    else if (nbytes == 0) {\n      return _PyBuf_FromString(\"\");\n    }\n    // else nbytes > 0\n\n    return _read_new_pybuf(self, nbytes);\n}\n\n\nPyObject* FileClass_read_chunk(FileInfo *self, PyObject *args, PyObject *kwds){\n\n    Py_buffer buffer = {NULL, NULL};\n\n    if (!_ensure_open_for_reading(self))\n        return NULL;\n\n    if (! PyArg_ParseTuple(args, \"w*\",  &buffer))\n        return NULL;\n\n    Py_ssize_t bytes_read = _read_into_pybuf(self, (char*)buffer.buf, buffer.len);\n    PyBuffer_Release(&buffer);\n\n    if (bytes_read >= 0)\n        return Py_BuildValue(\"n\", bytes_read);\n    else\n        return NULL;\n}\n\n\nPyObject* FileClass_pread(FileInfo *self, PyObject *args, PyObject *kwds){\n\n    Py_ssize_t position = 0;\n    Py_ssize_t nbytes = 0;\n\n    if (!_ensure_open_for_reading(self))\n        return NULL;\n\n    if (! PyArg_ParseTuple(args, \"nn\", &position, &nbytes))\n        return NULL;\n\n    if (position < 0) {\n        errno = EINVAL;\n        PyErr_SetFromErrno(PyExc_IOError);\n        errno = 0;\n        return NULL;\n    }\n\n    if (nbytes == 0)\n      return _PyBuf_FromString(\"\");\n\n    // else\n\n    return _pread_new_pybuf(self, position, nbytes);\n}\n\n\nPyObject* FileClass_pread_chunk(FileInfo *self, PyObject *args, PyObject *kwds){\n\n    Py_buffer buffer = {NULL, NULL};\n    Py_ssize_t position = 0;\n\n    if (!_ensure_open_for_reading(self))\n        return NULL;\n\n    if (! 
PyArg_ParseTuple(args, \"nw*\", &position, &buffer))\n        return NULL;\n\n    if (position < 0) {\n        errno = EINVAL;\n        PyErr_SetFromErrno(PyExc_IOError);\n        errno = 0;\n        return NULL;\n    }\n\n    Py_ssize_t bytes_read = _pread_into_pybuf(self, (char*)buffer.buf, position,\n                                              buffer.len);\n    PyBuffer_Release(&buffer);\n\n    if (bytes_read >= 0)\n        return Py_BuildValue(\"n\", bytes_read);\n    else\n        return NULL;\n}\n\n\nPyObject* FileClass_seek(FileInfo *self, PyObject *args, PyObject *kwds) {\n\n    tOffset position = 0, curpos = 0;\n    int whence = SEEK_SET;\n\n    if (!PyArg_ParseTuple(args, \"n|i\", &position, &whence))\n        return NULL;\n\n    switch (whence) {\n    case SEEK_SET:\n        break;\n    case SEEK_CUR:\n        curpos = hdfsTell(self->fs, self->file);\n        if (curpos < 0) {\n            return PyErr_SetFromErrno(PyExc_IOError);\n        }\n        position += curpos;\n        break;\n    case SEEK_END:\n        position += self->size;\n        break;\n    default:\n        PyErr_SetString(PyExc_ValueError, \"unsupported whence value\");\n        return NULL;\n    }\n\n    /* HDFS does not support seeking past end of file */\n    if (position < 0 || position > self->size) {\n        errno = EINVAL;\n        PyErr_SetFromErrno(PyExc_IOError);\n        errno = 0;\n        return NULL;\n    }\n\n    if (hdfsSeek(self->fs, self->file, position) < 0) {\n\treturn PyErr_SetFromErrno(PyExc_IOError);\n    }\n    return PyLong_FromLong(position);\n}\n\n\nPyObject* FileClass_tell(FileInfo *self, PyObject *args, PyObject *kwds){\n\n    tOffset offset = hdfsTell(self->fs, self->file);\n    if (offset >= 0)\n        return Py_BuildValue(\"n\", offset);\n    else {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return NULL;\n    }\n}\n\n\n\nPyObject* FileClass_write(FileInfo* self, PyObject *args, PyObject *kwds) {\n    PyObject *input = NULL;\n    Py_buffer 
buffer = {NULL, NULL};\n\n    if (!hdfsFileIsOpenForWrite(self->file)) {\n        PyErr_SetString(PyExc_IOError, \"not writable\");\n        return NULL;\n    }\n    if (!PyArg_ParseTuple(args, \"O\",  &input)) {\n        return NULL;\n    }\n    if (PyObject_GetBuffer(input, &buffer, PyBUF_SIMPLE) < 0) {\n        PyErr_SetString(PyExc_TypeError, \"Argument not accessible as a buffer\");\n        return NULL;\n    }\n\n    Py_ssize_t written;\n    Py_BEGIN_ALLOW_THREADS;\n    written = hdfsWrite(self->fs, self->file, buffer.buf, buffer.len);\n    Py_END_ALLOW_THREADS;\n    PyBuffer_Release(&buffer);\n    if (written < 0) {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return NULL;\n    }\n    return Py_BuildValue(\"n\", written);\n}\n\n\nPyObject* FileClass_flush(FileInfo *self){\n    if (!hdfsFileIsOpenForWrite(self->file)) {\n      Py_RETURN_NONE;\n    }\n    int result = hdfsFlush(self->fs, self->file);\n\n    if (result >= 0) {\n        Py_RETURN_NONE;\n    }\n    else {\n        PyErr_SetFromErrno(PyExc_IOError);\n        return NULL;\n    }\n}\n"
  },
  {
    "path": "src/native_core_hdfs/hdfs_file.h",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#ifndef PYTHON_HDFS_FILE_TYPE\n#define PYTHON_HDFS_FILE_TYPE\n\n#include <Python.h>\n\n#include <string>\n#include <map>\n#include <utility>  // std::pair support\n#include <iostream>\n#include <errno.h>\n#include <typeinfo>\n\n#include <hdfs/hdfs.h>\n\n#include <structmember.h>\n#include \"../py3k_compat.h\"\n\n\ntypedef struct {\n    PyObject_HEAD\n    hdfsFS fs;\n    hdfsFile file;\n    PyObject *name;\n    PyObject *mode;\n    tOffset size;\n    int buff_size;\n    short replication;\n    int blocksize;\n    int closed;\n} FileInfo;\n\n\nPyObject* FileClass_new(PyTypeObject *type, PyObject *args, PyObject *kwds);\n\nvoid FileClass_dealloc(FileInfo* self);\n\nint FileClass_init(FileInfo *self, PyObject *args, PyObject *kwds);\n\nint FileClass_init_internal(FileInfo *self, hdfsFS fs, hdfsFile file);\n\nPyObject* FileClass_close(FileInfo* self);\n\nPyObject* FileClass_getclosed(FileInfo* self, void* closure);\n\nPyObject* FileClass_getbuff_size(FileInfo* self, void* closure);\n\nPyObject* FileClass_getname(FileInfo* self, void* closure);\n\nPyObject* FileClass_getmode(FileInfo* self, void* closure);\n\nPyObject* FileClass_readable(FileInfo* self);\n\nPyObject* FileClass_writable(FileInfo* self);\n\nPyObject* FileClass_seekable(FileInfo* self);\n\nPyObject* FileClass_mode(FileInfo* self);\n\nPyObject* 
FileClass_write(FileInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_get_mode(FileInfo *self);\n\nPyObject* FileClass_available(FileInfo *self);\n\nPyObject* FileClass_read(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_read_chunk(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_pread(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_pread_chunk(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_seek(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_tell(FileInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FileClass_flush(FileInfo *self);\n\n#endif\n"
  },
  {
    "path": "src/native_core_hdfs/hdfs_fs.cc",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#include \"hdfs_fs.h\"\n#include \"hdfs_file.h\"\n\n#include <sstream>\n#include <hdfs/hdfs.h>\n#include <unicodeobject.h>\n#include <errno.h>\n#include <string.h>\n\n#define MAX_WD_BUFFSIZE 2048\n\n#define str_empty(s) ((s) == NULL || (*(s) == '\\0'))\n\nPyObject* FsClass_new(PyTypeObject *type, PyObject *args, PyObject *kwds)\n{\n    FsInfo *self = NULL;\n\n    self = (FsInfo *)type->tp_alloc(type, 0);\n    if (self != NULL) {\n        self->host = NULL;\n        self->port = 0;\n        self->user = NULL;\n        self->group = NULL;\n        self->_fs = NULL;\n    }\n\n    return (PyObject *)self;\n}\n\n\nvoid FsClass_dealloc(FsInfo* self)\n{\n    Py_TYPE(self)->tp_free((PyObject*)self);\n}\n\n\nint FsClass_init(FsInfo *self, PyObject *args, PyObject *kwds)\n{\n\n    // XXX: This call to PyArg_ParseTuple doesn't support non-ASCII characters in\n    // the input strings (host, user, group)\n    if (! PyArg_ParseTuple(args, \"z|izz\",\n            &(self->host), &(self->port),\n            &(self->user), &(self->group)))\n        return -1;\n\n    if (str_empty(self->host))\n        self->host = NULL;\n\n    if (str_empty(self->user))\n        self->user = NULL;\n\n    if (str_empty(self->group))\n        self->group = NULL;\n\n    // Connect cycles and retries more than once if necessary.  
Better let\n    // other Python threads through.\n    Py_BEGIN_ALLOW_THREADS;\n        if (self->user != NULL) {\n            self->_fs = hdfsConnectAsUser(self->host, self->port, self->user);\n\n        } else {\n            self->_fs = hdfsConnect(self->host, self->port);\n        }\n    Py_END_ALLOW_THREADS;\n\n    if (!self->_fs) {\n        PyErr_SetFromErrno(PyExc_RuntimeError);\n        return -1;\n    }\n\n    return 0;\n}\n\n\nPyObject* FsClass_close(FsInfo* self)\n{\n    hdfsDisconnect(self->_fs);\n    Py_RETURN_NONE;\n}\n\n\nPyObject* FsClass_get_working_directory(FsInfo* self) {\n\n    const size_t bufferSize = MAX_WD_BUFFSIZE;\n    char *buffer = (char*)PyMem_Malloc(bufferSize);\n    if (!buffer)\n        return PyErr_NoMemory();\n\n    if (hdfsGetWorkingDirectory(self->_fs, buffer, bufferSize) == NULL) {\n        PyErr_SetString(PyExc_RuntimeError, \"Cannot get working directory.\");\n        PyMem_Free(buffer);\n        return NULL;\n    }\n\n    PyObject* result = PyUnicode_FromString(buffer);\n    PyMem_Free(buffer);\n    if (!result)\n        return PyErr_NoMemory();\n\n    return result;\n}\n\nPyObject* FsClass_get_path_info(FsInfo* self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    PyObject* retval = NULL;\n    hdfsFileInfo* info = NULL;\n\n    if (!PyArg_ParseTuple(args, \"es\", \"utf-8\",  &path)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        info = hdfsGetPathInfo(self->_fs, path);\n    Py_END_ALLOW_THREADS;\n    if (info == NULL) {\n        PyMem_Free(path);\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n\n    retval =\n        Py_BuildValue(\"{s:O,s:s,s:s,s:i,s:i,s:h,s:s,s:h,s:i,s:O,s:L}\",\n            \"name\", PyUnicode_FromString(info->mName),\n            \"kind\", info->mKind == kObjectKindDirectory ? 
\"directory\" : \"file\",\n            \"group\", info->mGroup,\n            \"last_mod\", info->mLastMod,\n            \"last_access\", info->mLastAccess,\n            \"replication\", info->mReplication,\n            \"owner\", info->mOwner,\n            \"permissions\", info->mPermissions,\n            \"block_size\", info->mBlockSize,\n            \"path\", PyUnicode_FromString(info->mName),\n            \"size\", info->mSize\n    );\n    PyMem_Free(path);\n    hdfsFreeFileInfo(info, 1);\n    return retval;\n}\n\n\nPyObject* FsClass_get_hosts(FsInfo* self, PyObject *args, PyObject *kwds) {\n\n    Py_ssize_t start = 0, length = 0;\n    PyObject* result = NULL;\n    char* path = NULL;\n    char*** hosts = NULL;\n\n    if (!PyArg_ParseTuple(args, \"esnn\", \"utf-8\", &path, &start, &length)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    if (start < 0 || length < 0) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError,\n                        \"Start position and length must be >= 0\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        hosts = hdfsGetHosts(self->_fs, path, start, length);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n    if (!hosts) {\n        return PyErr_SetFromErrno(PyExc_RuntimeError);\n    }\n\n    result = PyList_New(0);\n    if (!result) goto mem_error;\n\n    for (int blockNumber = 0; hosts[blockNumber] != NULL; ++blockNumber)\n    {\n        PyObject* blockHosts = PyList_New(0);\n        if (!blockHosts) goto mem_error;\n\n        for (int iBlockHost = 0; hosts[blockNumber][iBlockHost] != NULL; ++iBlockHost)\n        {\n            PyObject* str = PyUnicode_FromString(hosts[blockNumber][iBlockHost]);\n            if (!str) goto mem_error;\n            if (PyList_Append(blockHosts, str) < 0) goto mem_error;\n        }\n\n        if (PyList_Append(result, 
blockHosts) < 0) goto mem_error;\n    }\n    goto done; // skip the mem_error section\n\nmem_error:\n    PyErr_SetString(PyExc_MemoryError, \"Error allocating host structure\");\n    Py_XDECREF(result);\n    result = NULL;\n    // fall through\ndone:\n    if (hosts) hdfsFreeHosts(hosts);\n    return result;\n}\n\nPyObject* FsClass_get_default_block_size(FsInfo* self) {\n    tOffset size = hdfsGetDefaultBlockSize(self->_fs);\n    return PyLong_FromSsize_t(size);\n}\n\nPyObject* FsClass_get_used(FsInfo* self) {\n    tOffset size = hdfsGetUsed(self->_fs);\n    return PyLong_FromSsize_t(size);\n}\n\nPyObject* FsClass_set_replication(FsInfo* self, PyObject* args, PyObject* kwds) {\n\n    char* path = NULL;\n    short replication = 0;\n    int result = 0;\n\n    if (!PyArg_ParseTuple(args, \"esh\", \"utf-8\", &path, &replication))\n        return NULL;\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsSetReplication(self->_fs, path, replication);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n    if (result < 0) {\n\treturn PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject* FsClass_set_working_directory(FsInfo* self, PyObject* args, PyObject* kwds) {\n\n    char* path = NULL;\n    int result = 0;\n\n    if (!PyArg_ParseTuple(args, \"es\", \"utf-8\", &path))\n        return NULL;\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsSetWorkingDirectory(self->_fs, path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n    if (result < 0) {\n\treturn PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject* FsClass_open_file(FsInfo* self, PyObject *args, PyObject *kwds)\n{\n    PyObject* retval = NULL;\n    char* path = 
NULL;\n    const char* mode = MODE_READ;\n    int flags = 0;\n    int buff_size = 0;\n    int blocksize = 0;\n    short replication = 0;\n    hdfsFile file = NULL;\n    tOffset size = 0;\n    hdfsFileInfo* info = NULL;\n\n    if (!PyArg_ParseTuple(args, \"es|sihi\",\n                          \"utf-8\", &path, &mode, &buff_size, &replication,\n                          &blocksize)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    if (strcmp(mode, MODE_READ) == 0) {\n        flags = O_RDONLY;\n    } else if (strcmp(mode, MODE_WRITE) == 0) {\n        flags = O_WRONLY;\n    } else if (strcmp(mode, MODE_APPEND) == 0) {\n        flags = O_WRONLY | O_APPEND;\n    } else {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Invalid mode\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        file = hdfsOpenFile(self->_fs, path, flags,\n                            buff_size, replication, blocksize);\n    Py_END_ALLOW_THREADS;\n    if (file == NULL) {\n        PyMem_Free(path);\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n\n    PyObject* module = PyImport_ImportModule(\"pydoop.native_core_hdfs\");\n    if (NULL == module) {\n        PyMem_Free(path);\n\tfree(file);\n\treturn NULL;\n    }\n    PyObject *name = PyUnicode_FromString(path);\n    PyObject *pymode = PyUnicode_FromString(mode);\n    retval = PyObject_CallMethod(module, \"CoreHdfsFile\", \"OOOO\",\n\t\t\t\t self->_fs, file, name, pymode);\n    Py_XDECREF(pymode);\n    Py_XDECREF(name);\n    Py_XDECREF(module);\n    if (NULL == retval) {\n        PyMem_Free(path);\n        free(file);\n        return NULL;\n    }\n\n    /* get file size for the SEEK_END variant of seek */\n    if (flags == O_RDONLY) {\n        Py_BEGIN_ALLOW_THREADS;\n            info = hdfsGetPathInfo(self->_fs, path);\n        Py_END_ALLOW_THREADS;\n        if 
(info == NULL) {\n            PyMem_Free(path);\n            return PyErr_SetFromErrno(PyExc_IOError);\n        }\n        size = info->mSize;\n        hdfsFreeFileInfo(info, 1);\n    }\n    PyMem_Free(path);\n\n    FileInfo *fileInfo = ((FileInfo*) retval);\n    fileInfo->size = size;\n    fileInfo->buff_size = buff_size;\n    fileInfo->blocksize = blocksize;\n    fileInfo->replication = replication;\n    return retval;\n}\n\n\nPyObject *FsClass_get_capacity(FsInfo *self) {\n    tOffset capacity;\n\n    Py_BEGIN_ALLOW_THREADS;\n        errno = 0; // hdfsGetCapacity forgets to clear errno\n        capacity = hdfsGetCapacity(self->_fs);\n    Py_END_ALLOW_THREADS;\n\n    if (capacity < 0) {\n        // two error cases are contemplated by the code in hdfsGetCapacity:\n        // 1) exception from the Java method\n        // 2) FS instance is not a DistributedFileSystem.\n        // Here we copy their error textually.\n        if (errno)\n            PyErr_SetFromErrno(PyExc_IOError);\n        else {\n            PyErr_SetString(PyExc_RuntimeError,\n                    \"hdfsGetCapacity works only on a DistributedFileSystem\");\n        }\n\n        return NULL;\n    }\n    return PyLong_FromSsize_t(capacity);\n}\n\n\nPyObject* FsClass_copy(FsInfo* self, PyObject *args, PyObject *kwds)\n{\n    FsInfo* to_hdfs = NULL;\n    char *from_path = NULL, *to_path = NULL;\n    int result = 0;\n\n    if (! 
PyArg_ParseTuple(args, \"esOes\", \"utf-8\", &from_path,\n                &to_hdfs, \"utf-8\", &to_path)) {\n        return NULL;\n    }\n\n    if (str_empty(from_path) || str_empty(to_path)) {\n        PyMem_Free(from_path);\n        PyMem_Free(to_path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsCopy(self->_fs, from_path, to_hdfs->_fs, to_path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(from_path);\n    PyMem_Free(to_path);\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    return PyLong_FromLong(result);\n}\n\n\nPyObject *FsClass_exists(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    int result = 0;\n\n    if (! PyArg_ParseTuple(args, \"es\", \"utf-8\", &path))\n        return NULL;\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsExists(self->_fs, path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n\n    // LP: hdfsExists (in some cases?) sets errno to ENOENT \"[Errno 2] No such\n    // file or directory\" when the path doesn't exist or EEXIST in other cases.\n    // I don't know why.  Since that's what we're trying to test, I'll skip\n    // checking errno here.  The consequence is that when we return false it\n    // may be because of an error and not because the path doesn't exist.\n    //\n    // if (result < 0 && errno) return PyErr_SetFromErrno(PyExc_IOError);\n\n    return PyBool_FromLong(result >= 0 ? 1 : 0);\n}\n\n\nPyObject *FsClass_create_directory(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    int result = 0;\n\n    if (! 
PyArg_ParseTuple(args, \"es\", \"utf-8\", &path)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsCreateDirectory(self->_fs, path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n/*\n * Works on borrowed reference `dict`.\n *\n * \\return 0 if successful\n * \\return -1 if there was a problem. In that case, dict may contain\n * some values, but will be incomplete and should be discarded.\n */\nstatic int setPathInfo(PyObject* dict, hdfsFileInfo* fileInfo) {\n\n    if (dict == NULL || fileInfo == NULL) return -1;\n    int error_code = 0;\n\n    const char*const keys[] = {\n        \"name\",\n        \"kind\",\n        \"group\",\n        \"last_mod\",\n        \"last_access\",\n        \"replication\",\n        \"owner\",\n        \"permissions\",\n        \"block_size\",\n        \"path\",\n        \"size\"\n    };\n\n    const int n_fields = sizeof(keys) / sizeof(keys[0]);\n\n    PyObject* values[n_fields];\n    int i = 0;\n    // Prepare the values.  We'll check for all errors in the \"set\" loop below\n    // The order of these values MUST match the order of the keys above\n    values[i++] = PyUnicode_FromString(fileInfo->mName);\n    values[i++] = PyUnicode_FromString(fileInfo->mKind == kObjectKindDirectory ? 
\"directory\" : \"file\");\n    values[i++] = PyUnicode_FromString(fileInfo->mGroup);\n    values[i++] = PyLong_FromLong(fileInfo->mLastMod);\n    values[i++] = PyLong_FromLong(fileInfo->mLastAccess);\n    values[i++] = PyLong_FromSize_t(fileInfo->mReplication);\n    values[i++] = PyUnicode_FromString(fileInfo->mOwner);\n    values[i++] = PyLong_FromSize_t(fileInfo->mPermissions);\n    values[i++] = PyLong_FromLong(fileInfo->mBlockSize);\n    values[i++] = PyUnicode_FromString(fileInfo->mName);\n    values[i++] = PyLong_FromLongLong(fileInfo->mSize);\n\n    for (i = 0; i < n_fields; ++i) {\n        if (values[i] == NULL || PyDict_SetItemString(dict, keys[i], values[i]) < 0) {\n            error_code = -1;\n            break;\n            // Don't DECREF here.  The error handling code goes through the entire array\n            // and thus we'd end up DECREFing some objects twice.\n        }\n    }\n\n    for (i = 0; i < n_fields; ++i) {\n        Py_XDECREF(values[i]); // some values may be null (if there was an error\n    }\n\n    return error_code;\n}\n\nPyObject *FsClass_list_directory(FsInfo *self, PyObject *args, PyObject *kwds) {\n    PyObject* retval = NULL;\n    char* path = NULL;\n    hdfsFileInfo* pathList = NULL;\n    int numEntries = 0;\n    hdfsFileInfo* pathInfo = NULL;\n\n    if (!PyArg_ParseTuple(args, \"es\", \"utf-8\",  &path))\n        return NULL;\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        pathInfo = hdfsGetPathInfo(self->_fs, path);\n        PyMem_Free(path);\n        if (!pathInfo) {\n            Py_BLOCK_THREADS; // later we 'goto' skipping over END_ALLOW_THREADS\n            PyErr_SetFromErrno(PyExc_IOError);\n            goto error;\n        }\n\n        if (pathInfo->mKind == kObjectKindDirectory) {\n\n            pathList = hdfsListDirectory(self->_fs, pathInfo->mName, &numEntries);\n\n            // 
hdfsListDirectory returns NULL when a directory is empty, so to determine\n            // whether there's been an error we also need to check errno\n            if (!pathList && errno) {\n                Py_BLOCK_THREADS; // later we 'goto' skipping over END_ALLOW_THREADS\n                PyErr_SetFromErrno(PyExc_IOError);\n                goto error;\n            }\n        }\n        else {\n            numEntries = 1;\n            pathList = pathInfo;\n            pathInfo = NULL;\n        }\n    Py_END_ALLOW_THREADS;\n\n    retval = PyList_New(numEntries);\n    if (!retval) goto mem_error;\n\n    for (Py_ssize_t i = 0; i < numEntries; i++) {\n        PyObject* infoDict = PyDict_New();\n        if (!infoDict) goto mem_error;\n        PyList_SET_ITEM(retval, i, infoDict);\n        if (setPathInfo(infoDict, &pathList[i]) < 0) {\n            PyErr_SetString(PyExc_IOError, \"Error getting file info\");\n            goto error;\n        }\n    }\n\n    goto done; // skip the error section\n\nmem_error:\n    PyErr_SetString(PyExc_MemoryError, \"Error allocating structures\");\n    // fall through\nerror:\n    // in case of error DECREF our retval structure and return NULL\n    if (retval != NULL) {\n        Py_XDECREF(retval);\n        retval = NULL;\n    }\n\ndone:\n    // all code paths go through the 'done' section\n    if (pathInfo != NULL)\n        hdfsFreeFileInfo(pathInfo, 1);\n    if (pathList != NULL)\n        hdfsFreeFileInfo(pathList, numEntries);\n\n    return retval;\n}\n\nPyObject *FsClass_move(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    FsInfo* to_hdfs = NULL;\n    char *from_path = NULL, *to_path = NULL;\n    int result = 0;\n\n    if (! 
PyArg_ParseTuple(args, \"esOes\", \"utf-8\", &from_path,\n                &to_hdfs, \"utf-8\", &to_path)) {\n        return NULL;\n    }\n\n    if (str_empty(from_path) || str_empty(to_path)) {\n        PyMem_Free(from_path);\n        PyMem_Free(to_path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsMove(self->_fs, from_path, to_hdfs->_fs, to_path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(from_path);\n    PyMem_Free(to_path);\n\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject *FsClass_rename(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char *from_path = NULL, *to_path = NULL;\n    int result = 0;\n\n    if (! PyArg_ParseTuple(args, \"eses\", \"utf-8\", &from_path, \"utf-8\", &to_path))\n        return NULL;\n\n    if (str_empty(from_path) || str_empty(to_path)) {\n        PyMem_Free(from_path);\n        PyMem_Free(to_path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsRename(self->_fs, from_path, to_path);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(from_path);\n    PyMem_Free(to_path);\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject *FsClass_delete(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    int recursive = 1;\n    int result = 0;\n\n    if (!PyArg_ParseTuple(args, \"es|i\", \"utf-8\", &path, &recursive)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsDelete(self->_fs, path, recursive);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n\n    if (result < 0) {\n        return 
PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject *FsClass_chmod(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    short mode = 1;\n    int result = 0;\n\n    if (!PyArg_ParseTuple(args, \"esh\", \"utf-8\", &path, &mode)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        // hdfsChmod doesn't always set errno in case of error.  We clear it\n        // here so that after the call we'll be sure we're not looking at an old value\n        errno = 0;\n        result = hdfsChmod(self->_fs, path, mode);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n\n    if (result >= 0) {\n        Py_RETURN_NONE;\n    }\n    else {\n        // there's been an error\n        if (errno) {\n            return PyErr_SetFromErrno(PyExc_IOError);\n\t}\n        else {\n            PyErr_SetString(PyExc_IOError, \"Unknown error\");\n\t    return NULL;\n        }\n    }\n}\n\n\nPyObject *FsClass_chown(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char *path = NULL, *input_user = NULL, *input_group = NULL;\n    int result = 0;\n    hdfsFileInfo* fileInfo = NULL;\n\n    if (! 
PyArg_ParseTuple(args, \"es|eses\",\n                \"utf-8\", &path, \"utf-8\", &input_user, \"utf-8\", &input_group)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyMem_Free(input_user);\n        PyMem_Free(input_group);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        fileInfo = hdfsGetPathInfo(self->_fs, path);\n        if (NULL == fileInfo) {\n            PyMem_Free(path);\n            PyMem_Free(input_user);\n            PyMem_Free(input_group);\n\t    return PyErr_SetFromErrno(PyExc_IOError);\n        }\n        const char* new_user = str_empty(input_user) ? fileInfo->mOwner : input_user;\n        const char* new_group = str_empty(input_group) ? fileInfo->mGroup : input_group;\n        result = hdfsChown(self->_fs, path, new_user, new_group);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n    PyMem_Free(input_user);\n    PyMem_Free(input_group);\n    hdfsFreeFileInfo(fileInfo, 1);\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n\n\nPyObject *FsClass_utime(FsInfo *self, PyObject *args, PyObject *kwds) {\n\n    char* path = NULL;\n    tTime mtime = 0, atime = 0;\n    int result = 0;\n\n    if (! PyArg_ParseTuple(args, \"esll\", \"utf-8\", &path, &mtime, &atime)) {\n        return NULL;\n    }\n\n    if (str_empty(path)) {\n        PyMem_Free(path);\n        PyErr_SetString(PyExc_ValueError, \"Empty path\");\n        return NULL;\n    }\n\n    Py_BEGIN_ALLOW_THREADS;\n        result = hdfsUtime(self->_fs, path, mtime, atime);\n    Py_END_ALLOW_THREADS;\n    PyMem_Free(path);\n\n    if (result < 0) {\n        return PyErr_SetFromErrno(PyExc_IOError);\n    }\n    Py_RETURN_NONE;\n}\n"
  },
  {
    "path": "src/native_core_hdfs/hdfs_fs.h",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#ifndef PYTHON_HDFS_FS_TYPE\n#define PYTHON_HDFS_FS_TYPE\n\n#include <Python.h>\n#include <string>\n#include <map>\n#include <utility>  // std::pair support\n#include <iostream>\n#include <errno.h>\n#include <typeinfo>\n\n#include <hdfs/hdfs.h>\n#include <structmember.h>\n\n#include \"../py3k_compat.h\"\n\n\n#define MODE_READ \"r\"\n#define MODE_WRITE \"w\"\n#define MODE_APPEND \"a\"\n\n\ntypedef struct {\n    PyObject_HEAD\n    char *host;\n    int port;\n    char *user;\n    char *group;\n    hdfsFS _fs;\n} FsInfo;\n\n\nPyObject* FsClass_new(PyTypeObject* type, PyObject *args, PyObject *kwds);\n\nvoid FsClass_dealloc(FsInfo* self);\n\nint FsClass_init(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_close(FsInfo* self);\n\nPyObject* FsClass_get_working_directory(FsInfo* self);\n\nPyObject* FsClass_get_default_block_size(FsInfo* self);\n\nPyObject* FsClass_get_path_info(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_get_hosts(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_get_used(FsInfo* self);\n\nPyObject* FsClass_get_capacity(FsInfo* self);\n\nPyObject* FsClass_set_replication(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_set_working_directory(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* 
FsClass_open_file(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_copy(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_exists(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject*FsClass_list_directory(FsInfo *self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_create_directory(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_rename(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_move(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_delete(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_chmod(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_chown(FsInfo* self, PyObject *args, PyObject *kwds);\n\nPyObject* FsClass_utime(FsInfo* self, PyObject *args, PyObject *kwds);\n\n#endif\n"
  },
  {
    "path": "src/native_core_hdfs/hdfs_module.cc",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#include <Python.h>\n\n#if PY_MAJOR_VERSION >= 3\n#define IS_PY3K 1\n#endif\n\n#include \"hdfs_fs.h\"\n#include \"hdfs_file.h\"\n#include <jni.h>\n\nstatic char* module__name__ = \"native_core_hdfs\";\nstatic char* module__doc__ = \"native_hdfs_core implementation\";\n\n/* FsType */\nstatic PyMemberDef FsClass_members[] = {\n  {NULL}  /* Sentinel */\n};\n\nstatic PyMethodDef FsClass_methods[] = {\n  {\"get_working_directory\", (PyCFunction) FsClass_get_working_directory,\n   METH_NOARGS, \"Get the current working directory\"},\n  {\"get_path_info\", (PyCFunction) FsClass_get_path_info, METH_VARARGS,\n   \"Get information on a file or directory\"},\n  {\"get_default_block_size\", (PyCFunction) FsClass_get_default_block_size,\n   METH_NOARGS, \"Get the default block size\"},\n  {\"get_hosts\", (PyCFunction) FsClass_get_hosts, METH_VARARGS,\n   \"Get the names of the hosts where a file is stored\"},\n  {\"get_capacity\", (PyCFunction) FsClass_get_capacity, METH_VARARGS,\n   \"Get the raw capacity of the filesystem\"},\n  {\"get_used\", (PyCFunction) FsClass_get_used, METH_NOARGS,\n   \"Get the total raw size of all files in the filesystem.\"},\n  {\"set_replication\", (PyCFunction) FsClass_set_replication, METH_VARARGS,\n   \"Set the replication factor for a file\"},\n  {\"set_working_directory\", (PyCFunction) 
FsClass_set_working_directory,\n   METH_VARARGS, \"Set the current working directory\"},\n  {\"open_file\", (PyCFunction) FsClass_open_file, METH_VARARGS, \"Open a file\"},\n  {\"close\", (PyCFunction) FsClass_close, METH_NOARGS,\n   \"Close the HDFS connection\"},\n  {\"copy\", (PyCFunction) FsClass_copy, METH_VARARGS, \"Copy the given file\"},\n  {\"create_directory\", (PyCFunction) FsClass_create_directory, METH_VARARGS,\n   \"Create a directory with the given name\"},\n  {\"list_directory\", (PyCFunction) FsClass_list_directory, METH_VARARGS,\n   \"Get the contents of a directory\"},\n  {\"move\", (PyCFunction) FsClass_move, METH_VARARGS, \"Move the given file\"},\n  {\"rename\", (PyCFunction) FsClass_rename, METH_VARARGS,\n   \"Rename the given file\"},\n  {\"delete\", (PyCFunction) FsClass_delete, METH_VARARGS,\n   \"Delete the given file or directory\"},\n  {\"exists\", (PyCFunction) FsClass_exists, METH_VARARGS,\n   \"Check if the given path exists on the filesystem\"},\n  {\"chmod\", (PyCFunction) FsClass_chmod, METH_VARARGS, \"Change file mode\"},\n  {\"chown\", (PyCFunction) FsClass_chown, METH_VARARGS,\n   \"Change file owner and group\"},\n  {\"utime\", (PyCFunction) FsClass_utime, METH_VARARGS,\n   \"Change file last access and modification time\"},\n  {NULL}  /* Sentinel */\n};\n\nstatic PyTypeObject FsType = {\n  PyVarObject_HEAD_INIT(NULL, 0)  \n  \"native_core_hdfs.CoreHdfsFs\",            /* tp_name */\n  sizeof(FsInfo),                           /* tp_basicsize */\n  0,                                        /* tp_itemsize */\n  (destructor) FsClass_dealloc,             /* tp_dealloc */\n  0,                                        /* tp_print */\n  0,                                        /* tp_getattr */\n  0,                                        /* tp_setattr */\n  0,                                        /* tp_compare */\n  0,                                        /* tp_repr */\n  0,                                        /* tp_as_number 
*/\n  0,                                        /* tp_as_sequence */\n  0,                                        /* tp_as_mapping */\n  0,                                        /* tp_hash */\n  0,                                        /* tp_call */\n  0,                                        /* tp_str */\n  0,                                        /* tp_getattro */\n  0,                                        /* tp_setattro */\n  0,                                        /* tp_as_buffer */\n  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */\n  \"Hdfs FS objects\",                        /* tp_doc */\n  0,                                        /* tp_traverse */\n  0,                                        /* tp_clear */\n  0,                                        /* tp_richcompare */\n  0,                                        /* tp_weaklistoffset */\n  0,                                        /* tp_iter */\n  0,                                        /* tp_iternext */\n  FsClass_methods,                          /* tp_methods */\n  FsClass_members,                          /* tp_members */\n  0,                                        /* tp_getset */\n  0,                                        /* tp_base */\n  0,                                        /* tp_dict */\n  0,                                        /* tp_descr_get */\n  0,                                        /* tp_descr_set */\n  0,                                        /* tp_dictoffset */\n  (initproc) FsClass_init,                  /* tp_init */\n  0,                                        /* tp_alloc */\n  FsClass_new,                              /* tp_new */\n};\n\n\n/* FileType */\nstatic PyMemberDef FileClass_members[] = {\n  {NULL}  /* Sentinel */\n};\n\nstatic PyGetSetDef FileClass_getseters[] = {\n  {\"closed\", (getter)FileClass_getclosed, NULL, NULL},\n  {\"buff_size\", (getter)FileClass_getbuff_size, NULL, NULL},\n  {\"name\", (getter)FileClass_getname, NULL, NULL},\n  
{\"mode\", (getter)FileClass_getmode, NULL, NULL},\n  {NULL}  /* Sentinel */\n};\n\nstatic PyMethodDef FileClass_methods[] = {\n  {\"close\", (PyCFunction)FileClass_close, METH_NOARGS, \"Close the file\"},\n  {\"readable\", (PyCFunction)FileClass_readable, METH_NOARGS,\n   \"True if the file can be read from\"},\n  {\"writable\", (PyCFunction)FileClass_writable, METH_NOARGS,\n   \"True if the file can be written to\"},\n  {\"seekable\", (PyCFunction)FileClass_seekable, METH_NOARGS,\n   \"True if the file support random access (it does if it's readable)\"},\n  {\"available\", (PyCFunction) FileClass_available, METH_NOARGS,\n   \"Number of bytes that can be read without blocking\"},\n  {\"write\", (PyCFunction)FileClass_write, METH_VARARGS, \"Write to the file\"},\n  {\"flush\", (PyCFunction) FileClass_flush, METH_NOARGS,\n   \"Force any buffered output to be written\"},\n  {\"read\", (PyCFunction) FileClass_read, METH_VARARGS, \"Read from the file\"},\n  {\"read_chunk\", (PyCFunction) FileClass_read_chunk, METH_VARARGS,\n   \"Like read, but store data to the given buffer\"},\n  /* Also export read_chunk as readinto for compatibility with Python io */\n  {\"readinto\", (PyCFunction) FileClass_read_chunk, METH_VARARGS,\n   \"Like read, but store data to the given buffer\"},\n  {\"pread\", (PyCFunction) FileClass_pread, METH_VARARGS,\n   \"Read starting from the given position\"},\n  {\"pread_chunk\", (PyCFunction) FileClass_pread_chunk, METH_VARARGS,\n   \"Like pread, but store data to the given buffer\"},\n  {\"seek\", (PyCFunction) FileClass_seek, METH_VARARGS,\n   \"Seek to the given position\"},\n  {\"tell\", (PyCFunction) FileClass_tell, METH_NOARGS,\n   \"Get the current position\"},\n  {NULL}  /* Sentinel */\n};\n\nstatic PyTypeObject FileType = {\n  PyVarObject_HEAD_INIT(NULL, 0)  \n  \"native_core_hdfs.CoreHdfsFile\",          /* tp_name */\n  sizeof(FileInfo),                         /* tp_basicsize */\n  0,                                        /* 
tp_itemsize */\n  (destructor)FileClass_dealloc,            /* tp_dealloc */\n  0,                                        /* tp_print */\n  0,                                        /* tp_getattr */\n  0,                                        /* tp_setattr */\n  0,                                        /* tp_compare */\n  0,                                        /* tp_repr */\n  0,                                        /* tp_as_number */\n  0,                                        /* tp_as_sequence */\n  0,                                        /* tp_as_mapping */\n  0,                                        /* tp_hash */\n  0,                                        /* tp_call */\n  0,                                        /* tp_str */\n  0,                                        /* tp_getattro */\n  0,                                        /* tp_setattro */\n  0,                                        /* tp_as_buffer */\n  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */\n  \"Hdfs File objects\",                      /* tp_doc */\n  0,                                        /* tp_traverse */\n  0,                                        /* tp_clear */\n  0,                                        /* tp_richcompare */\n  0,                                        /* tp_weaklistoffset */\n  0,                                        /* tp_iter */\n  0,                                        /* tp_iternext */\n  FileClass_methods,                        /* tp_methods */\n  FileClass_members,                        /* tp_members */\n  FileClass_getseters,                      /* tp_getset */\n  0,                                        /* tp_base */\n  0,                                        /* tp_dict */\n  0,                                        /* tp_descr_get */\n  0,                                        /* tp_descr_set */\n  0,                                        /* tp_dictoffset */\n  (initproc)FileClass_init,                 /* tp_init */\n 
 0,                                        /* tp_alloc */\n  FileClass_new,                            /* tp_new */\n};\n\n\nstatic PyMethodDef module_methods[] = {\n        {NULL}  /* Sentinel */\n};\n\n\n#ifndef PyMODINIT_FUNC\t/* declarations for DLL import/export */\n#define PyMODINIT_FUNC void\n#endif\n\n\n#if IS_PY3K\nstatic struct PyModuleDef module_def = {\n  PyModuleDef_HEAD_INIT,\n  module__name__, /* m_name */\n  module__doc__,  /* m_doc */\n  -1,                  /* m_size */\n  module_methods,    /* m_methods */\n  NULL,                /* m_reload */\n  NULL,                /* m_traverse */\n  NULL,                /* m_clear */\n  NULL,                /* m_free */\n};\n#endif\n\n\n#if IS_PY3K\n\nPyMODINIT_FUNC\nPyInit_native_core_hdfs(void)\n{\n  PyObject* m;\n\n  if (PyType_Ready(&FsType) < 0)\n    return NULL;\n  if (PyType_Ready(&FileType) < 0)\n    return NULL;\n  m = PyModule_Create(&module_def);\n  if (m == NULL)\n    return NULL;\n\n  Py_INCREF(&FsType);\n  Py_INCREF(&FileType);\n  PyModule_AddObject(m, \"CoreHdfsFs\", (PyObject *)&FsType);\n  PyModule_AddObject(m, \"CoreHdfsFile\", (PyObject *)&FileType);\n\n  return m;\n}\n\n#else\n\nPyMODINIT_FUNC\ninitnative_core_hdfs(void)\n{\n  PyObject* m;\n\n  if (PyType_Ready(&FsType) < 0)\n    return;\n  if (PyType_Ready(&FileType) < 0)\n    return;\n  m = Py_InitModule3(module__name__, module_methods,\n                     module__doc__);\n  if (m == NULL)\n    return;\n\n  Py_INCREF(&FsType);\n  Py_INCREF(&FileType);\n  PyModule_AddObject(m, \"CoreHdfsFs\", (PyObject *)&FsType);\n  PyModule_AddObject(m, \"CoreHdfsFile\", (PyObject *)&FileType);\n\n  PyModule_AddStringConstant(m, \"MODE_READ\", MODE_READ);\n  PyModule_AddStringConstant(m, \"MODE_WRITE\", MODE_WRITE);\n  PyModule_AddStringConstant(m, \"MODE_APPEND\", MODE_APPEND);\n}\n#endif\n\n"
  },
  {
    "path": "src/py3k_compat.h",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\n#ifndef PY3K_COMPAT_H\n#define PY3K_COMPAT_H\n\n#if PY_MAJOR_VERSION >= 3\n#define IS_PY3K 1\n#endif\n\n#include \"buf_macros.h\"\n#include \"Py_macros.h\"\n\n#endif \n"
  },
  {
    "path": "src/sercore/HadoopUtils/SerialUtils.cc",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#include \"SerialUtils.hh\"\n\n#include <errno.h>\n#include <rpc/types.h>\n#include <rpc/xdr.h>\n#include <string>\n#include <string.h>\n\nusing std::string;\n\nnamespace HadoopUtils {\n\n  Error::Error(const std::string& msg): error(msg) {\n  }\n\n  Error::Error(const std::string& msg,\n               const std::string& file, int line,\n               const std::string& function) {\n    error = msg + \" at \" + file + \":\" + std::to_string(line) +\n            \" in \" + function;\n  }\n\n  const std::string& Error::getMessage() const {\n    return error;\n  }\n\n  FileInStream::FileInStream()\n  {\n    mFile = NULL;\n    isOwned = false;\n  }\n\n  bool FileInStream::open(const std::string& name)\n  {\n    mFile = fopen(name.c_str(), \"rb\");\n    isOwned = true;\n    return (mFile != NULL);\n  }\n\n  bool FileInStream::open(FILE* file)\n  {\n    mFile = file;\n    isOwned = false;\n    return (mFile != NULL);\n  }\n\n  void FileInStream::read(void *buf, size_t len)\n  {\n    size_t result = fread(buf, len, 1, mFile);\n    if (result == 0) {\n      if (feof(mFile)) {\n        HADOOP_ASSERT(false, \"end of file\");\n      } else {\n     
   HADOOP_ASSERT(false, string(\"read error on file: \") + strerror(errno));\n      }\n    }\n  }\n\n  bool FileInStream::skip(size_t nbytes)\n  {\n    return (0==fseek(mFile, nbytes, SEEK_CUR));\n  }\n\n  bool FileInStream::close()\n  {\n    int ret = 0;\n    if (mFile != NULL && isOwned) {\n      ret = fclose(mFile);\n    }\n    mFile = NULL;\n    return (ret==0);\n  }\n\n  FileInStream::~FileInStream()\n  {\n    if (mFile != NULL) {\n      close();\n    }\n  }\n\n  FileOutStream::FileOutStream()\n  {\n    mFile = NULL;\n    isOwned = false;\n  }\n\n  bool FileOutStream::open(const std::string& name, bool overwrite)\n  {\n    if (!overwrite) {\n      mFile = fopen(name.c_str(), \"rb\");\n      if (mFile != NULL) {\n        fclose(mFile);\n        return false;\n      }\n    }\n    mFile = fopen(name.c_str(), \"wb\");\n    isOwned = true;\n    return (mFile != NULL);\n  }\n\n  bool FileOutStream::open(FILE* file)\n  {\n    mFile = file;\n    isOwned = false;\n    return (mFile != NULL);\n  }\n\n  void FileOutStream::write(const void* buf, size_t len)\n  {\n    size_t result = fwrite(buf, len, 1, mFile);\n    HADOOP_ASSERT(result == 1,\n                  string(\"write error to file: \") + strerror(errno));\n  }\n\n  bool FileOutStream::advance(size_t nbytes)\n  {\n    return (0==fseek(mFile, nbytes, SEEK_CUR));\n  }\n\n  bool FileOutStream::close()\n  {\n    int ret = 0;\n    if (mFile != NULL && isOwned) {\n      ret = fclose(mFile);\n    }\n    mFile = NULL;\n    return (ret == 0);\n  }\n\n  void FileOutStream::flush()\n  {\n    fflush(mFile);\n  }\n\n  FileOutStream::~FileOutStream()\n  {\n    if (mFile != NULL) {\n      close();\n    }\n  }\n\n  StringInStream::StringInStream(const std::string& str): buffer(str) {\n    itr = buffer.begin();\n  }\n\n  void StringInStream::read(void *buf, size_t buflen) {\n    size_t bytes = 0;\n    char* output = (char*) buf;\n    std::string::const_iterator end = buffer.end();\n    while (bytes < buflen) {\n      
output[bytes++] = *itr;\n      ++itr;\n      if (itr == end) {\n        break;\n      }\n    }\n    HADOOP_ASSERT(bytes == buflen, \"unexpected end of string reached\");\n  }\n\n  void serializeInt(int32_t t, OutStream& stream) {\n    serializeLong(t, stream);\n  }\n\n  void serializeLong(int64_t t, OutStream& stream)\n  {\n    if (t >= -112 && t <= 127) {\n      int8_t b = t;\n      stream.write(&b, 1);\n      return;\n    }\n\n    int8_t len = -112;\n    if (t < 0) {\n      t ^= -1ll; // reset the sign bit\n      len = -120;\n    }\n\n    uint64_t tmp = t;\n    while (tmp != 0) {\n      tmp = tmp >> 8;\n      len--;\n    }\n\n    stream.write(&len, 1);\n    len = (len < -120) ? -(len + 120) : -(len + 112);\n\n    for (uint32_t idx = len; idx != 0; idx--) {\n      uint32_t shiftbits = (idx - 1) * 8;\n      uint64_t mask = 0xFFll << shiftbits;\n      uint8_t b = (t & mask) >> shiftbits;\n      stream.write(&b, 1);\n    }\n  }\n\n  int32_t deserializeInt(InStream& stream) {\n    return deserializeLong(stream);\n  }\n\n  int64_t deserializeLong(InStream& stream)\n  {\n    int8_t b;\n    stream.read(&b, 1);\n    if (b >= -112) {\n      return b;\n    }\n    bool negative;\n    int len;\n    if (b < -120) {\n      negative = true;\n      len = -120 - b;\n    } else {\n      negative = false;\n      len = -112 - b;\n    }\n    uint8_t barr[len];\n    stream.read(barr, len);\n    int64_t t = 0;\n    for (int idx = 0; idx < len; idx++) {\n      t = t << 8;\n      t |= (barr[idx] & 0xFF);\n    }\n    if (negative) {\n      t ^= -1ll;\n    }\n    return t;\n  }\n\n  void serializeFloat(float t, OutStream& stream)\n  {\n    char buf[sizeof(float)];\n    XDR xdrs;\n    xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE);\n    xdr_float(&xdrs, &t);\n    stream.write(buf, sizeof(float));\n  }\n\n  float deserializeFloat(InStream& stream)\n  {\n    float f;\n    deserializeFloat(f, stream);\n    return f;\n  }\n\n  void deserializeFloat(float& t, InStream& stream)\n  {\n    
char buf[sizeof(float)];\n    stream.read(buf, sizeof(float));\n    XDR xdrs;\n    xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE);\n    xdr_float(&xdrs, &t);\n  }\n\n  void serializeString(const std::string& t, OutStream& stream)\n  {\n    serializeInt(t.length(), stream);\n    if (t.length() > 0) {\n      stream.write(t.data(), t.length());\n    }\n  }\n\n  void deserializeString(std::string& t, InStream& stream)\n  {\n    int32_t len = deserializeInt(stream);\n    if (len > 0) {\n      // resize the string to the right length\n      t.resize(len);\n      // read into the string in 64k chunks\n      const int bufSize = 65536;\n      int offset = 0;\n      char buf[bufSize];\n      while (len > 0) {\n        int chunkLength = len > bufSize ? bufSize : len;\n        stream.read(buf, chunkLength);\n        t.replace(offset, chunkLength, buf, chunkLength);\n        offset += chunkLength;\n        len -= chunkLength;\n      }\n    } else {\n      t.clear();\n    }\n  }\n\n}\n"
  },
  {
    "path": "src/sercore/HadoopUtils/SerialUtils.hh",
    "content": "/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n#ifndef HADOOP_SERIAL_UTILS_HH\n#define HADOOP_SERIAL_UTILS_HH\n\n#include <string>\n#include <stdint.h>\n\nnamespace HadoopUtils {\n\n  /**\n   * A simple exception class that records a message for the user.\n   */\n  class Error {\n  private:\n    std::string error;\n  public:\n\n    /**\n     * Create an error object with the given message.\n     */\n    Error(const std::string& msg);\n\n    /**\n     * Construct an error object with the given message that was created on\n     * the given file, line, and functino.\n     */\n    Error(const std::string& msg,\n          const std::string& file, int line, const std::string& function);\n\n    /**\n     * Get the error message.\n     */\n    const std::string& getMessage() const;\n  };\n\n  /**\n   * Check to make sure that the condition is true, and throw an exception\n   * if it is not. 
The exception will contain the message and a description\n   * of the source location.\n   */\n  #define HADOOP_ASSERT(CONDITION, MESSAGE) \\\n    { \\\n      if (!(CONDITION)) { \\\n        throw HadoopUtils::Error((MESSAGE), __FILE__, __LINE__, \\\n                                    __func__); \\\n      } \\\n    }\n\n  /**\n   * An interface for an input stream.\n   */\n  class InStream {\n  public:\n    /**\n     * Reads len bytes from the stream into the buffer.\n     * @param buf the buffer to read into\n     * @param len the length of the buffer\n     * @throws Error if there are problems reading\n     */\n    virtual void read(void *buf, size_t len) = 0;\n    virtual ~InStream() {}\n  };\n\n  /**\n   * An interface for an output stream.\n   */\n  class OutStream {\n  public:\n    /**\n     * Write the given buffer to the stream.\n     * @param buf the data to write\n     * @param len the number of bytes to write\n     * @throws Error if there are problems writing\n     */\n    virtual void write(const void *buf, size_t len) = 0;\n    /**\n     * Flush the data to the underlying store.\n     */\n    virtual void flush() = 0;\n    virtual ~OutStream() {}\n  };\n\n  /**\n   * A class to read a file as a stream.\n   */\n  class FileInStream : public InStream {\n  public:\n    FileInStream();\n    bool open(const std::string& name);\n    bool open(FILE* file);\n    void read(void *buf, size_t buflen);\n    bool skip(size_t nbytes);\n    bool close();\n    virtual ~FileInStream();\n  private:\n    /**\n     * The file to read from.\n     */\n    FILE *mFile;\n    /**\n     * Is this class responsible for closing the FILE*?\n     */\n    bool isOwned;\n  };\n\n  /**\n   * A class to write a stream to a file.\n   */\n  class FileOutStream: public OutStream {\n  public:\n\n    /**\n     * Create a stream that isn't bound to anything.\n     */\n    FileOutStream();\n\n    /**\n     * Create the given file, potentially overwriting an existing file.\n     */\n  
  bool open(const std::string& name, bool overwrite);\n    bool open(FILE* file);\n    void write(const void* buf, size_t len);\n    bool advance(size_t nbytes);\n    void flush();\n    bool close();\n    virtual ~FileOutStream();\n  private:\n    FILE *mFile;\n    bool isOwned;\n  };\n\n  /**\n   * A stream that reads from a string.\n   */\n  class StringInStream: public InStream {\n  public:\n    StringInStream(const std::string& str);\n    virtual void read(void *buf, size_t buflen);\n  private:\n    const std::string& buffer;\n    std::string::const_iterator itr;\n  };\n\n  void serializeInt(int32_t t, OutStream& stream);\n  int32_t deserializeInt(InStream& stream);\n  void serializeLong(int64_t t, OutStream& stream);\n  int64_t deserializeLong(InStream& stream);\n  void serializeFloat(float t, OutStream& stream);\n  void deserializeFloat(float& t, InStream& stream);\n  float deserializeFloat(InStream& stream);\n  void serializeString(const std::string& t, OutStream& stream);\n  void deserializeString(std::string& t, InStream& stream);\n}\n\n#endif\n"
  },
  {
    "path": "src/sercore/hu_extras.cpp",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n#include \"HadoopUtils/SerialUtils.hh\"\n\n#define INT64_SIZE sizeof(int64_t)\n\n\nint64_t deserializeLongWritable(HadoopUtils::InStream& stream) {\n  int64_t rval = 0;\n  unsigned char bytes[INT64_SIZE];\n  stream.read(bytes, INT64_SIZE);\n  for (std::size_t i = 0; i < INT64_SIZE; ++i) {\n    rval = (rval << INT64_SIZE) | bytes[i];\n  }\n  return rval;\n}\n"
  },
  {
    "path": "src/sercore/hu_extras.h",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n#pragma once\n\n#include \"HadoopUtils/SerialUtils.hh\"\n\n/**\n * Read a hadoop.io.LongWritable (java.io.DataInput.readLong).\n */\nint64_t deserializeLongWritable(HadoopUtils::InStream& stream);\n"
  },
  {
    "path": "src/sercore/sercore.cpp",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n#include <Python.h>\n\n#include \"hu_extras.h\"\n#include \"streams.h\"\n\nconst char* m_name = \"sercore\";\nconst char* m_doc = \"core serialization utils\";\n\n#if PY_MAJOR_VERSION >= 3\n#define PY3\n#define INIT_RETURN(V) return V;\n#else\n#define INIT_RETURN(V) return;\n#endif\n\n\n// Deserializes a hadoop.(mapred|mapreduce.lib.input).FileSplit\nstatic PyObject *\ndeserializeFileSplit(PyObject *self, PyObject *args) {\n  PyObject *data, *rval;\n  Py_buffer buffer = {NULL, NULL};\n  PyThreadState *state;\n  if (!PyArg_ParseTuple(args, \"O\", &data)) {\n    return NULL;\n  }\n  if (PyObject_GetBuffer(data, &buffer, PyBUF_SIMPLE) < 0) {\n    PyErr_SetString(PyExc_TypeError, \"data not accessible as a buffer\");\n    return NULL;\n  }\n\n  // deserialize fields\n  std::string s((const char*)buffer.buf, buffer.len);\n  HadoopUtils::StringInStream stream(s);\n  std::string fname;\n  int64_t offset, length;\n  state = PyEval_SaveThread();\n  try {\n    HadoopUtils::deserializeString(fname, stream);\n    offset = deserializeLongWritable(stream);\n    length = deserializeLongWritable(stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyBuffer_Release(&buffer);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  
PyBuffer_Release(&buffer);\n\n  // build output tuple\n  PyObject *_fname, *_offset, *_length;\n  if (!(_fname = PyUnicode_FromStringAndSize(fname.c_str(), fname.size()))) {\n    return NULL;\n  }\n  if (!(_offset = Py_BuildValue(\"L\", offset))) {\n    return NULL;\n  }\n  if (!(_length = Py_BuildValue(\"L\", length))) {\n    return NULL;\n  }\n  if (!(rval = PyTuple_New(3))) {\n    return NULL;\n  }\n  PyTuple_SET_ITEM(rval, 0, _fname);\n  PyTuple_SET_ITEM(rval, 1, _offset);\n  PyTuple_SET_ITEM(rval, 2, _length);\n  return rval;\n}\n\n\nstatic PyMethodDef SercoreMethods[] = {\n  {\"deserialize_file_split\", deserializeFileSplit, METH_VARARGS,\n   \"deserialize_file_split(data): deserialize a Hadoop FileSplit\"},\n  {NULL}\n};\n\n\n#ifdef PY3\nstatic struct PyModuleDef module_def = {\n  PyModuleDef_HEAD_INIT,\n  m_name,\n  m_doc,\n  0,\n  SercoreMethods,\n  NULL,\n  NULL,\n  NULL,\n  NULL\n};\n#endif\n\n\nPyMODINIT_FUNC\n#ifdef PY3\nPyInit_sercore(void) {\n#else\ninitsercore(void) {\n#endif\n  PyObject *m;\n  FileInStreamType.tp_new = PyType_GenericNew;\n  if (PyType_Ready(&FileInStreamType) < 0) {\n    INIT_RETURN(NULL);;\n  }\n  FileOutStreamType.tp_new = PyType_GenericNew;\n  if (PyType_Ready(&FileOutStreamType) < 0) {\n    INIT_RETURN(NULL);;\n  }\n#ifdef PY3\n  m = PyModule_Create(&module_def);\n#else\n  m = Py_InitModule3(m_name, SercoreMethods, m_doc);\n#endif\n  if (!m) {\n    INIT_RETURN(NULL);;\n  }\n  Py_INCREF(&FileInStreamType);\n  PyModule_AddObject(m, \"FileInStream\", (PyObject *)&FileInStreamType);\n  Py_INCREF(&FileOutStreamType);\n  PyModule_AddObject(m, \"FileOutStream\", (PyObject *)&FileOutStreamType);\n  INIT_RETURN(m);\n}\n"
  },
  {
    "path": "src/sercore/streams.cpp",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n// WARNING: types defined here are **NOT** designed for inheritance. For\n// instance, FileInStream_readTuple calls other FileInStream_read* methods\n// directly at the C++ level. Since they are not part of the public API ---\n// exactly one input and one output stream are used in the pipes protocol, and\n// that's it --- we can make the code simpler and more efficient.\n\n#define PY_SSIZE_T_CLEAN  // must be defined before including Python.h\n\n#include <Python.h>\n\n#include <string>\n#include <memory>\n#include <cstdlib>\n#include <cstdint>\n#include <cstdio>\n\n#include \"hu_extras.h\"\n#include \"streams.h\"\n\n#define OUTPUT 50\n#define PARTITIONED_OUTPUT 51\n\n\n// This can only be used in functions that return a PyObject*\n# define _ASSERT_STREAM_OPEN {                                           \\\n  if (self->closed) {                                                    \\\n    PyErr_SetString(PyExc_ValueError, \"I/O operation on closed stream\"); \\\n    return NULL;                                                         \\\n  }                                                                      \\\n}\n\n// PyFile_AsFile is only available in Python 2, for \"old style\" file objects\n// This should work on anything associated to a file descriptor\nFILE *\n_PyFile_AsFile(PyObject *f, const char* mode) 
{\n  int fd, newfd;\n  FILE *fp;\n  PyThreadState *state;\n  if ((fd = PyObject_AsFileDescriptor(f)) == -1) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  if ((newfd = dup(fd)) == -1) {\n    goto error;\n  }\n  if (!(fp = fdopen(newfd, mode))) {\n    goto error;\n  }\n  PyEval_RestoreThread(state);\n  return fp;\n\nerror:\n    PyEval_RestoreThread(state);\n    PyErr_SetFromErrno(PyExc_IOError);\n    return NULL;\n}\n\n\nstatic int\nFileInStream_init(FileInStreamObj *self, PyObject *args, PyObject *kwds) {\n  const char *filename;\n  PyThreadState *state;\n  self->stream = std::make_shared<HadoopUtils::FileInStream>();\n  if (PyArg_ParseTuple(args, \"es\", \"utf-8\", &filename)) {\n    state = PyEval_SaveThread();\n    if (!self->stream->open(std::string(filename))) {\n      PyEval_RestoreThread(state);\n      PyErr_SetFromErrno(PyExc_IOError);\n      PyMem_Free((void*)filename);\n      return -1;\n    }\n    PyEval_RestoreThread(state);\n    PyMem_Free((void*)filename);\n  } else {\n    PyErr_Clear();\n    PyObject *inarg;\n    if (!PyArg_ParseTuple(args, \"O\", &inarg)) {\n      return -1;\n    }\n    if (!(self->fp = _PyFile_AsFile(inarg, \"rb\"))) {\n      return -1;\n    }\n    self->stream->open(self->fp);  // this variant just stores a reference\n  }\n  self->closed = false;\n  return 0;\n}\n\n\nstatic PyObject *\nFileInStream_close(FileInStreamObj *self) {\n  PyThreadState *state;\n  if (self->closed) {\n    Py_RETURN_NONE;\n  }\n  state = PyEval_SaveThread();\n  if (self->fp) {\n    fclose(self->fp);\n  }\n  bool res = self->stream->close();\n  if (!res) {\n    PyEval_RestoreThread(state);\n    return PyErr_SetFromErrno(PyExc_IOError);\n  }\n  PyEval_RestoreThread(state);\n  self->closed = true;\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileInStream_enter(FileInStreamObj *self) {\n  _ASSERT_STREAM_OPEN;\n  Py_INCREF(self);\n  return (PyObject*)self;\n}\n\n\nstatic PyObject *\nFileInStream_exit(FileInStreamObj *self, PyObject *args) {\n  
return FileInStream_close(self);\n}\n\n\nstatic PyObject *\nFileInStream_read(FileInStreamObj *self, PyObject *args) {\n  size_t len;\n  PyObject *rval;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"n\", &len)) {\n    return NULL;\n  }\n  if (!(rval = PyBytes_FromStringAndSize(NULL, len))) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  try {\n    self->stream->read(PyBytes_AS_STRING(rval), len);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    Py_DECREF(rval);\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  return rval;\n}\n\n\nstatic PyObject *\nFileInStream_readVInt(FileInStreamObj *self) {\n  int32_t rval;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  state = PyEval_SaveThread();\n  try {\n    rval = HadoopUtils::deserializeInt(*self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  return Py_BuildValue(\"i\", rval);\n}\n\n\nstatic PyObject *\nFileInStream_readVLong(FileInStreamObj *self) {\n  int64_t rval;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  state = PyEval_SaveThread();\n  try {\n    rval = HadoopUtils::deserializeLong(*self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  return Py_BuildValue(\"L\", rval);\n}\n\n\nstatic PyObject *\nFileInStream_readFloat(FileInStreamObj *self) {\n  float rval;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  state = PyEval_SaveThread();\n  try {\n    rval = HadoopUtils::deserializeFloat(*self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  
PyEval_RestoreThread(state);\n  return PyFloat_FromDouble(rval);\n}\n\n\nstd::string\n_FileInStream_read_cppstring(FileInStreamObj *self) {\n  std::string rval;\n  PyThreadState *state;\n  state = PyEval_SaveThread();\n  try {\n    HadoopUtils::deserializeString(rval, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    throw;\n  }\n  PyEval_RestoreThread(state);\n  return rval;\n}\n\n\nstatic PyObject *\nFileInStream_readString(FileInStreamObj *self) {\n  _ASSERT_STREAM_OPEN;\n  std::string s;\n  try {\n    s = _FileInStream_read_cppstring(self);\n  } catch (HadoopUtils::Error e) {\n    return NULL;\n  }\n  return PyUnicode_FromStringAndSize(s.c_str(), s.size());\n}\n\n\nstatic PyObject *\nFileInStream_readBytes(FileInStreamObj *self) {\n  _ASSERT_STREAM_OPEN;\n  std::string s;\n  try {\n    s = _FileInStream_read_cppstring(self);\n  } catch (HadoopUtils::Error e) {\n    return NULL;\n  }\n  return PyBytes_FromStringAndSize(s.c_str(), s.size());\n}\n\n\nstatic PyObject *\nFileInStream_readTuple(FileInStreamObj *self, PyObject *args) {\n  char *fmt;\n  PyObject *rval;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"s\", &fmt)) {\n    return NULL;\n  }\n  std::size_t nitems = strlen(fmt);\n  if (!(rval = PyTuple_New(nitems))) {\n    return NULL;\n  }\n  PyObject *item;\n  for (std::size_t i = 0; i < nitems; ++i) {\n    switch(fmt[i]) {\n    case 'i':\n      if (!(item = FileInStream_readVInt(self))) goto error;\n      break;\n    case 'l':\n      if (!(item = FileInStream_readVLong(self))) goto error;\n      break;\n    case 'f':\n      if (!(item = FileInStream_readFloat(self))) goto error;\n      break;\n    case 's':\n      if (!(item = FileInStream_readString(self))) goto error;\n      break;\n    case 'b':\n      if (!(item = FileInStream_readBytes(self))) goto error;\n      break;\n    default:\n      Py_DECREF(rval);\n      return 
PyErr_Format(PyExc_ValueError, \"Unknown format '%c'\", fmt[i]);\n    }\n    PyTuple_SET_ITEM(rval, i, item);\n  }\n  return rval;\n\nerror:\n  Py_DECREF(rval);\n  return NULL;\n}\n\n\nstatic PyObject *\nFileInStream_skip(FileInStreamObj *self, PyObject *args) {\n  size_t len;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"n\", &len)) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  bool res = self->stream->skip(len);\n  if (!res) {\n    PyEval_RestoreThread(state);\n    return PyErr_SetFromErrno(PyExc_IOError);\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\n// Extra types not used directly by the protocol, but that may appear as a\n// result of serializing objects such as keys, values and input splits.\n// **NOTE**: within the command stream, each serialized object starts with a\n// VInt that specifies its length. For instance, to read a LongWritable key:\n//     assert stream.read_vint() == 8\n//     key = stream.read_long_writable()\n// Equivalent, but probably less efficient:\n//     key_bytes = stream.read_bytes()\n//     assert len(key_bytes) == 8\n//     key = struct.unpack(\">q\", key_bytes)[0]\n\nstatic PyObject *\nFileInStream_readLongWritable(FileInStreamObj *self) {\n  int64_t rval = 0;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  state = PyEval_SaveThread();\n  try {\n    rval = deserializeLongWritable(*self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  return Py_BuildValue(\"L\", rval);\n}\n\n\nstatic PyMethodDef FileInStream_methods[] = {\n  {\"close\", (PyCFunction)FileInStream_close, METH_NOARGS,\n   \"close(): close the currently open file\"},\n  {\"read\", (PyCFunction)FileInStream_read, METH_VARARGS,\n   \"read(len): read len bytes from the stream\"},\n  {\"read_vint\", (PyCFunction)FileInStream_readVInt, METH_NOARGS,\n   
\"read_vint(): read a variable length integer from the stream\"},\n  {\"read_vlong\", (PyCFunction)FileInStream_readVLong, METH_NOARGS,\n   \"read_vlong(): read a variable length long integer from the stream\"},\n  {\"read_float\", (PyCFunction)FileInStream_readFloat, METH_NOARGS,\n   \"read_float(): read a float from the stream\"},\n  {\"read_string\", (PyCFunction)FileInStream_readString, METH_NOARGS,\n   \"read_string(): read a string from the stream\"},\n  {\"read_bytes\", (PyCFunction)FileInStream_readBytes, METH_NOARGS,\n   \"read_bytes(): read a bytes object from the stream\"},\n  {\"read_tuple\", (PyCFunction)FileInStream_readTuple, METH_VARARGS,\n   \"read_tuple(fmt): read len(fmt) values, where fmt specifies types\"},\n  {\"skip\", (PyCFunction)FileInStream_skip, METH_VARARGS,\n   \"skip(len): skip len bytes\"},\n  {\"__enter__\", (PyCFunction)FileInStream_enter, METH_NOARGS},\n  {\"__exit__\", (PyCFunction)FileInStream_exit, METH_VARARGS},\n  {\"read_long_writable\", (PyCFunction)FileInStream_readLongWritable,\n   METH_NOARGS, \"read_long_writable(): read a hadoop.io.LongWritable\"},\n  {NULL}  /* Sentinel */\n};\n\n\nPyTypeObject FileInStreamType = {\n    PyVarObject_HEAD_INIT(NULL, 0)\n    \"sercore.FileInStream\",                           /* tp_name */\n    sizeof(FileInStreamObj),                          /* tp_basicsize */\n    0,                                                /* tp_itemsize */\n    0,                                                /* tp_dealloc */\n    0,                                                /* tp_print */\n    0,                                                /* tp_getattr */\n    0,                                                /* tp_setattr */\n    0,                                                /* tp_compare */\n    0,                                                /* tp_repr */\n    0,                                                /* tp_as_number */\n    0,                                                /* 
tp_as_sequence */\n    0,                                                /* tp_as_mapping */\n    0,                                                /* tp_hash */\n    0,                                                /* tp_call */\n    0,                                                /* tp_str */\n    0,                                                /* tp_getattro */\n    0,                                                /* tp_setattro */\n    0,                                                /* tp_as_buffer */\n    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,         /* tp_flags */\n    \"A class to read a file as a stream\",             /* tp_doc */\n    0,                                                /* tp_traverse */\n    0,                                                /* tp_clear */\n    0,                                                /* tp_richcompare */\n    0,                                                /* tp_weaklistoffset */\n    0,                                                /* tp_iter */\n    0,                                                /* tp_iternext */\n    FileInStream_methods,                             /* tp_methods */\n    0,                                                /* tp_members */\n    0,                                                /* tp_getset */\n    0,                                                /* tp_base */\n    0,                                                /* tp_dict */\n    0,                                                /* tp_descr_get */\n    0,                                                /* tp_descr_set */\n    0,                                                /* tp_dictoffset */\n    (initproc)FileInStream_init,                      /* tp_init */\n    0,                                                /* tp_alloc */\n    0,                                                /* tp_new */\n};\n\n\nstatic int\nFileOutStream_init(FileOutStreamObj *self, PyObject *args, PyObject *kwds) {\n  const char 
*filename;\n  PyThreadState *state;\n  self->stream = std::make_shared<HadoopUtils::FileOutStream>();\n  if (PyArg_ParseTuple(args, \"es\", \"utf-8\", &filename)) {\n    state = PyEval_SaveThread();\n    if (!self->stream->open(std::string(filename), true)) {\n      PyEval_RestoreThread(state);\n      PyErr_SetFromErrno(PyExc_IOError);\n      PyMem_Free((void*)filename);\n      return -1;\n    }\n    PyEval_RestoreThread(state);\n    PyMem_Free((void*)filename);\n  } else {\n    PyErr_Clear();\n    PyObject *inarg;\n    if (!PyArg_ParseTuple(args, \"O\", &inarg)) {\n      return -1;\n    }\n    if (!(self->fp = _PyFile_AsFile(inarg, \"wb\"))) {\n      return -1;\n    }\n    self->stream->open(self->fp);  // this variant just stores a reference\n  }\n  self->closed = false;\n  return 0;\n}\n\n\nstatic PyObject *\nFileOutStream_close(FileOutStreamObj *self) {\n  PyThreadState *state;\n  if (self->closed) {\n    Py_RETURN_NONE;\n  }\n  state = PyEval_SaveThread();\n  if (self->fp) {\n    fclose(self->fp);\n  }\n  bool res = self->stream->close();\n  if (!res) {\n    PyEval_RestoreThread(state);\n    return PyErr_SetFromErrno(PyExc_IOError);\n  }\n  PyEval_RestoreThread(state);\n  self->closed = true;\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_enter(FileOutStreamObj *self) {\n  _ASSERT_STREAM_OPEN;\n  Py_INCREF(self);\n  return (PyObject*)self;\n}\n\n\nstatic PyObject *\nFileOutStream_exit(FileOutStreamObj *self, PyObject *args) {\n  return FileOutStream_close(self);\n}\n\n\nstatic PyObject *\nFileOutStream_write(FileOutStreamObj *self, PyObject *args) {\n  PyObject* data = NULL;\n  Py_buffer buffer = {NULL, NULL};\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"O\", &data)) {\n    return NULL;\n  }\n  if (PyObject_GetBuffer(data, &buffer, PyBUF_SIMPLE) < 0) {\n    PyErr_SetString(PyExc_TypeError, \"data not accessible as a buffer\");\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  try {\n    
self->stream->write(buffer.buf, buffer.len);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_writeVInt(FileOutStreamObj *self, PyObject *args) {\n  int val = 0;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"i\", &val)) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  try {\n    HadoopUtils::serializeInt(val, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_writeVLong(FileOutStreamObj *self, PyObject *args) {\n  long long val;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"L\", &val)) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  try {\n    HadoopUtils::serializeLong(val, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_writeFloat(FileOutStreamObj *self, PyObject *args) {\n  float val;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"f\", &val)) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  try {\n    HadoopUtils::serializeFloat(val, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject*\n_FileOutStream_write_cppstring(FileOutStreamObj *self, std::string s) {\n  PyThreadState *state = PyEval_SaveThread();\n  try {\n    
HadoopUtils::serializeString(s, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_writeString(FileOutStreamObj *self, PyObject *args) {\n  _ASSERT_STREAM_OPEN;\n#if PY_MAJOR_VERSION < 3\n  // default encoding is ASCII, so \"s#\" would not work here\n  PyObject *pystring, *pybytes;\n  if (!PyArg_ParseTuple(args, \"O\", &pystring)) {\n    return NULL;\n  }\n  std::string s;\n  if (PyBytes_Check(pystring)) {\n    s = std::string(PyBytes_AS_STRING(pystring), PyBytes_GET_SIZE(pystring));\n  } else {\n    if (!(pybytes = PyUnicode_AsUTF8String(pystring))) {\n      return NULL;\n    }\n    s = std::string(PyBytes_AS_STRING(pybytes), PyBytes_GET_SIZE(pybytes));\n    Py_DECREF(pybytes);\n  }\n  return _FileOutStream_write_cppstring(self, s);\n#else\n  const char* buf;\n  Py_ssize_t len;\n  if (!PyArg_ParseTuple(args, \"s#\", &buf,  &len)) {\n    return NULL;\n  }\n  return _FileOutStream_write_cppstring(self, std::string(buf, len));\n#endif\n}\n\n\nstatic PyObject *\nFileOutStream_writeBytes(FileOutStreamObj *self, PyObject *args) {\n  _ASSERT_STREAM_OPEN;\n#if PY_MAJOR_VERSION < 3\n  // \"y#\" not available\n  PyObject *pyval, *rval;\n  Py_buffer buffer = {NULL, NULL};\n  if (!PyArg_ParseTuple(args, \"O\", &pyval)) {\n    return NULL;\n  }\n  if (PyObject_GetBuffer(pyval, &buffer, PyBUF_SIMPLE) < 0) {\n    return NULL;\n  }\n  std::string s((const char*)buffer.buf, buffer.len);\n  rval = _FileOutStream_write_cppstring(self, s);\n  PyBuffer_Release(&buffer);\n  return rval;\n#else\n  const char* buf;\n  Py_ssize_t len;\n  if (!PyArg_ParseTuple(args, \"y#\", &buf,  &len)) {\n    return NULL;\n  }\n  return _FileOutStream_write_cppstring(self, std::string(buf, len));\n#endif\n}\n\n\nstatic PyObject *\nFileOutStream_writeTuple(FileOutStreamObj *self, PyObject *args) 
{\n  PyObject *inarg, *iterator, *item;\n  char *fmt;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"sO\", &fmt, &inarg)) {\n    return NULL;\n  }\n  if (!(iterator = PyObject_GetIter(inarg))) {\n    return NULL;\n  }\n  for (std::size_t i = 0; i < strlen(fmt); ++i) {\n    if (!(item = PyIter_Next(iterator))) {\n      if (!PyErr_Occurred()) {\n\tPyErr_SetString(PyExc_ValueError, \"not enough items\");\n      }\n      Py_DECREF(iterator);\n      return NULL;\n    }\n    switch(fmt[i]) {\n    case 'i':\n      if (!FileOutStream_writeVInt(self, PyTuple_Pack(1, item))) goto error;\n      break;\n    case 'l':\n      if (!FileOutStream_writeVLong(self, PyTuple_Pack(1, item))) goto error;\n      break;\n    case 'f':\n      if (!FileOutStream_writeFloat(self, PyTuple_Pack(1, item))) goto error;\n      break;\n    case 's':\n      if (!FileOutStream_writeString(self, PyTuple_Pack(1, item))) goto error;\n      break;\n    case 'b':\n      if (!FileOutStream_writeBytes(self, PyTuple_Pack(1, item))) goto error;\n      break;\n    default:\n      PyErr_Format(PyExc_ValueError, \"Unknown format '%c'\", fmt[i]);\n      goto error;\n    }\n    Py_DECREF(item);\n  }\n  Py_DECREF(iterator);\n  Py_RETURN_NONE;\n\nerror:\n  Py_DECREF(item);\n  Py_DECREF(iterator);\n  return NULL;\n}\n\n\n// Same as write_tuple(\"ibb\", (OUTPUT, k, v)) or, when part is specified,\n// write_tuple(\"iibb\", (PARTITIONED_OUTPUT, part, k, v)), but more efficient.\n// Optimizing other commands in this way is probably worthless.\nstatic PyObject *\nFileOutStream_writeOutput(FileOutStreamObj *self, PyObject *args) {\n  int part = -1;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n#if PY_MAJOR_VERSION < 3\n  // \"y#\" not available\n  PyObject *pykey, *pyval;\n  Py_buffer kbuf = {NULL, NULL};\n  Py_buffer vbuf = {NULL, NULL};\n  if (!PyArg_ParseTuple(args, \"OO|i\", &pykey, &pyval, &part)) {\n    return NULL;\n  }\n  if (PyObject_GetBuffer(pykey, &kbuf, PyBUF_SIMPLE) < 0) {\n    return NULL;\n  
}\n  if (PyObject_GetBuffer(pyval, &vbuf, PyBUF_SIMPLE) < 0) {\n    PyBuffer_Release(&kbuf);\n    return NULL;\n  }\n  std::string ks((const char*)kbuf.buf, kbuf.len);\n  std::string vs((const char*)vbuf.buf, vbuf.len);\n#else\n  const char *key, *val;\n  Py_ssize_t klen, vlen;\n  if (!PyArg_ParseTuple(args, \"y#y#|i\", &key, &klen, &val, &vlen, &part)) {\n    return NULL;\n  }\n  std::string ks(key, klen);\n  std::string vs(val, vlen);\n#endif\n  state = PyEval_SaveThread();\n  try {\n    if (part >= 0) {\n      HadoopUtils::serializeInt(PARTITIONED_OUTPUT, *self->stream);\n      HadoopUtils::serializeInt(part, *self->stream);\n    } else {\n      HadoopUtils::serializeInt(OUTPUT, *self->stream);\n    }\n    HadoopUtils::serializeString(ks, *self->stream);\n    HadoopUtils::serializeString(vs, *self->stream);\n  } catch (HadoopUtils::Error e) {\n    PyEval_RestoreThread(state);\n    PyErr_SetString(PyExc_IOError, e.getMessage().c_str());\n#if PY_MAJOR_VERSION < 3\n    PyBuffer_Release(&kbuf);\n    PyBuffer_Release(&vbuf);\n#endif\n    return NULL;\n  }\n  PyEval_RestoreThread(state);\n#if PY_MAJOR_VERSION < 3\n  PyBuffer_Release(&kbuf);\n  PyBuffer_Release(&vbuf);\n#endif\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_advance(FileOutStreamObj *self, PyObject *args) {\n  size_t len;\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  if (!PyArg_ParseTuple(args, \"n\", &len)) {\n    return NULL;\n  }\n  state = PyEval_SaveThread();\n  bool res = self->stream->advance(len);\n  if (!res) {\n    PyEval_RestoreThread(state);\n    return PyErr_SetFromErrno(PyExc_IOError);\n  }\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyObject *\nFileOutStream_flush(FileOutStreamObj *self) {\n  PyThreadState *state;\n  _ASSERT_STREAM_OPEN;\n  state = PyEval_SaveThread();\n  self->stream->flush();\n  PyEval_RestoreThread(state);\n  Py_RETURN_NONE;\n}\n\n\nstatic PyMethodDef FileOutStream_methods[] = {\n  {\"close\", (PyCFunction)FileOutStream_close, 
METH_NOARGS,\n   \"close(): close the currently open file\"},\n  {\"write\", (PyCFunction)FileOutStream_write, METH_VARARGS,\n   \"write(data): write data to the stream\"},\n  {\"write_vint\", (PyCFunction)FileOutStream_writeVInt, METH_VARARGS,\n   \"write_vint(n): write a variable length integer to the stream\"},\n  {\"write_vlong\", (PyCFunction)FileOutStream_writeVLong, METH_VARARGS,\n   \"write_vlong(n): write a variable length long integer to the stream\"},\n  {\"write_float\", (PyCFunction)FileOutStream_writeFloat, METH_VARARGS,\n   \"write_float(n): write a float to the stream\"},\n  {\"write_string\", (PyCFunction)FileOutStream_writeString, METH_VARARGS,\n   \"write_string(n): write a string to the stream\"},\n  {\"write_bytes\", (PyCFunction)FileOutStream_writeBytes, METH_VARARGS,\n   \"write_bytes(n): write a bytes object to the stream\"},\n  {\"write_tuple\", (PyCFunction)FileOutStream_writeTuple, METH_VARARGS,\n   \"write_tuple(fmt, t): write values from iterable t according to fmt\"},\n  {\"write_output\", (PyCFunction)FileOutStream_writeOutput, METH_VARARGS,\n   \"write_output(key, value[, part]): write pipes [partitioned] output\"},\n  {\"advance\", (PyCFunction)FileOutStream_advance, METH_VARARGS,\n   \"advance(len): advance len bytes\"},\n  {\"flush\", (PyCFunction)FileOutStream_flush, METH_NOARGS,\n   \"flush(): flush the stream\"},\n  {\"__enter__\", (PyCFunction)FileOutStream_enter, METH_NOARGS},\n  {\"__exit__\", (PyCFunction)FileOutStream_exit, METH_VARARGS},  \n  {NULL}  /* Sentinel */\n};\n\n\nPyTypeObject FileOutStreamType = {\n    PyVarObject_HEAD_INIT(NULL, 0)\n    \"sercore.FileOutStream\",                          /* tp_name */\n    sizeof(FileOutStreamObj),                         /* tp_basicsize */\n    0,                                                /* tp_itemsize */\n    0,                                                /* tp_dealloc */\n    0,                                                /* tp_print */\n    0,                   
                             /* tp_getattr */\n    0,                                                /* tp_setattr */\n    0,                                                /* tp_compare */\n    0,                                                /* tp_repr */\n    0,                                                /* tp_as_number */\n    0,                                                /* tp_as_sequence */\n    0,                                                /* tp_as_mapping */\n    0,                                                /* tp_hash */\n    0,                                                /* tp_call */\n    0,                                                /* tp_str */\n    0,                                                /* tp_getattro */\n    0,                                                /* tp_setattro */\n    0,                                                /* tp_as_buffer */\n    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,         /* tp_flags */\n    \"A class to write a stream to a file\",            /* tp_doc */\n    0,                                                /* tp_traverse */\n    0,                                                /* tp_clear */\n    0,                                                /* tp_richcompare */\n    0,                                                /* tp_weaklistoffset */\n    0,                                                /* tp_iter */\n    0,                                                /* tp_iternext */\n    FileOutStream_methods,                            /* tp_methods */\n    0,                                                /* tp_members */\n    0,                                                /* tp_getset */\n    0,                                                /* tp_base */\n    0,                                                /* tp_dict */\n    0,                                                /* tp_descr_get */\n    0,                                                /* tp_descr_set */\n    0,  
                                              /* tp_dictoffset */\n    (initproc)FileOutStream_init,                     /* tp_init */\n    0,                                                /* tp_alloc */\n    0,                                                /* tp_new */\n};\n"
  },
  {
    "path": "src/sercore/streams.h",
    "content": "// BEGIN_COPYRIGHT\n//\n// Copyright 2009-2026 CRS4.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n// use this file except in compliance with the License. You may obtain a copy\n// of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n// License for the specific language governing permissions and limitations\n// under the License.\n//\n// END_COPYRIGHT\n\n#pragma once\n\n#include <Python.h>\n#include <cstdio>\n#include <memory>\n#include <string>\n#include \"HadoopUtils/SerialUtils.hh\"\n\ntypedef struct {\n    PyObject_HEAD\n    FILE *fp;\n    bool closed;\n    std::shared_ptr<HadoopUtils::FileInStream> stream;\n} FileInStreamObj;\n\ntypedef struct {\n    PyObject_HEAD\n    FILE *fp;\n    bool closed;\n    std::shared_ptr<HadoopUtils::FileOutStream> stream;\n} FileOutStreamObj;\n\nextern PyTypeObject FileInStreamType;\nextern PyTypeObject FileOutStreamType;\n"
  },
  {
    "path": "test/__init__.py",
    "content": ""
  },
  {
    "path": "test/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport os\nimport importlib\n\n\n_TEST_DIRS = (\n    \"app\",\n    \"common\",\n    \"mapreduce\",\n    \"hdfs\",  # run these last, in case HDFS needs time to be fully up\n)\n\n\ndef suite():\n    suites = []\n    for dir_ in _TEST_DIRS:\n        module = importlib.import_module(\"%s.%s\" % (dir_, \"all_tests\"))\n        sys.path.insert(0, dir_)\n        path = [os.path.abspath(\"./%s\" % dir_)]\n        suites.append(getattr(module, \"suite\")(path))\n        sys.path.pop(0)\n    return unittest.TestSuite(tuple(suites))\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/app/__init__.py",
    "content": ""
  },
  {
    "path": "test/app/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_submit',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/app/test_submit.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport shutil\nimport tempfile\nimport os\nimport re\nimport sys\nfrom io import StringIO, BytesIO\n\nimport pydoop.app.main as app\nfrom pydoop.app.submit import PydoopSubmitter\n\n\ndef nop(x=None):\n    pass\n\n\nclass Args(object):\n    def __init__(self, **kwargs):\n        for k, v in kwargs.items():\n            setattr(self, k, v)\n\n    def __getattr__(self, _):\n        \"\"\"\n        If we don't have the requested attribute return None.\n        \"\"\"\n        return None\n\n\nclass TestAppSubmit(unittest.TestCase):\n\n    def setUp(self):\n        self.submitter = PydoopSubmitter()\n\n    @staticmethod\n    def _gen_default_args():\n        return Args(\n            entry_point='__main__',\n            log_level='INFO',\n            module='the_module',\n            no_override_env=False,\n            no_override_home=False,\n            python_program='python',\n            input=\"input_path\",\n            output=\"output_path\",\n            job_name=\"job_name\",\n            num_reducers=0,\n        )\n\n    def test_help(self):\n        parser = app.make_parser()\n        # silence!\n        for k in ['submit', 'script']:\n            parser._actions[2].choices[k].format_help = nop\n            parser._actions[2].choices[k].format_usage = nop\n            parser._actions[2].choices[k].error 
= nop\n        parser.format_help = nop\n        parser.format_usage = nop\n        parser.error = nop\n        try:\n            args, unk = parser.parse_known_args(['-h'])\n        except SystemExit as e:\n            self.assertEqual(e.args[0], 0)\n        try:\n            args, unk = parser.parse_known_args(['submit', '-h'])\n        except SystemExit as e:\n            self.assertEqual(e.args[0], 0)\n        try:\n            args, unk = parser.parse_known_args(['submit'])\n        except SystemExit as e:\n            self.assertEqual(e.args[0], 2)\n\n    def _check_args(self, args, args_kv):\n        for k, v in args_kv:\n            k = re.sub(\"^--\", \"\", k).replace('-', '_')\n            self.assertTrue(hasattr(args, k))\n            v1 = getattr(args, k)\n            if v is None:\n                self.assertEqual(v1, True)\n            elif type(v1) is list:\n                pass\n            else:\n                self.assertEqual(v1, v)\n\n    def test_conf_file(self):\n        wd = tempfile.mkdtemp(prefix='pydoop_')\n        conf_file = os.path.join(wd, 'pydoop.conf')\n        args_kv = ((\"--pretend\", None),\n                   (\"--input-format\", 'mapreduce.lib.input.TextInputFormat'),\n                   (\"--output-format\", 'mapreduce.lib.input.TextOutputFormat'),\n                   (\"--num-reducers\", 10),\n                   )\n        try:\n            with open(conf_file, 'w') as cf:\n                d = ''.join(['{}\\n{}\\n'.format(k, v)\n                             if v is not None else '{}\\n'.format(k)\n                             for (k, v) in args_kv])\n                cf.write(d)\n            parser = app.make_parser()\n            parser.format_help = nop\n            module = 'mymod1.mod2.mod3'\n            ainput = 'input'\n            aoutput = 'output'\n            argv = ['submit', module, ainput, aoutput, '@' + conf_file]\n            [args, unknown] = parser.parse_known_args(argv)\n            
self.assertEqual(args.module, module)\n            self.assertEqual(args.input, ainput)\n            self.assertEqual(args.output, aoutput)\n            self.assertEqual(len(unknown), 0)\n            self._check_args(args, args_kv)\n        finally:\n            shutil.rmtree(wd)\n\n    def test_empty_param(self):\n        parser = app.make_parser()\n        parser.format_help = nop\n        program = 'program'\n        ainput = 'input'\n        aoutput = 'output'\n        argv = ['submit', '--module', '', program, ainput, aoutput]\n        [args, unknown] = parser.parse_known_args(argv)\n        self.assertEqual(args.module, '')\n\n    def test_generate_pipes_code_env(self):\n        args = self._gen_default_args()\n        self.submitter.set_args(args)\n        old_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')\n\n        try:\n            # we set this variable for this test since it may not be set in\n            # the environment\n            os.environ['LD_LIBRARY_PATH'] = '/test_path'\n            code = self.submitter._generate_pipes_code()\n            self.assertTrue('export PATH=' in code)\n            self.assertTrue('export PYTHONPATH=' in code)\n            self.assertTrue('export LD_LIBRARY_PATH=\"/test_path\"' in code)\n        finally:\n            os.environ['LD_LIBRARY_PATH'] = old_ld_lib_path\n\n    def test_generate_pipes_code_no_override_ld_path(self):\n        args = self._gen_default_args()\n        args.no_override_ld_path = True\n        self.submitter.set_args(args)\n        old_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')\n\n        try:\n            os.environ['LD_LIBRARY_PATH'] = '/test_path'\n            code = self.submitter._generate_pipes_code()\n            self.assertTrue('export PYTHONPATH=' in code)\n            self.assertFalse('export LD_LIBRARY_PATH=' in code)\n        finally:\n            os.environ['LD_LIBRARY_PATH'] = old_ld_lib_path\n\n    def test_generate_pipes_code_no_override_path(self):\n        args = 
self._gen_default_args()\n        args.no_override_path = True\n        self.submitter.set_args(args)\n\n        code = self.submitter._generate_pipes_code()\n        self.assertTrue('export PYTHONPATH=' in code)\n        self.assertFalse('export PATH=' in code)\n\n    def test_generate_pipes_code_no_override_pythonpath(self):\n        args = self._gen_default_args()\n        args.no_override_pypath = True\n        self.submitter.set_args(args)\n\n        code = self.submitter._generate_pipes_code()\n        self.assertTrue('export PYTHONPATH=\"${PWD}:${PYTHONPATH}\"' in code)\n        self.assertTrue('export PATH=' in code)\n\n    def test_generate_pipes_code_with_set_env(self):\n        args = self._gen_default_args()\n        args.set_env = [\"PATH=/my/custom/path\"]\n        self.submitter.set_args(args)\n        old_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')\n\n        try:\n            os.environ['LD_LIBRARY_PATH'] = '/test_path'\n            code = self.submitter._generate_pipes_code()\n            self.assertTrue('export PATH=\"/my/custom/path\"' in code)\n            self.assertTrue('export PYTHONPATH=' in code)\n            self.assertTrue('export LD_LIBRARY_PATH=\"/test_path\"' in code)\n        finally:\n            os.environ['LD_LIBRARY_PATH'] = old_ld_lib_path\n\n    def test_generate_code_no_env_override(self):\n        args = self._gen_default_args()\n        args.no_override_env = True\n        self.submitter.set_args(args)\n\n        code = self.submitter._generate_pipes_code()\n        self.assertFalse('export PATH=' in code)\n        self.assertFalse('export LD_LIBRARY_PATH=\"/test_path\"' in code)\n        # PYTHONPATH should still be there because we add the hadoop\n        # working directory\n        self.assertTrue('export PYTHONPATH=' in code)\n\n    def test_generate_code_no_env_override_with_set_env(self):\n        args = self._gen_default_args()\n        args.no_override_env = True\n        args.set_env = 
[\"PATH=/my/custom/path\"]\n        self.submitter.set_args(args)\n\n        code = self.submitter._generate_pipes_code()\n\n        self.assertTrue('export PATH=\"/my/custom/path\"' in code)\n        self.assertFalse('export LD_LIBRARY_PATH=\"/test_path\"' in code)\n        # PYTHONPATH should still be there because we add the hadoop\n        # working directory\n        self.assertTrue('export PYTHONPATH=' in code)\n\n    def test_env_arg_to_dict(self):\n        env_arg = ['var1=value1', ' var2 = value2 ', 'var3 = str with = sign']\n        d = self.submitter._env_arg_to_dict(env_arg)\n        self.assertEquals('value1', d['var1'])\n        self.assertEquals('value2', d['var2'])\n        self.assertEquals('str with = sign', d['var3'])\n\n    def test_bad_upload_files(self):\n        args = self._gen_default_args()\n        args.python_zip = [\"\"]\n        self.assertRaises(Exception, self.submitter.set_args, args)\n\n    def test_pretend(self):\n        args = self._gen_default_args()\n        args.pretend = True\n        args.log_level = \"CRITICAL\"\n        stdout = sys.stdout\n        sys.stdout = StringIO() if sys.version_info >= (3,) else BytesIO()\n        self.submitter.set_args(args)\n        self.submitter.run()\n        captured = sys.stdout.getvalue()\n        sys.stdout = stdout\n        self.assertGreaterEqual(len(captured), 0)\n\n\ndef suite():\n    suite_ = unittest.TestLoader().loadTestsFromTestCase(TestAppSubmit)\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/avro/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_io',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/avro/common.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom pydoop.utils.py3compat import StringIO\n\nfrom avro.io import DatumWriter, BinaryEncoder\n\n\nclass AvroSerializer(object):\n\n    def __init__(self, schema):\n        self.schema = schema\n        self.datum_writer = DatumWriter(schema)\n\n    def serialize(self, record):\n        f = StringIO()\n        encoder = BinaryEncoder(f)\n        self.datum_writer.write(record, encoder)\n        return f.getvalue()\n\n\ndef avro_user_record(i):\n    return {\n        \"office\": 'office-%s' % i,\n        \"favorite_number\": i,\n        \"favorite_color\": 'color-%s' % i,\n        \"name\": 'name-%s' % i,\n    }\n"
  },
  {
    "path": "test/avro/test_io.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\nimport unittest\nimport itertools as it\n\nimport avro.datafile as avdf\nfrom avro.io import DatumReader, DatumWriter\n\nfrom pydoop.mapreduce.api import FileSplit\nfrom pydoop.avrolib import (\n    SeekableDataFileReader, AvroReader, AvroWriter, parse\n)\nfrom pydoop.test_utils import WDTestCase\nfrom pydoop.utils.py3compat import czip, cmap\nimport pydoop.hdfs as hdfs\n\nfrom common import avro_user_record\n\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\n\n\nclass TestAvroIO(WDTestCase):\n\n    def setUp(self):\n        super(TestAvroIO, self).setUp()\n        with open(os.path.join(THIS_DIR, \"user.avsc\")) as f:\n            self.schema = parse(f.read())\n\n    def write_avro_file(self, rec_creator, n_samples, sync_interval):\n        avdf.SYNC_INTERVAL = sync_interval\n        self.assertEqual(avdf.SYNC_INTERVAL, sync_interval)\n        fo = self._mkf('data.avro', mode='wb')\n        with avdf.DataFileWriter(fo, DatumWriter(), self.schema) as writer:\n            for i in range(n_samples):\n                writer.append(rec_creator(i))\n        return fo.name\n\n    def test_seekable(self):\n        fn = self.write_avro_file(avro_user_record, 500, 1024)\n        with open(fn, 'rb') as f:\n            sreader = SeekableDataFileReader(f, DatumReader())\n            res = [t for t in czip(cmap(\n          
      lambda _: f.tell(), it.repeat(1)\n            ), sreader)]\n            sreader.align_after(res[-1][0])\n            with self.assertRaises(StopIteration):\n                r = next(sreader)\n            sreader.align_after(0)\n            r = next(sreader)\n            self.assertEqual(r, res[0][1])\n\n            def offset_iterator():\n                s = -1\n                for o, r in res:\n                    sreader.align_after(o)\n                    t = f.tell()\n                    if t == s:\n                        continue\n                    s = t\n                    try:\n                        x = next(sreader)\n                    except StopIteration:\n                        return\n                    yield (t, x)\n\n            i = 0\n            for xo, x in offset_iterator():\n                sreader.align_after(xo)\n                for o, r in res[i:]:\n                    if o >= xo:\n                        self.assertEqual(x, r)\n                        break\n                    i += 1\n\n    def test_avro_reader(self):\n\n        N = 500\n        fn = self.write_avro_file(avro_user_record, N, 1024)\n        url = hdfs.path.abspath(fn, local=True)\n\n        class FunkyCtx(object):\n            def __init__(self, isplit):\n                self.input_split = isplit\n\n        def get_areader(offset, length):\n            isplit = FileSplit(url, offset, length)\n            ctx = FunkyCtx(isplit)\n            return AvroReader(ctx)\n\n        areader = get_areader(0, 14)\n        file_length = areader.reader.file_length\n        with self.assertRaises(StopIteration):\n            next(areader)\n        areader = get_areader(0, file_length)\n        with SeekableDataFileReader(open(fn, 'rb'), DatumReader()) as sreader:\n            for (o, a), s in czip(areader, sreader):\n                self.assertEqual(a, s)\n        mid_len = int(file_length / 2)\n        lows = [x for x in get_areader(0, mid_len)]\n        highs = [x for x in 
get_areader(mid_len, file_length)]\n        self.assertEqual(N, len(lows) + len(highs))\n\n    def test_avro_writer(self):\n\n        class FunkyCtx(object):\n\n            def __init__(self_, job_conf):\n                self_.job_conf = job_conf\n\n        class AWriter(AvroWriter):\n\n            schema = self.schema\n\n            def emit(self_, key, value):\n                self_.writer.append(key)\n\n        ctx = FunkyCtx({\n            'mapreduce.task.partition': 1,\n            'mapreduce.task.output.dir': hdfs.path.abspath(self.wd, local=True)\n        })\n        awriter = AWriter(ctx)\n        N = 10\n        for i in range(N):\n            awriter.emit(avro_user_record(i), '')\n        awriter.close()\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestAvroIO('test_seekable'))\n    suite_.addTest(TestAvroIO('test_avro_reader'))\n    suite_.addTest(TestAvroIO('test_avro_writer'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/avro/user.avsc",
    "content": "{\n    \"namespace\": \"example.avro\",\n    \"type\": \"record\",\n    \"name\": \"User\",\n    \"fields\": [\n        {\"name\": \"office\", \"type\": \"string\"},\n        {\"name\": \"name\", \"type\": \"string\"},\n        {\"name\": \"favorite_number\",  \"type\": [\"int\", \"null\"]},\n        {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n    ]\n}\n"
  },
  {
    "path": "test/common/__init__.py",
    "content": ""
  },
  {
    "path": "test/common/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport unittest\n\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_hadoop_utils',\n    'test_hadut',\n    'test_pydoop',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\ndef main():\n    result = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not result.wasSuccessful())\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "test/common/test_hadoop_utils.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport tempfile\nimport os\nimport shutil\n\nfrom xml.dom.minidom import getDOMImplementation\n\nDOM_IMPL = getDOMImplementation()\n\nimport pydoop.hadoop_utils as hu\n\n\nclass TestHadoopUtils(unittest.TestCase):\n\n    def setUp(self):\n        self.hadoop_home = tempfile.mkdtemp(prefix=\"pydoop_test_\")\n        self.hadoop_conf = os.path.join(self.hadoop_home, \"conf\")\n        os.mkdir(self.hadoop_conf)\n        self.orig_env = os.environ.copy()\n        os.environ[\"HADOOP_CONF_DIR\"] = self.hadoop_conf\n        self.pf = hu.PathFinder()\n\n    def tearDown(self):\n        os.environ.clear()\n        os.environ.update(self.orig_env)\n        shutil.rmtree(self.hadoop_home)\n\n    def test_get_hadoop_params(self):\n        self.__check_params()\n        self.__check_params('', {})\n        self.__check_params('<?xml version=\"1.0\"?>', {})\n        doc = DOM_IMPL.createDocument(None, \"configuration\", None)\n        self.__check_params(doc.toxml(), {})\n        root = doc.documentElement\n        prop = root.appendChild(doc.createElement(\"property\"))\n        self.__check_params(doc.toxml(), {})\n        for s in \"name\", \"value\":\n            n = prop.appendChild(doc.createElement(s))\n            n.appendChild(doc.createTextNode(s.upper()))\n        self.__check_params(doc.toxml(), {\"NAME\": 
\"VALUE\"})\n\n    def __check_params(self, xml_content=None, expected=None):\n        if expected is None:\n            expected = {}\n        xml_fn = os.path.join(self.hadoop_conf, \"core-site.xml\")\n        if os.path.exists(xml_fn):\n            os.remove(xml_fn)\n        if xml_content is not None:\n            with open(xml_fn, \"w\") as fo:\n                fo.write(xml_content)\n        params = self.pf.hadoop_params()\n        self.assertEqual(params, expected)\n\n\ndef suite():\n    suite = unittest.TestSuite()\n    suite.addTest(TestHadoopUtils('test_get_hadoop_params'))\n    return suite\n\n\nif __name__ == '__main__':\n    runner = unittest.TextTestRunner(verbosity=2)\n    runner.run((suite()))\n"
  },
  {
    "path": "test/common/test_hadut.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n# pylint: disable=W0212\n\n\"\"\"\nTest suite for pydoop.hadut\n\"\"\"\n\nimport subprocess\nimport unittest\n\nimport pydoop.hadut as hadut\n\n\ndef pair_set(seq):\n    return set((seq[i], seq[i + 1]) for i in range(0, len(seq), 2))\n\n\nclass TestHadut(unittest.TestCase):\n\n    CHECKNATIVE_OUT, _ = subprocess.Popen(\n        [\"hadoop\", \"checknative\"], universal_newlines=True,\n        stdout=subprocess.PIPE, stderr=subprocess.PIPE\n    ).communicate()\n\n    def assertEqualPairSet(self, seq1, seq2):\n        return self.assertEqual(pair_set(seq1), pair_set(seq2))\n\n    def test_pop_generic_args(self):\n        self.assertRaises(ValueError, hadut._pop_generic_args, ['-fs'])\n        args = [\n            '-input', 'i',\n            '-libjars', 'l',\n            '-fs', 'f',\n            '-output', 'o',\n            '-jar', 'pippo'\n        ]\n        gargs = hadut._pop_generic_args(args)\n        self.assertEqualPairSet(gargs, ['-libjars', 'l', '-fs', 'f'])\n        self.assertEqualPairSet(\n            args, ['-input', 'i', '-output', 'o', '-jar', 'pippo']\n        )\n\n    def test_merge_csv_args(self):\n        self.assertRaises(ValueError, hadut._merge_csv_args, ['-archives'])\n        args = [\n            '-libjars', 'l1',\n            '-fs', 'f',\n            '-libjars', 'l2',\n            '-files', 'pippo',\n       
 ]\n        hadut._merge_csv_args(args)\n        try:\n            self.assertEqualPairSet(\n                args, ['-libjars', 'l1,l2', '-fs', 'f', '-files', 'pippo']\n            )\n        except AssertionError:\n            self.assertEqualPairSet(\n                args, ['-libjars', 'l2,l1', '-fs', 'f', '-files', 'pippo']\n            )\n\n    def test_cmd(self):\n        out = hadut.run_cmd(\"checknative\", keep_streams=True)\n        self.assertEqual(out, self.CHECKNATIVE_OUT)\n\n    def test_run_class(self):\n        out = hadut.run_class(\n            \"org.apache.hadoop.util.NativeLibraryChecker\", keep_streams=True\n        )\n        self.assertEqual(out, self.CHECKNATIVE_OUT)\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestHadut('test_pop_generic_args'))\n    suite_.addTest(TestHadut('test_merge_csv_args'))\n    suite_.addTest(TestHadut('test_cmd'))\n    suite_.addTest(TestHadut('test_run_class'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/common/test_pydoop.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nTest suite for top-level functions.\n\"\"\"\n\nimport unittest\nimport os\nimport tempfile\nimport shutil\nfrom imp import reload\n\nimport pydoop\n\n\nclass TestPydoop(unittest.TestCase):\n\n    def setUp(self):\n        self.wd = tempfile.mkdtemp(prefix='pydoop_test_')\n        self.old_vars = {\n            'HADOOP_HOME': os.getenv('HADOOP_HOME'),\n            'HADOOP_CONF_DIR': os.getenv('HADOOP_CONF_DIR'),\n        }\n\n    def tearDown(self):\n        for k, v in self.old_vars.items():\n            if v:\n                os.environ[k] = v\n            else:\n                os.environ.pop(k, None)\n        reload(pydoop)\n        shutil.rmtree(self.wd)\n\n    def test_home(self):\n        old_home = pydoop.hadoop_home()\n        if os.path.isdir(old_home):\n            new_home = os.path.join(self.wd, 'hadoop')\n            os.symlink(old_home, new_home)\n            os.environ['HADOOP_HOME'] = new_home\n            reload(pydoop)\n            self.assertEqual(pydoop.hadoop_home(), new_home)\n\n    def test_conf(self):\n        old_conf = pydoop.hadoop_conf()\n        new_conf = os.path.join(self.wd, \"conf\")\n        shutil.copytree(old_conf, new_conf)\n        os.environ['HADOOP_CONF_DIR'] = new_conf\n        reload(pydoop)\n        self.assertEqual(pydoop.hadoop_conf(), new_conf)\n\n    def 
test_pydoop_jar_path(self):\n        jar_path = pydoop.jar_path()\n        if jar_path is not None:\n            self.assertTrue(os.path.exists(jar_path))\n            directory, filename = os.path.split(jar_path)\n            self.assertEqual(filename, pydoop.jar_name())\n            self.assertEqual('pydoop', os.path.basename(directory))\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestPydoop('test_home'))\n    suite_.addTest(TestPydoop('test_conf'))\n    suite_.addTest(TestPydoop('test_pydoop_jar_path'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/common/test_test_support.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\\\nTest suite for pydoop.test_support\n\"\"\"\n\nimport unittest\nimport uuid\nimport os\n\nimport pydoop.test_support as pts\n\nTARGET_CODE = \"\"\"\\\n#!/foo/bar/python\n\nfrom __future__ import print_function\nfrom future import whatever\nimport foobar\n\nprint(\"Hello, world\")\n\"\"\"\n\n\nclass TestTestSupport(unittest.TestCase):\n\n    def test_inject_code(self):\n        lines = TARGET_CODE.splitlines()\n        new_code = uuid.uuid4().hex\n        ret = pts.inject_code(new_code, TARGET_CODE)\n        ret_lines = ret.splitlines()\n        self.assertEqual(ret_lines[:3], lines[:3])\n        self.assertTrue(new_code in ret_lines[3:-4])\n        self.assertEqual(ret_lines[-4:], lines[-4:])\n\n    def test_set_python_cmd(self):\n        cmd = \"/usr/bin/python3\"\n        ret = pts.set_python_cmd(TARGET_CODE, cmd)\n        self.assertEqual(ret.split(os.linesep, 1)[0], \"#!%s\" % cmd)\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestTestSupport('test_inject_code'))\n    suite_.addTest(TestTestSupport('test_set_python_cmd'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/__init__.py",
    "content": ""
  },
  {
    "path": "test/hdfs/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_core',\n    'test_local_fs',\n    'test_hdfs_fs',\n    'test_path',\n    'test_hdfs',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/hdfs/common_hdfs_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport sys\nimport os\nimport unittest\nimport uuid\nimport shutil\nimport operator\nimport array\nfrom ctypes import create_string_buffer\n\nimport pydoop.hdfs as hdfs\nimport pydoop.test_utils as utils\nfrom pydoop.utils.py3compat import _is_py3\n\n\nclass TestCommon(unittest.TestCase):\n\n    def __init__(self, target, hdfs_host='', hdfs_port=0):\n        unittest.TestCase.__init__(self, target)\n        self.hdfs_host = hdfs_host\n        self.hdfs_port = hdfs_port\n\n    def setUp(self):\n        self.fs = hdfs.hdfs(self.hdfs_host, self.hdfs_port)\n        self.wd = utils.make_wd(self.fs)\n\n    def tearDown(self):\n        self.fs.delete(self.wd)\n        self.fs.close()\n\n    def _make_random_path(self, where=None, add_uni=True):\n        rval = \"%s/%s\" % (where or self.wd, uuid.uuid4().hex)\n        if add_uni:\n            rval = \"%s_%s\" % (rval, utils.UNI_CHR)\n        return rval\n\n    # also an implicit test for the create_directory method\n    def _make_random_dir(self, where=None, add_uni=True):\n        path = self._make_random_path(where=where, add_uni=add_uni)\n        self.fs.create_directory(path)\n        self.assertTrue(self.fs.exists(path))\n        return path\n\n    # also an implicit test for the write method\n    def _make_random_file(self, where=None, content=None, **kwargs):\n        
kwargs[\"mode\"] = \"w\"\n        content = content or utils.make_random_data(printable=True)\n        path = self._make_random_path(where=where)\n        with self.fs.open_file(path, **kwargs) as fo:\n            i = 0\n            bytes_written = 0\n            bufsize = 24 * 1024 * 1024\n            while i < len(content):\n                bytes_written += fo.write(content[i: i + bufsize])\n                i += bufsize\n\n        self.assertEqual(bytes_written, len(content))\n        return path\n\n    def failUnlessRaisesExternal(self, excClass, callableObj, *args, **kwargs):\n        utils.silent_call(\n            self.failUnlessRaises, excClass, callableObj, *args, **kwargs\n        )\n\n    assertRaisesExternal = failUnlessRaisesExternal\n\n    def assertEqualPathInfo(self, info1, info2, tolerance=10):\n        \"\"\"\\\n        Check path info results for equality.\n\n        Since ``last_access`` values are timestamps in seconds, we\n        need to tolerate a small difference between them. 
In practice,\n        unless the expected value is stored way in advance, this\n        difference should be 0 or 1 second.\n        \"\"\"\n        self.assertEqual(info1.keys(), info2.keys())\n        for (k, v1) in info1.items():\n            v2 = info2[k]\n            if k == \"last_access\":\n                self.assertLessEqual(abs(v2 - v1), tolerance)\n            else:\n                self.assertEqual(v1, v2)\n\n    def open_close(self):\n        path = self._make_random_path()\n        self.fs.open_file(path, \"w\").close()\n        with self.fs.open_file(path, \"r\") as f:\n            self.assertFalse(f.closed)\n        self.assertTrue(f.closed)\n        self.assertRaises(ValueError, f.read)\n        path = self._make_random_path()\n        self.assertRaisesExternal(IOError, self.fs.open_file, path, \"r\")\n        self.assertRaises(ValueError, self.fs.open_file, \"\")\n\n    def delete(self):\n        parent = self._make_random_dir()\n        path = self._make_random_dir(where=parent)\n        for i in False, True:\n            fn = self._make_random_file(where=path)\n            self.fs.delete(fn, recursive=i)\n            self.assertFalse(self.fs.exists(fn))\n        self._make_random_file(where=path)\n        self.fs.delete(path, recursive=True)\n        self.assertFalse(self.fs.exists(path))\n        self.fs.delete(parent, recursive=False)\n        self.assertFalse(self.fs.exists(parent))\n        self.assertRaises(ValueError, self.fs.delete, \"\")\n\n    def copy(self):\n        local_fs = hdfs.hdfs('', 0)\n        local_wd = utils.make_wd(local_fs)\n        from_path = os.path.join(local_wd, uuid.uuid4().hex)\n        content = uuid.uuid4().bytes\n        with open(from_path, \"wb\") as f:\n            f.write(content)\n        to_path = self._make_random_file()\n        local_fs.copy(from_path, self.fs, to_path)\n        self.assertRaises(ValueError, local_fs.copy, \"\", self.fs, \"\")\n        local_fs.close()\n        with 
self.fs.open_file(to_path) as f:\n            self.assertEqual(f.read(), content)\n        shutil.rmtree(local_wd)\n\n    def move(self):\n        content = utils.make_random_data(printable=True)\n        from_path = self._make_random_file(content=content)\n        to_path = self._make_random_path()\n        self.fs.move(from_path, self.fs, to_path)\n        self.assertFalse(self.fs.exists(from_path))\n        with self.fs.open_file(to_path) as f:\n            self.assertEqual(f.read(), content)\n        self.assertRaises(ValueError, self.fs.move, \"\", self.fs, \"\")\n\n    def chmod(self):\n        new_perm = 0o777\n        path = self._make_random_dir()\n        old_perm = self.fs.get_path_info(path)[\"permissions\"]\n        assert old_perm != new_perm\n        self.fs.chmod(path, new_perm)\n        self.assertEqual(self.fs.get_path_info(path)[\"permissions\"], new_perm)\n        self.fs.chmod(path, old_perm)\n        self.assertEqual(self.fs.get_path_info(path)[\"permissions\"], old_perm)\n        self.assertRaises(ValueError, self.fs.chmod, \"\", new_perm)\n\n    def __set_and_check_perm(self, path, new_mode, expected_mode):\n        self.fs.chmod(path, new_mode)\n        perm = self.fs.get_path_info(path)[\"permissions\"]\n        self.assertEqual(expected_mode, perm)\n\n    def chmod_w_string(self):\n        path = self._make_random_dir()\n        self.fs.chmod(path, 0o500)\n        # each user\n        self.__set_and_check_perm(path, \"u+w\", 0o700)\n        self.__set_and_check_perm(path, \"g+w\", 0o720)\n        self.__set_and_check_perm(path, \"o+w\", 0o722)\n        # each permission mode\n        self.__set_and_check_perm(path, \"o+r\", 0o726)\n        self.__set_and_check_perm(path, \"o+x\", 0o727)\n        # subtract operation, and multiple permission modes\n        self.__set_and_check_perm(path, \"o-rwx\", 0o720)\n        # multiple users\n        self.__set_and_check_perm(path, \"ugo-rwx\", 0o000)\n        # 'a' user\n        
self.__set_and_check_perm(path, \"a+r\", 0o444)\n        # blank user -- should respect the user's umask\n        umask = os.umask(0o007)\n        self.fs.chmod(path, \"+w\")\n        perm = self.fs.get_path_info(path)[\"permissions\"]\n        os.umask(umask)\n        self.assertEqual(0o664, perm)\n        # assignment op\n        self.__set_and_check_perm(path, \"a=rwx\", 0o777)\n\n    def file_attrs(self):\n        path = self._make_random_path()\n        content = utils.make_random_data()\n        for mode in \"wb\", \"wt\":\n            with self.fs.open_file(path, mode) as f:\n                self.assertTrue(f.name.endswith(path))\n                self.assertTrue(f.fs is self.fs)\n                self.assertEqual(f.size, 0)\n                self.assertEqual(f.mode, mode)\n                self.assertTrue(f.writable())\n                f.write(content if mode == \"wb\" else content.decode(\"utf-8\"))\n            self.assertEqual(f.size, len(content))\n        for mode in \"rb\", \"rt\":\n            with self.fs.open_file(path, mode) as f:\n                self.assertTrue(f.name.endswith(path))\n                self.assertTrue(f.fs is self.fs)\n                self.assertEqual(f.size, len(content))\n                self.assertEqual(f.mode, mode)\n                self.assertFalse(f.writable())\n\n    def flush(self):\n        path = self._make_random_path()\n        with self.fs.open_file(path, \"w\") as f:\n            f.write(utils.make_random_data())\n            f.flush()\n\n    def available(self):\n        content = utils.make_random_data()\n        path = self._make_random_file(content=content)\n        with self.fs.open_file(path) as f:\n            self.assertEqual(len(content), f.available())\n\n    def get_path_info(self):\n        content = utils.make_random_data()\n        path = self._make_random_file(content=content)\n        info = self.fs.get_path_info(path)\n        self.__check_path_info(info, kind=\"file\", size=len(content))\n        
self.assertTrue(info['name'].endswith(path))\n        path = self._make_random_dir()\n        info = self.fs.get_path_info(path)\n        self.__check_path_info(info, kind=\"directory\")\n        self.assertTrue(info['name'].endswith(path))\n        self.assertRaises(\n            IOError, self.fs.get_path_info, self._make_random_path()\n        )\n        self.assertRaises(ValueError, self.fs.get_path_info, \"\")\n\n    def read(self):\n        content = utils.make_random_data()\n        path = self._make_random_file(content=content)\n        with self.fs.open_file(path) as f:\n            self.assertEqual(f.read(), content)\n        with self.fs.open_file(path) as f:\n            self.assertEqual(f.read(-1), content)\n        with self.fs.open_file(path) as f:\n            self.assertEqual(f.read(3), content[:3])\n            self.assertEqual(f.read(3), content[3:6])\n            if not _is_py3 and not self.fs.host:\n                self.assertRaises(ValueError, f.write, content)\n            else:\n                self.assertRaises(IOError, f.write, content)\n\n    def __read_chunk(self, chunk_factory):\n        content = utils.make_random_data()\n        path = self._make_random_file(content=content)\n        size = len(content)\n        for chunk_size in size - 1, size, size + 1:\n            with self.fs.open_file(path) as f:\n                chunk = chunk_factory(chunk_size)\n                bytes_read = f.read_chunk(chunk)\n                self.assertEqual(bytes_read, min(size, chunk_size))\n                self.assertEqual(bytes(bytearray(chunk))[:bytes_read],\n                                 content[:bytes_read])\n\n    def read_chunk(self):\n        def array_by_len(length):\n            return array.array(\"b\", b\"\\x00\" * length)\n        for factory in bytearray, create_string_buffer, array_by_len:\n            self.__read_chunk(factory)\n\n    def write(self):\n        content = utils.make_random_data()\n        path = self._make_random_path()\n   
     with self.fs.open_file(path, \"w\") as fo:\n            bytes_written = fo.write(content)\n            self.assertEqual(bytes_written, len(content))\n        with self.fs.open_file(path) as fo:\n            self.assertEqual(content, fo.read())\n        with self.fs.open_file(path, \"w\") as fo:\n            bytes_written = fo.write(bytearray(content))\n            self.assertEqual(bytes_written, len(content))\n        with self.fs.open_file(path) as fo:\n            self.assertEqual(content, fo.read())\n        chunk = create_string_buffer(content, len(content))\n        with self.fs.open_file(path, \"w\") as fo:\n            bytes_written = fo.write(chunk)\n            self.assertEqual(bytes_written, len(content))\n\n    def append(self):\n        replication = 1  # see https://issues.apache.org/jira/browse/HDFS-3091\n        content, update = utils.make_random_data(), utils.make_random_data()\n        path = self._make_random_path()\n        with self.fs.open_file(path, \"w\", replication=replication) as fo:\n            fo.write(content)\n        try:\n            with utils.silent_call(self.fs.open_file, path, \"a\") as fo:\n                fo.write(update)\n        except IOError:\n            sys.stderr.write(\"NOT SUPPORTED ... 
\")\n            return\n        else:\n            with self.fs.open_file(path) as fi:\n                self.assertEqual(fi.read(), content + update)\n\n    def tell(self):\n        offset = 3\n        path = self._make_random_file()\n        with self.fs.open_file(path) as f:\n            f.read(offset)\n            self.assertEqual(f.tell(), offset)\n\n    def pread(self):\n        content = utils.make_random_data()\n        offset, length = 2, 3\n        path = self._make_random_file(content=content)\n        with self.fs.open_file(path) as f:\n            self.assertEqual(\n                f.pread(offset, length), content[offset: offset + length]\n            )\n            self.assertEqual(f.tell(), 0)\n            self.assertEqual(content[1:], f.pread(1, -1))\n            self.assertRaises(IOError, f.pread, -1, 10)\n            # read starting past end of file\n            self.assertRaises(IOError, f.pread, len(content) + 1, 10)\n            # read past end of file\n            buf = f.pread(len(content) - 2, 10)\n            self.assertEqual(2, len(buf))\n\n    def pread_chunk(self):\n        content = utils.make_random_data()\n        offset, length = 2, 3\n        chunk = create_string_buffer(length)\n        path = self._make_random_file(content=content)\n        with self.fs.open_file(path) as f:\n            bytes_read = f.pread_chunk(offset, chunk)\n            self.assertEqual(bytes_read, length)\n            self.assertEqual(chunk.value, content[offset: offset + length])\n            self.assertEqual(f.tell(), 0)\n\n    def copy_on_self(self):\n        content = utils.make_random_data()\n        path = self._make_random_file(content=content)\n        path1 = self._make_random_path()\n        self.fs.copy(path, self.fs, path1)\n        with self.fs.open_file(path1) as f:\n            self.assertEqual(f.read(), content)\n\n    def rename(self):\n        old_path = self._make_random_file()\n        new_path = self._make_random_path()\n        
self.fs.rename(old_path, new_path)\n        self.assertTrue(self.fs.exists(new_path))\n        self.assertFalse(self.fs.exists(old_path))\n        self.assertRaises(ValueError, self.fs.rename, old_path, \"\")\n        self.assertRaises(ValueError, self.fs.rename, \"\", new_path)\n\n    def change_dir(self):\n        cwd = self.fs.working_directory()\n        new_d = self._make_random_path()  # does not need to exist\n        self.fs.set_working_directory(new_d)\n        self.assertEqual(self.fs.working_directory(), new_d)\n        self.fs.set_working_directory(cwd)\n        self.assertEqual(self.fs.working_directory(), cwd)\n        self.assertRaises(ValueError, self.fs.set_working_directory, \"\")\n\n    def list_directory(self):\n        new_d = self._make_random_dir()\n        self.assertEqual(self.fs.list_directory(new_d), [])\n        paths = [self._make_random_file(where=new_d) for _ in range(3)]\n        paths.sort(key=os.path.basename)\n        infos = self.fs.list_directory(new_d)\n        infos.sort(key=lambda p: os.path.basename(p[\"name\"]))\n        self.assertEqual(len(infos), len(paths))\n        for i, p in zip(infos, paths):\n            self.__check_path_info(i, kind=\"file\")\n            self.assertTrue(i['name'].endswith(p))\n        self.assertRaises(\n            IOError, self.fs.list_directory, self._make_random_path()\n        )\n        self.assertRaises(ValueError, self.fs.list_directory, \"\")\n\n    def __check_readline(self, get_lines):\n        samples = [\n            b\"foo\\nbar\\n\\ntar\",\n            b\"\\nfoo\\nbar\\n\\ntar\",\n            b\"foo\\nbar\\n\\ntar\\n\",\n            b\"\\n\\n\\n\", b\"\\n\", b\"\",\n            b\"foobartar\",\n        ]\n        path = self._make_random_path()\n        for text in samples:\n            expected_lines = text.splitlines(True)\n            with self.fs.open_file(path, \"w\") as f:\n                f.write(text)\n            with self.fs.open_file(path) as f:\n                lines = 
get_lines(f)\n            self.assertEqual(lines, expected_lines)\n\n    def readline(self):\n        def get_lines(f):\n            lines = []\n            while 1:\n                line = f.readline()\n                if not line:\n                    break\n                lines.append(line)\n            return lines\n        self.__check_readline(get_lines)\n\n    def readline_big(self):\n        for i in range(10, 23):\n            x = b\"*\" * (2**i) + b\"\\n\"\n            path = self._make_random_file(content=x)\n            with self.fs.open_file(path) as f:\n                line = f.readline()\n            self.assertEqual(\n                line, x, \"len(a) = %d, len(x) = %d\" % (len(line), len(x))\n            )\n\n    def readline_and_read(self):\n        content = b\"first line\\nsecond line\\n\"\n        path = self._make_random_file(content=content)\n        chunks = []\n        with self.fs.open_file(path) as f:\n            chunks.append(f.read(1))\n            chunks.append(f.readline())\n            chunks.append(f.read(4))\n        self.assertEqual(chunks, [b'f', b'irst line\\n', b'seco'])\n\n    def iter_lines(self):\n\n        def get_lines_explicit(f):\n            lines = []\n            while 1:\n                try:\n                    lines.append(next(f))\n                except StopIteration:\n                    break\n            return lines\n\n        def get_lines_implicit(f):\n            return [l for l in f]\n\n        for fun in get_lines_explicit, get_lines_implicit:\n            self.__check_readline(fun)\n\n    def seek(self):\n        lines = [b\"1\\n\", b\"2\\n\", b\"3\\n\"]\n        data = b\"\".join(lines)\n        path = self._make_random_path()\n        with self.fs.open_file(path, \"w\") as f:\n            f.write(data)\n        with self.fs.open_file(path) as f:\n            for i, l in enumerate(lines):\n                f.seek(sum(map(len, lines[:i])))\n                self.assertEqual(f.readline(), l)\n           
     f.seek(0)\n                self.assertEqual(f.readline(), lines[0])\n                f.seek(sum(map(len, lines[:i])))\n                self.assertEqual(f.readline(), l)\n        with self.fs.open_file(path) as f:\n            f.seek(1)\n            f.seek(1, os.SEEK_CUR)\n            self.assertEqual(f.tell(), 2)\n            f.seek(-1, os.SEEK_END)\n            self.assertEqual(f.tell(), len(data) - 1)\n            # seek past end of file\n            self.assertRaises(IOError, f.seek, len(data) + 10)\n\n    def block_boundary(self):\n        path = self._make_random_path()\n        CHUNK_SIZE = 10\n        N = 2\n        # (dfs.namenode.fs-limits.min-block-size): 4096 < 1048576\n        bs = max(1048576, N * utils.get_bytes_per_checksum())\n        total_data_size = 2 * bs\n        with self.fs.open_file(path, \"w\", blocksize=bs) as f:\n            i = 0\n            bufsize = 12 * 1024 * 1024\n            while i < total_data_size:\n                data = b'X' * min(bufsize, total_data_size - i)\n                f.write(data)\n                i += bufsize\n\n        with self.fs.open_file(path) as f:\n            p = total_data_size - CHUNK_SIZE\n            for pos in (0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1,\n                        total_data_size - 1):\n                expected_len = (\n                    CHUNK_SIZE if pos <= p else total_data_size - pos\n                )\n                f.seek(pos)\n                chunk = f.read(CHUNK_SIZE)\n                self.assertEqual(len(chunk), expected_len)\n\n    def walk(self):\n        new_d, new_f = self._make_random_dir(), self._make_random_file()\n        for top in new_d, new_f:\n            items = list(self.fs.walk(top))\n            self.assertEqual(len(items), 1)\n            self.assertEqualPathInfo(items[0], self.fs.get_path_info(top))\n        top = new_d\n        cache = [top]\n        for _ in range(2):\n            cache.append(self._make_random_file(where=top))\n        parent = 
self._make_random_dir(where=top)\n        cache.append(parent)\n        for _ in range(2):\n            cache.append(self._make_random_file(where=parent))\n        child = self._make_random_dir(where=parent)\n        cache.append(child)\n        for _ in range(2):\n            cache.append(self._make_random_file(where=child))\n        infos = list(self.fs.walk(top))\n        expected_infos = [self.fs.get_path_info(p) for p in cache]\n        self.assertEqual(len(infos), len(expected_infos))\n        for l in infos, expected_infos:\n            l.sort(key=operator.itemgetter(\"name\"))\n        for i, e in zip(infos, expected_infos):\n            self.assertEqualPathInfo(i, e)\n        if not _is_py3:\n            # check it's OK for \"top\" to be a bytes string\n            b_top = self._make_random_dir(add_uni=False)\n            try:\n                b_top = b_top.encode()\n            except Exception:\n                pass\n            list(self.fs.walk(b_top))\n        nonexistent_walk = self.fs.walk(self._make_random_path())\n        if _is_py3:\n            self.assertRaises(OSError, lambda: next(nonexistent_walk))\n        else:\n            self.assertRaises(IOError, lambda: next(nonexistent_walk))\n        for top in '', None:\n            self.assertRaises(ValueError, lambda: next(self.fs.walk(top)))\n\n    def exists(self):\n        self.assertFalse(self.fs.exists('some_file'))\n        self.assertFalse(self.fs.exists('some_file/other_file'))\n        dname = self._make_random_dir()\n        self.assertTrue(self.fs.exists(dname))\n        fname = self._make_random_file()\n        self.assertTrue(self.fs.exists(fname))\n        self.assertRaises(ValueError, self.fs.exists, \"\")\n\n    def text_io(self):\n        t_path, b_path = self._make_random_path(), self._make_random_path()\n        text = u'a string' + utils.UNI_CHR\n        data = text.encode(\"utf-8\")\n        with self.fs.open_file(t_path, \"wt\") as fo:\n            chars_written = 
fo.write(text)\n        with self.fs.open_file(b_path, \"w\") as fo:\n            bytes_written = fo.write(data)\n        self.assertEqual(chars_written, len(text))\n        self.assertEqual(bytes_written, len(data))\n        with self.fs.open_file(t_path, \"rt\") as f:\n            self.assertEqual(f.read(), text)\n            f.seek(2)\n            self.assertEqual(f.read(), text[2:])\n            self.assertEqual(f.pread(3, 4), text[3:7])\n            with self.assertRaises(AttributeError):\n                f.read_chunk(\"\")\n                f.pread_chunk(1, \"\")\n        with self.fs.open_file(b_path, \"r\") as f:\n            self.assertEqual(f.read(), data)\n\n    def __check_path_info(self, info, **expected_values):\n        keys = ('kind', 'group', 'name', 'last_mod', 'replication', 'owner',\n                'permissions', 'block_size', 'last_access', 'size')\n        for k in keys:\n            self.assertTrue(k in info)\n        for k, exp_v in list(expected_values.items()):\n            v = info[k]\n            self.assertEqual(v, exp_v)\n\n\ndef common_tests():\n    return [\n        'open_close',\n        'delete',\n        'copy',\n        'move',\n        'chmod',\n        'chmod_w_string',\n        'file_attrs',\n        'flush',\n        'read',\n        'read_chunk',\n        'write',\n        'append',\n        'tell',\n        'pread',\n        'pread_chunk',\n        'rename',\n        'change_dir',\n        'copy_on_self',\n        'available',\n        'get_path_info',\n        'list_directory',\n        'readline',\n        'readline_big',\n        'readline_and_read',\n        'iter_lines',\n        'seek',\n        'block_boundary',\n        'walk',\n        'exists',\n        'text_io',\n    ]\n"
  },
  {
    "path": "test/hdfs/test_common.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\n\nfrom pydoop.hdfs.common import parse_mode\n\n\nclass TestMode(unittest.TestCase):\n\n    def runTest(self):\n        for mode in \"r\", \"rb\":\n            self.assertEqual(parse_mode(mode), (\"r\", False))\n        for mode in \"w\", \"wb\":\n            self.assertEqual(parse_mode(mode), (\"w\", False))\n        for mode in \"a\", \"ab\":\n            self.assertEqual(parse_mode(mode), (\"a\", False))\n        self.assertEqual(parse_mode(\"rt\"), (\"r\", True))\n        self.assertEqual(parse_mode(\"wt\"), (\"w\", True))\n        self.assertEqual(parse_mode(\"at\"), (\"a\", True))\n        for mode in \"\", \"k\", \"kb\", \"kt\":\n            self.assertRaises(ValueError, parse_mode, mode)\n\n\ndef suite():\n    return unittest.TestLoader().loadTestsFromTestCase(TestMode)\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/test_core.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport uuid\n\nfrom pydoop.hdfs.core import init\nhdfs = init()\n\n\nclass TestCore(unittest.TestCase):\n\n    def test_default(self):\n        path = \"/tmp/pydoop-test-{}\".format(uuid.uuid4().hex)\n        fs = f = None\n        try:\n            fs = hdfs.CoreHdfsFs(\"default\", 0)\n            f = fs.open_file(path, \"w\")\n            f.write(b\"bar\\n\")\n        finally:\n            if f:\n                f.close()\n                fs.delete(path)\n            if fs:\n                fs.close()\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestCore('test_default'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/test_hdfs.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nfrom __future__ import division\n\nimport unittest\nimport tempfile\nimport os\nimport stat\nfrom pydoop.utils.py3compat import czip\nfrom threading import Thread\n\nimport pydoop.hdfs as hdfs\nfrom pydoop.hdfs.common import BUFSIZE\nfrom pydoop.test_utils import UNI_CHR, make_random_data, FSTree\n\n\nclass TestHDFS(unittest.TestCase):\n\n    def setUp(self):\n        wd = tempfile.mkdtemp(suffix='_%s' % UNI_CHR)\n        wd_bn = os.path.basename(wd)\n        self.local_wd = \"file:%s\" % wd\n        fs = hdfs.hdfs(\"default\", 0)\n        fs.create_directory(wd_bn)\n        self.hdfs_wd = fs.get_path_info(wd_bn)[\"name\"]\n        fs.close()\n        basenames = [\"test_path_%d\" % i for i in range(2)]\n        self.local_paths = [\"%s/%s\" % (self.local_wd, bn) for bn in basenames]\n        self.hdfs_paths = [\"%s/%s\" % (self.hdfs_wd, bn) for bn in basenames]\n        self.data = make_random_data(\n            4 * BUFSIZE + BUFSIZE // 2, printable=False\n        )\n        for path in self.local_paths:\n            self.assertTrue(path.startswith(\"file:\"))\n        for path in self.hdfs_paths:\n            if not hdfs.default_is_local():\n                self.assertTrue(path.startswith(\"hdfs:\"))\n\n    def tearDown(self):\n        fs = hdfs.hdfs(\"\", 0)\n        fs.delete(self.local_wd)\n        fs.close()\n        fs = 
hdfs.hdfs(\"default\", 0)\n        fs.delete(self.hdfs_wd)\n        fs.close()\n\n    def open(self):\n        for test_path in self.hdfs_paths[0], self.local_paths[0]:\n            with hdfs.open(test_path, \"w\") as f:\n                f.write(self.data)\n            f.fs.close()\n            with hdfs.open(test_path) as f:\n                self.assertEqual(f.read(), self.data)\n            f.fs.close()\n\n    def dump(self):\n        for test_path in self.hdfs_paths[0], self.local_paths[0]:\n            hdfs.dump(self.data, test_path, mode=\"wb\")\n            with hdfs.open(test_path) as fi:\n                rdata = fi.read()\n            fi.fs.close()\n            self.assertEqual(rdata, self.data)\n\n    def __ls(self, ls_func, path_transform):\n        for wd, paths in czip(\n            (self.local_wd, self.hdfs_wd), (self.local_paths, self.hdfs_paths)\n        ):\n            for p in paths:\n                hdfs.dump(self.data, p, mode=\"wb\")\n            test_dir = \"%s/%s\" % (wd, \"test_dir\")\n            test_path = \"%s/%s\" % (test_dir, \"test_path\")\n            hdfs.dump(self.data, test_path, mode=\"wb\")\n            paths.append(test_dir)\n            for recursive in False, True:\n                if recursive:\n                    paths.append(test_path)\n                dir_list = [\n                    path_transform(p) for p in ls_func(wd, recursive=recursive)\n                ]\n                self.assertEqual(sorted(dir_list), sorted(paths))\n\n    def lsl(self):\n        self.__ls(hdfs.lsl, lambda x: x[\"name\"])\n\n    def ls(self):\n        self.__ls(hdfs.ls, lambda x: x)\n\n    def mkdir(self):\n        for wd in self.local_wd, self.hdfs_wd:\n            d1 = \"%s/d1\" % wd\n            d2 = \"%s/d2\" % d1\n            hdfs.mkdir(d2)\n            dir_list = hdfs.ls(d1)\n            self.assertEqual(len(dir_list), 1)\n            self.assertTrue(dir_list[0].endswith(d2))\n\n    def load(self):\n        for test_path in 
self.hdfs_paths[0], self.local_paths[0]:\n            hdfs.dump(self.data, test_path, mode=\"wb\")\n            rdata = hdfs.load(test_path)\n            self.assertEqual(rdata, self.data)\n\n    def __make_tree(self, wd, root=\"d1\", create=True):\n        \"\"\"\n        d1\n        |-- d2\n        |   `-- f2\n        `-- f1\n        \"\"\"\n        d1 = \"%s/%s\" % (wd, root)\n        t1 = FSTree(d1)\n        d2 = \"%s/d2\" % d1\n        t2 = t1.add(d2)\n        if create:\n            hdfs.mkdir(d2)\n        for t, d, bn in ((t1, d1, \"f1\"), (t2, d2, \"f2\")):\n            f = \"%s/%s\" % (d, bn)\n            if create:\n                hdfs.dump(self.data, f, mode=\"wb\")\n            t.add(f, 0)\n        return t1\n\n    def __cp_file(self, wd):\n        fn = \"%s/fn\" % wd\n        hdfs.dump(self.data, fn, mode=\"wb\")\n        dest_dir = \"%s/dest_dir\" % wd\n        hdfs.mkdir(dest_dir)\n        fn_copy_on_wd = \"%s/fn_copy\" % wd\n        hdfs.cp(fn, fn_copy_on_wd, mode=\"wb\")\n        self.assertEqual(hdfs.load(fn_copy_on_wd), self.data)\n        self.assertRaises(IOError, hdfs.cp, fn, fn_copy_on_wd)\n        fn_copy_on_dest_dir = \"%s/fn\" % dest_dir\n        hdfs.cp(fn, dest_dir, mode=\"wb\")\n        self.assertEqual(hdfs.load(fn_copy_on_dest_dir), self.data)\n        self.assertRaises(IOError, hdfs.cp, fn, dest_dir)\n\n    def __cp_dir(self, wd):\n        src_dir = \"%s/src_dir\" % wd\n        hdfs.mkdir(src_dir)\n        copy_on_wd = \"%s/src_dir_copy\" % wd\n        copy_on_copy_on_wd = \"%s/src_dir\" % copy_on_wd\n        hdfs.cp(src_dir, copy_on_wd, mode=\"wb\")\n        self.assertTrue(hdfs.path.exists(copy_on_wd))\n        hdfs.cp(src_dir, copy_on_wd, mode=\"wb\")\n        self.assertTrue(hdfs.path.exists(copy_on_copy_on_wd))\n        self.assertRaises(IOError, hdfs.cp, src_dir, copy_on_wd)\n\n    def __cp_recursive(self, wd):\n        src_t = self.__make_tree(wd)\n        src = src_t.name\n        copy_on_wd = \"%s_copy\" % src\n        
src_bn, copy_on_wd_bn = [\n            hdfs.path.basename(d) for d in (src, copy_on_wd)\n        ]\n        hdfs.cp(src, copy_on_wd, mode=\"wb\")\n        exp_t = self.__make_tree(wd, root=copy_on_wd_bn, create=False)\n        for t, exp_t in czip(src_t.walk(), exp_t.walk()):\n            self.assertTrue(hdfs.path.exists(exp_t.name))\n            if t.kind == 0:\n                self.assertEqual(hdfs.load(exp_t.name), self.data)\n        # check semantics when target dir already exists\n        hdfs.rm(copy_on_wd)\n        hdfs.mkdir(copy_on_wd)\n        hdfs.cp(src, copy_on_wd, mode=\"wb\")\n        exp_t = self.__make_tree(copy_on_wd, root=src_bn, create=False)\n        for t, exp_t in czip(src_t.walk(), exp_t.walk()):\n            self.assertTrue(hdfs.path.exists(exp_t.name))\n            if t.kind == 0:\n                self.assertEqual(hdfs.load(exp_t.name), self.data)\n\n    def cp(self):\n        for wd in self.local_wd, self.hdfs_wd:\n            self.__cp_file(wd)\n            self.__cp_dir(wd)\n            self.__cp_recursive(wd)\n\n    def put(self):\n        src = hdfs.path.split(self.local_paths[0])[-1]\n        dest = self.hdfs_paths[0]\n        with open(src, \"wb\") as f:\n            f.write(self.data)\n        hdfs.put(src, dest, mode=\"wb\")\n        with hdfs.open(dest) as fi:\n            rdata = fi.read()\n        self.assertEqual(rdata, self.data)\n\n    def get(self):\n        src = self.hdfs_paths[0]\n        dest = hdfs.path.split(self.local_paths[0])[-1]\n        hdfs.dump(self.data, src, mode=\"wb\")\n        hdfs.get(src, dest, mode=\"wb\")\n        with open(dest, 'rb') as fi:\n            rdata = fi.read()\n        self.assertEqual(rdata, self.data)\n\n    def rm(self):\n        for wd in self.local_wd, self.hdfs_wd:\n            t1 = self.__make_tree(wd)\n            hdfs.rm(t1.name)\n            self.assertEqual(len(hdfs.ls(wd)), 0)\n\n    def chmod(self):\n        with tempfile.NamedTemporaryFile(suffix='_%s' % UNI_CHR) as f:\n     
       hdfs.chmod(\"file://\" + f.name, 444)\n            s = os.stat(f.name)\n            self.assertEqual(444, stat.S_IMODE(s.st_mode))\n\n    def move(self):\n        for wd in self.local_wd, self.hdfs_wd:\n            t1 = self.__make_tree(wd)\n            t2 = [_ for _ in t1.children if _.kind == 1][0]\n            f2 = t2.children[0]\n            hdfs.move(f2.name, t1.name)\n            ls = [os.path.basename(_) for _ in hdfs.ls(t1.name)]\n            self.assertTrue(os.path.basename(f2.name) in ls)\n            self.assertEqual(len(hdfs.ls(t2.name)), 0)\n\n    def chown(self):\n        new_user = 'nobody'\n        test_path = self.hdfs_paths[0]\n        hdfs.dump(self.data, test_path, mode=\"wb\")\n        hdfs.chown(test_path, user=new_user)\n        path_info = hdfs.lsl(test_path)[0]\n        self.assertEqual(path_info['owner'], new_user)\n        prev_owner = path_info['owner']\n        prev_grp = path_info['group']\n        # owner and group should remain unchanged\n        hdfs.chown(test_path, user='', group='')\n        path_info = hdfs.lsl(test_path)[0]\n        self.assertEqual(path_info['owner'], prev_owner)\n        self.assertEqual(path_info['group'], prev_grp)\n\n    def rename(self):\n        test_path = self.hdfs_paths[0]\n        new_path = \"%s.new\" % test_path\n        hdfs.dump(self.data, test_path, mode=\"wb\")\n        hdfs.rename(test_path, new_path)\n        self.assertFalse(hdfs.path.exists(test_path))\n        self.assertTrue(hdfs.path.exists(new_path))\n        if not hdfs.default_is_local():\n            self.assertRaises(\n                RuntimeError, hdfs.rename, new_path, self.local_paths[0]\n            )\n\n    def renames(self):\n        test_path = self.hdfs_paths[0]\n        hdfs.dump(self.data, test_path, mode=\"wb\")\n        new_d = hdfs.path.join(self.hdfs_wd, \"new_dir\")\n        new_path = hdfs.path.join(new_d, \"new_p\")\n        hdfs.renames(test_path, new_path)\n        
self.assertFalse(hdfs.path.exists(test_path))\n        self.assertTrue(hdfs.path.exists(new_path))\n\n    def capacity(self):\n        fs = hdfs.hdfs(\"\", 0)\n        self.assertRaises(RuntimeError, fs.capacity)\n        fs.close()\n        if not hdfs.default_is_local():\n            fs = hdfs.hdfs(\"default\", 0)\n            cap = fs.capacity()\n            self.assertGreaterEqual(cap, 0)\n\n    def get_hosts(self):\n        if hdfs.default_is_local():\n            # only run on HDFS\n            return\n        hdfs.dump(self.data, self.hdfs_paths[0], mode=\"wb\")\n        fs = hdfs.hdfs(\"default\", 0)\n        hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)\n        self.assertTrue(len(hs) > 0)\n        self.assertRaises(\n            ValueError, fs.get_hosts, self.hdfs_paths[0], -10, 10\n        )\n        self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)\n\n    def thread_allow(self):\n        # test whether our code is properly allowing other python threads to\n        # make progress while we're busy doing I/O\n        class BusyCounter(Thread):\n            def __init__(self):\n                super(BusyCounter, self).__init__()\n                self.done = False\n                self._count = 0\n\n            @property\n            def count(self):\n                return self._count\n\n            def run(self):\n                while not self.done:\n                    self._count += 1\n\n        class BusyContext(object):\n            def __init__(self):\n                self.counter = None\n\n            def __enter__(self):\n                self.counter = BusyCounter()\n                self.counter.start()\n\n            def __exit__(self, _1, _2, _3):\n                self.counter.done = True\n                self.counter.join()\n\n            @property\n            def count(self):\n                return self.counter.count\n\n        some_data = b\"a\" * (5 * 1024 * 1024)  # 5 MB\n        counter = BusyContext()\n\n        
###########################\n        acceptable_threshold = 5\n        # The tests were sometimes failing on TravisCI (slower machines) with\n        # counts below 100.  A test where we left the GIL locked showed that in\n        # that case counter value doesn't change at all across calls, so in\n        # theory even an increment of 1 would demonstrate that the mechanism is\n        # working.\n\n        # If the hdfs call doesn't release the GIL, the counter won't make any\n        # progress during the HDFS call and will be stuck at 0.  On the other\n        # hand, if the GIL is release during the operation we'll see a count\n        # value > 0.\n        fs = hdfs.hdfs(\"default\", 0)\n        with fs.open_file(self.hdfs_paths[0], \"w\") as f:\n            with counter:\n                f.write(some_data)\n            self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        with fs.open_file(self.hdfs_paths[0], \"r\") as f:\n            with counter:\n                f.read()\n            self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        with counter:\n            fs.get_hosts(self.hdfs_paths[0], 0, 10)\n        self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        with counter:\n            fs.list_directory('/')\n        self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        with counter:\n            hdfs.cp(self.hdfs_paths[0], self.hdfs_paths[0] + '_2', mode=\"wb\")\n        self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        with counter:\n            hdfs.rm(self.hdfs_paths[0] + '_2')\n        self.assertGreaterEqual(counter.count, acceptable_threshold)\n\n        # ...we could go on, but the better strategy would be to insert a check\n        # analogous to these in each method's unit test\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestHDFS(\"open\"))\n    suite_.addTest(TestHDFS(\"dump\"))\n    suite_.addTest(TestHDFS(\"lsl\"))\n  
  suite_.addTest(TestHDFS(\"ls\"))\n    suite_.addTest(TestHDFS(\"mkdir\"))\n    suite_.addTest(TestHDFS(\"load\"))\n    suite_.addTest(TestHDFS(\"cp\"))\n    suite_.addTest(TestHDFS(\"put\"))\n    suite_.addTest(TestHDFS(\"get\"))\n    suite_.addTest(TestHDFS(\"rm\"))\n    suite_.addTest(TestHDFS(\"chmod\"))\n    suite_.addTest(TestHDFS(\"move\"))\n    suite_.addTest(TestHDFS(\"chown\"))\n    suite_.addTest(TestHDFS(\"rename\"))\n    suite_.addTest(TestHDFS(\"renames\"))\n    suite_.addTest(TestHDFS(\"capacity\"))\n    suite_.addTest(TestHDFS(\"get_hosts\"))\n    # randomly fails on Travis\n    # suite_.addTest(TestHDFS(\"thread_allow\"))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/test_hdfs_fs.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport getpass\nimport os\nimport socket\nfrom itertools import product\n\nimport pydoop.hdfs as hdfs\nfrom common_hdfs_tests import TestCommon, common_tests\nimport pydoop.test_utils as u\nfrom pydoop.utils.py3compat import clong\n\nCURRENT_USER = getpass.getuser()\nDEFAULT_FS = hdfs.fs._default_fs()\n\n\ndef get_explicit_hp():\n    hp = DEFAULT_FS.netloc.split(\":\")\n    if len(hp) < 2:\n        hp.append(u._DEFAULT_HDFS_PORT)\n    return os.getenv(\"HDFS_HOST\", hp[0]), int(os.getenv(\"HDFS_PORT\", hp[1]))\n\n\nclass TestConnection(unittest.TestCase):\n\n    def setUp(self):\n        self.hp_cases = [(\"default\", 0)]\n        self.u_cases = [None, CURRENT_USER]\n        if DEFAULT_FS.scheme == \"hdfs\":\n            hdfs_host, hdfs_port = get_explicit_hp()\n            self.hp_cases.append((hdfs_host, hdfs_port))\n            self.u_cases.append(\"nobody\")\n            try:\n                hdfs_ip = socket.gethostbyname(hdfs_host)\n            except socket.gaierror:\n                pass\n            else:\n                self.hp_cases.append((hdfs_ip, hdfs_port))\n\n    def connect(self):\n        for host, port in self.hp_cases:\n            for user in self.u_cases:\n                expected_user = user or CURRENT_USER\n                with hdfs.hdfs(host, port, user=user) as fs:\n                    
self.assertEqual(fs.user, expected_user)\n\n    def cache(self):\n        for (h1, p1), (h2, p2) in product(self.hp_cases, repeat=2):\n            hdfs.hdfs._CACHE.clear()\n            hdfs.hdfs._ALIASES = {\"host\": {}, \"port\": {}, \"user\": {}}  # FIXME\n            with hdfs.hdfs(h1, p1) as fs1:\n                with hdfs.hdfs(h2, p2) as fs2:\n                    print(' * %r vs %r' % ((h1, p1), (h2, p2)))\n                    self.assertTrue(fs2.fs is fs1.fs)\n                for fs in fs1, fs2:\n                    self.assertFalse(fs.closed)\n            for fs in fs1, fs2:\n                self.assertTrue(fs.closed)\n\n\nclass TestHDFS(TestCommon):\n\n    def __init__(self, target):\n        TestCommon.__init__(self, target, 'default', 0)\n\n    def capacity(self):\n        c = self.fs.capacity()\n        self.assertTrue(isinstance(c, (int, clong)))\n\n    def default_block_size(self):\n        dbs = self.fs.default_block_size()\n        self.assertTrue(isinstance(dbs, (int, clong)))\n\n    def used(self):\n        u_ = self.fs.used()\n        self.assertTrue(isinstance(u_, (int, clong)))\n\n    def chown(self):\n        new_owner = \"nobody\"\n        new_group = \"users\"\n        path = self._make_random_file()\n        old_owner = self.fs.get_path_info(path)[\"owner\"]\n        old_group = self.fs.get_path_info(path)[\"group\"]\n        self.fs.chown(path, user=new_owner)\n        self.assertEqual(self.fs.get_path_info(path)[\"owner\"], new_owner)\n        self.assertEqual(self.fs.get_path_info(path)[\"group\"], old_group)\n        self.fs.chown(path, group=new_group)\n        self.assertEqual(self.fs.get_path_info(path)[\"owner\"], new_owner)\n        self.assertEqual(self.fs.get_path_info(path)[\"group\"], new_group)\n        self.fs.chown(path, old_owner, old_group)\n        self.assertEqual(self.fs.get_path_info(path)[\"owner\"], old_owner)\n        self.assertEqual(self.fs.get_path_info(path)[\"group\"], old_group)\n\n    def utime(self):\n        
path = self._make_random_file()\n        old_mtime = self.fs.get_path_info(path)[\"last_mod\"]\n        old_atime = self.fs.get_path_info(path)[\"last_access\"]\n        new_mtime = old_mtime - 500\n        new_atime = old_mtime - 100\n        self.fs.utime(path, new_mtime, new_atime)\n        self.assertEqual(\n            self.fs.get_path_info(path)[\"last_mod\"], int(new_mtime)\n        )\n        self.assertEqual(\n            self.fs.get_path_info(path)[\"last_access\"], int(new_atime)\n        )\n        self.fs.utime(path, old_mtime, old_atime)\n        self.assertEqual(\n            self.fs.get_path_info(path)[\"last_mod\"], int(old_mtime)\n        )\n        self.assertEqual(\n            self.fs.get_path_info(path)[\"last_access\"], int(old_atime)\n        )\n\n    def block_size(self):\n        for bs_MB in range(100, 500, 50):\n            bs = bs_MB * 2**20\n            path = self._make_random_file(blocksize=bs)\n            self.assertEqual(self.fs.get_path_info(path)[\"block_size\"], bs)\n\n    def replication(self):\n        for r in range(1, 6):\n            path = self._make_random_file(replication=r)\n            self.assertEqual(self.fs.get_path_info(path)[\"replication\"], r)\n\n    def set_replication(self):\n        old_r, new_r = 2, 3\n        path = self._make_random_file(replication=old_r)\n        self.fs.set_replication(path, new_r)\n        self.assertEqual(self.fs.get_path_info(path)[\"replication\"], new_r)\n\n    # HDFS returns less than the number of requested bytes if the chunk\n    # being read crosses the boundary between data blocks.\n    def readline_block_boundary(self):\n\n        def _write_prefix(f, size, bs):\n            # Avoid memory problem with JVM\n            chunk_size = min(bs, 12 * 1048576)\n            written = 0\n            while written < size:\n                data = b'X' * min(chunk_size, size - written)\n                written += f.write(data)\n\n        # (dfs.namenode.fs-limits.min-block-size): 4096 < 
1048576\n        bs = 1048576\n        line = b\"012345678\\n\"\n        offset = bs - (10 * len(line) + 5)\n        path = self._make_random_path()\n        with self.fs.open_file(path, mode=\"w\", blocksize=bs) as f:\n            bytes_written = lines_written = 0\n            _write_prefix(f, offset, bs)\n            bytes_written = offset\n            while bytes_written < bs + 1:\n                f.write(line)\n                lines_written += 1\n                bytes_written += len(line)\n        with self.fs.open_file(path) as f:\n            f.seek(offset)\n            lines = []\n            while 1:\n                L = f.readline()\n                if not L:\n                    break\n                lines.append(L)\n        self.assertEqual(len(lines), lines_written)\n        for i, L in enumerate(lines):\n            self.assertEqual(L, line, \"line %d: %r != %r\" % (i, L, line))\n\n    def get_hosts(self):\n        # (dfs.namenode.fs-limits.min-block-size): 4096 < 1048576\n        blocksize = 1048576\n        N = 4\n        content = b\"x\" * blocksize * N\n        path = self._make_random_file(content=content, blocksize=blocksize)\n        start = 0\n        for i in range(N):\n            length = blocksize * i + 1\n            hosts_per_block = self.fs.get_hosts(path, start, length)\n            self.assertEqual(len(hosts_per_block), i + 1)\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestConnection('connect'))\n    suite_.addTest(TestConnection('cache'))\n    tests = common_tests()\n    if DEFAULT_FS.scheme == \"hdfs\":\n        tests.extend([\n            'capacity',\n            'default_block_size',\n            'used',\n            'chown',\n            'utime',\n            'block_size',\n            'replication',\n            'set_replication',\n            'readline_block_boundary',\n            'get_hosts',\n        ])\n    for t in tests:\n        suite_.addTest(TestHDFS(t))\n    return suite_\n\n\nif __name__ 
== '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/test_local_fs.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport getpass\nimport tempfile\nimport os\n\nimport pydoop.hdfs as hdfs\nfrom common_hdfs_tests import TestCommon, common_tests\n\n\nclass TestConnection(unittest.TestCase):\n\n    def runTest(self):\n        current_user = getpass.getuser()\n        cwd = os.getcwd()\n        os.chdir(tempfile.gettempdir())\n        for user in None, current_user, \"nobody\":\n            expected_user = current_user\n            fs = hdfs.hdfs(\"\", 0, user=user)\n            self.assertEqual(fs.user, expected_user)\n            fs.close()\n        os.chdir(cwd)\n\n\nclass TestLocalFS(TestCommon):\n\n    def __init__(self, target):\n        TestCommon.__init__(self, target, '', 0)\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestConnection('runTest'))\n    tests = common_tests()\n    for t in tests:\n        suite_.addTest(TestLocalFS(t))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/test_path.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\nimport unittest\nimport tempfile\nfrom numbers import Number\n\nimport pydoop.hdfs as hdfs\nfrom pydoop.hdfs.common import DEFAULT_PORT, DEFAULT_USER\nfrom pydoop.utils.misc import make_random_str\nfrom pydoop.test_utils import UNI_CHR\n\n\ndef uni_last(tup):\n    return tup[:-1] + (tup[-1] + UNI_CHR,)\n\n\nclass TestSplit(unittest.TestCase):\n\n    def good(self):\n        cases = [\n            ('hdfs://localhost:9000/', ('localhost', 9000, '/')),\n            ('hdfs://localhost:9000/a/b', ('localhost', 9000, '/a/b')),\n            ('hdfs://localhost/a/b', ('localhost', DEFAULT_PORT, '/a/b')),\n            ('hdfs:///a/b', ('default', 0, '/a/b')),\n            ('hdfs:/', ('default', 0, '/')),\n            ('file:///a/b', ('', 0, '/a/b')),\n            ('file:/a/b', ('', 0, '/a/b')),\n            ('file:///a', ('', 0, '/a')),\n            ('file:/a', ('', 0, '/a')),\n            ('file://temp/foo.txt', ('', 0, 'temp/foo.txt')),\n            ('file://temp', ('', 0, 'temp')),\n        ]\n        if hdfs.default_is_local():\n            cases.extend([\n                ('///a/b', ('', 0, '/a/b')),\n                ('/a/b', ('', 0, '/a/b')),\n                ('a/b', ('', 0, 'a/b')),\n            ])\n        else:\n            cases.extend([\n                ('///a/b', ('default', 0, '/a/b')),\n                ('/a/b', 
('default', 0, '/a/b')),\n                ('a/b', ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),\n            ])\n        for p, r in cases:\n            self.assertEqual(hdfs.path.split(p), r)\n        for p, r in cases[1:]:\n            self.assertEqual(hdfs.path.split(p + UNI_CHR), uni_last(r))\n\n    def good_with_user(self):\n        if hdfs.default_is_local():\n            cases = [\n                ('a/b', u, ('', 0, 'a/b')) for u in [None, DEFAULT_USER, 'foo']\n            ]\n        else:\n            cases = [\n                ('a/b', None, ('default', 0, '/user/%s/a/b' % DEFAULT_USER)),\n                ('a/b', DEFAULT_USER, (\n                    'default', 0, '/user/%s/a/b' % DEFAULT_USER\n                )),\n                ('a/b', 'foo', ('default', 0, '/user/foo/a/b')),\n            ]\n        for p, u, r in cases:\n            self.assertEqual(hdfs.path.split(p, u), r)\n            self.assertEqual(hdfs.path.split(p + UNI_CHR, u), uni_last(r))\n\n    def bad(self):\n        cases = [\n            '',                                  # not allowed in the Java API\n            'hdfs:',                             # no scheme-specific part\n            'hdfs://',                           # path part is empty\n            'ftp://localhost:9000/',             # bad scheme\n            'hdfs://localhost:spam/',            # port is not an int\n            'hdfs://localhost:9000',             # path part is empty\n            'hdfs://localhost:9000/a:b',         # colon outside netloc\n            '//localhost:9000/a/b',              # null scheme\n        ]\n        if not hdfs.default_is_local():\n            cases.append('/localhost:9000/a/b')  # colon outside netloc\n        for p in cases:\n            self.assertRaises(ValueError, hdfs.path.split, p)\n\n    def splitext(self):\n        for pre in '', 'file:', 'hdfs://host:1':\n            name, ext = '%sfoo' % pre, '.txt'\n            self.assertEqual(hdfs.path.splitext(name + ext), (name, 
ext))\n        p = 'hdfs://foo.com:1/'\n        self.assertEqual(hdfs.path.splitext(p), (p, ''))\n\n\nclass TestUnparse(unittest.TestCase):\n\n    def good(self):\n        cases = [\n            (('hdfs', 'host:1', '/'), 'hdfs://host:1/'),\n            (('file', '', '/'), 'file:/'),\n            (('hdfs', 'host:1', UNI_CHR), 'hdfs://host:1/%s' % UNI_CHR),\n            (('file', '', UNI_CHR), 'file:/%s' % UNI_CHR),\n            (('', '', UNI_CHR), UNI_CHR),\n        ]\n        for (scheme, netloc, path), exp_uri in cases:\n            self.assertEqual(hdfs.path.unparse(scheme, netloc, path), exp_uri)\n\n    def bad(self):\n        self.assertRaises(ValueError, hdfs.path.unparse, '', 'host:1', '/a')\n\n\nclass TestJoin(unittest.TestCase):\n\n    def __check_join(self, cases):\n        for p, r in cases:\n            self.assertEqual(hdfs.path.join(*p), r)\n\n    def simple(self):\n        self.__check_join([\n            (('foo', 'bar', 'tar'), 'foo/bar/tar'),\n            (('/foo', 'bar', 'tar'), '/foo/bar/tar'),\n        ])\n\n    def slashes(self):\n        self.__check_join([\n            (('foo/', 'bar/', 'tar'), 'foo/bar/tar'),\n            (('/foo/', 'bar/', 'tar'), '/foo/bar/tar'),\n        ])\n\n    def absolute(self):\n        self.__check_join([\n            (('foo', '/bar', 'tar'), '/bar/tar'),\n            (('foo', 'hdfs://host:1/bar', 'tar'), 'hdfs://host:1/bar/tar'),\n            (('foo', 'file:/bar', 'tar'), 'file:/bar/tar'),\n            (('foo', 'file:///bar', 'tar'), 'file:///bar/tar'),\n        ])\n\n    def full(self):\n        self.__check_join([\n            (('hdfs://host:1/', '/foo'), 'hdfs://host:1/foo'),\n            (('hdfs://host:1/', 'file:/foo', '/bar'), 'file:/foo/bar'),\n            (('foo', '/bar', 'hdfs://host:1/tar'), 'hdfs://host:1/tar'),\n        ])\n\n    def unicode_(self):\n        self.__check_join(\n            [(('/foo', 'bar', UNI_CHR), '/foo/bar/%s' % UNI_CHR)]\n        )\n\n\nclass TestAbspath(unittest.TestCase):\n\n    
def setUp(self):\n        if hdfs.default_is_local():\n            self.root = \"file:\"\n        else:\n            fs = hdfs.hdfs(\"default\", 0)\n            self.root = \"hdfs://%s:%s\" % (fs.host, fs.port)\n            fs.close()\n        self.p = 'a/%s' % UNI_CHR\n\n    def without_user(self):\n        abs_p = hdfs.path.abspath(self.p, user=None, local=False)\n        if hdfs.default_is_local():\n            self.assertEqual(\n                abs_p, '%s%s' % (self.root, os.path.abspath(self.p))\n            )\n        else:\n            self.assertEqual(\n                abs_p, '%s/user/%s/%s' % (self.root, DEFAULT_USER, self.p)\n            )\n\n    def with_user(self):\n        abs_p = hdfs.path.abspath(self.p, user=\"pydoop\", local=False)\n        if hdfs.default_is_local():\n            self.assertEqual(\n                abs_p, '%s%s' % (self.root, os.path.abspath(self.p))\n            )\n        else:\n            self.assertEqual(abs_p, '%s/user/pydoop/%s' % (self.root, self.p))\n\n    def forced_local(self):\n        for user in None, \"pydoop\":\n            abs_p = hdfs.path.abspath(self.p, user=user, local=True)\n            self.assertEqual(abs_p, 'file:%s' % os.path.abspath(self.p))\n\n    def already_absolute(self):\n        for p in ('file:/a/%s' % UNI_CHR,\n                  'hdfs://localhost:9000/a/%s' % UNI_CHR):\n            for user in None, \"pydoop\":\n                abs_p = hdfs.path.abspath(p, user=user, local=False)\n                self.assertEqual(abs_p, p)\n                abs_p = hdfs.path.abspath(p, user=user, local=True)\n                self.assertEqual(abs_p, 'file:%s' % os.path.abspath(p))\n\n\nclass TestSplitBasenameDirname(unittest.TestCase):\n\n    def runTest(self):\n        cases = [  # path, expected dirname, expected basename\n            (\"hdfs://host:1/a/%s\" % UNI_CHR, \"hdfs://host:1/a\", UNI_CHR),\n            (\"hdfs://host:1/\", \"hdfs://host:1/\", \"\"),\n            (\"hdfs:/\", \"hdfs:/\", \"\"),\n          
  (\"file:/\", \"file:/\", \"\"),\n            (\"a/%s\" % UNI_CHR, \"a\", UNI_CHR),\n            (\"/a/%s\" % UNI_CHR, \"/a\", UNI_CHR),\n            (UNI_CHR, \"\", UNI_CHR),\n            ('/%s' % UNI_CHR, \"/\", UNI_CHR),\n            ('', '', ''),\n        ]\n        for p, d, bn in cases:\n            self.assertEqual(hdfs.path.dirname(p), d)\n            self.assertEqual(hdfs.path.basename(p), bn)\n            self.assertEqual(hdfs.path.splitpath(p), (d, bn))\n\n\nclass TestExists(unittest.TestCase):\n\n    def good(self):\n        base_path = make_random_str()\n        for path in base_path, base_path + UNI_CHR:\n            hdfs.dump(\"foo\\n\", path)\n            self.assertTrue(hdfs.path.exists(path))\n            hdfs.rm(path)\n            self.assertFalse(hdfs.path.exists(path))\n\n\nclass TestKind(unittest.TestCase):\n\n    def setUp(self):\n        self.path = make_random_str()\n        self.u_path = self.path + UNI_CHR\n\n    def test_kind(self):\n        for path in self.path, self.u_path:\n            self.assertTrue(hdfs.path.kind(path) is None)\n            try:\n                hdfs.dump(\"foo\\n\", path)\n                self.assertEqual('file', hdfs.path.kind(path))\n                hdfs.rm(path)\n                hdfs.mkdir(path)\n                self.assertEqual('directory', hdfs.path.kind(path))\n            finally:\n                try:\n                    hdfs.rm(path)\n                except IOError:\n                    pass\n\n    def test_isfile(self):\n        for path in self.path, self.u_path:\n            self.assertFalse(hdfs.path.isfile(path))\n            try:\n                hdfs.dump(\"foo\\n\", path)\n                self.assertTrue(hdfs.path.isfile(path))\n                hdfs.rm(path)\n                hdfs.mkdir(path)\n                self.assertFalse(hdfs.path.isfile(path))\n            finally:\n                try:\n                    hdfs.rm(path)\n                except IOError:\n                    pass\n\n    def 
test_isdir(self):\n        for path in self.path, self.u_path:\n            self.assertFalse(hdfs.path.isdir(path))\n            try:\n                hdfs.dump(\"foo\\n\", path)\n                self.assertFalse(hdfs.path.isdir(path))\n                hdfs.rm(path)\n                hdfs.mkdir(path)\n                self.assertTrue(hdfs.path.isdir(path))\n            finally:\n                try:\n                    hdfs.rm(path)\n                except IOError:\n                    pass\n\n\nclass TestExpand(unittest.TestCase):\n\n    def expanduser(self):\n        for pre in '~', '~%s' % DEFAULT_USER:\n            for rest in '', '/d':\n                p = '%s%s' % (pre, rest)\n                if hdfs.default_is_local():\n                    self.assertEqual(\n                        hdfs.path.expanduser(p), os.path.expanduser(p)\n                    )\n                else:\n                    exp_res = '/user/%s%s' % (DEFAULT_USER, rest)\n                    self.assertEqual(hdfs.path.expanduser(p), exp_res)\n\n    def expanduser_no_expansion(self):\n        for pre in ('hdfs://host:1', 'file://', ''):\n            for rest in ('/~', '/~foo', '/d/~', '/d/~foo'):\n                p = '%s%s' % (pre, rest)\n                self.assertEqual(hdfs.path.expanduser(p), p)\n\n    def expandvars(self):\n        k, v = 'PYDOOP_TEST_K', 'PYDOOP_TEST_V'\n        p = 'hdfs://host:1/${%s}' % k\n        os.environ[k] = v\n        exp_res = '%s/%s' % (p.rsplit('/', 1)[0], v)\n        try:\n            self.assertEqual(hdfs.path.expandvars(p), exp_res)\n        finally:\n            del os.environ[k]\n\n\nclass TestStat(unittest.TestCase):\n\n    NMAP = {\n        'st_mode': 'permissions',\n        'st_uid': 'owner',\n        'st_gid': 'group',\n        'st_size': 'size',\n        'st_atime': 'last_access',\n        'st_mtime': 'last_mod',\n        'st_blksize': 'block_size',\n    }\n\n    def stat(self):\n        if hdfs.default_is_local():\n            return\n        bn = 
'%s%s' % (make_random_str(), UNI_CHR)\n        fn = '/user/%s/%s' % (DEFAULT_USER, bn)\n        fs = hdfs.hdfs(\"default\", 0)\n        p = \"hdfs://%s:%s%s\" % (fs.host, fs.port, fn)\n        with fs.open_file(fn, 'wt') as fo:\n            fo.write(make_random_str())\n        info = fs.get_path_info(fn)\n        fs.close()\n        s = hdfs.path.stat(p)\n        for n1, n2 in self.NMAP.items():\n            attr = getattr(s, n1, None)\n            self.assertFalse(attr is None)\n            self.assertEqual(attr, info[n2])\n        self.__check_extra_args(s, info)\n        self.__check_wrapper_funcs(p)\n        hdfs.rm(p)\n\n    def stat_on_local(self):\n        wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)\n        p_ = os.path.join(wd_, make_random_str())\n        if hdfs.default_is_local():\n            wd, p = wd_, p_\n            host = \"default\"\n        else:\n            wd, p = ('file:%s' % _ for _ in (wd_, p_))\n            host = \"\"\n        fs = hdfs.hdfs(host, 0)\n        with fs.open_file(p_, 'w') as fo:\n            fo.write(b\"foobar\\n\")\n        info = fs.get_path_info(p_)\n        fs.close()\n        s = hdfs.path.stat(p)\n        os_s = os.stat(p_)\n        for n in dir(s):\n            if n.startswith('st_'):\n                try:\n                    exp_v = getattr(os_s, n)\n                except AttributeError:\n                    try:\n                        exp_v = info[self.NMAP[n]]\n                    except KeyError:\n                        continue\n                    self.assertEqual(getattr(s, n), exp_v)\n        self.__check_extra_args(s, info)\n        self.__check_wrapper_funcs(p)\n        hdfs.rm(wd)\n\n    def stat_on_dir(self):\n        if hdfs.default_is_local():\n            wd = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)\n        else:\n            wd = make_random_str() + UNI_CHR\n            hdfs.mkdir(wd)\n        s = hdfs.path.stat(wd)\n        if hdfs.default_is_local():\n            os_s 
= os.stat(wd)\n            for name in 'st_size', 'st_blksize', 'st_blocks':\n                self.assertEqual(getattr(s, name), getattr(os_s, name))\n        else:\n            for attr in s.st_size, s.st_blksize, s.st_blocks:\n                self.assertEqual(attr, 0)\n        hdfs.rm(wd)\n\n    def __check_extra_args(self, stat_res, path_info):\n        for n in 'kind', 'name', 'replication':\n            attr = getattr(stat_res, '%s' % n, None)\n            self.assertFalse(attr is None)\n            self.assertEqual(attr, path_info[n])\n\n    def __check_wrapper_funcs(self, path):\n        for n in 'getatime', 'getmtime', 'getctime', 'getsize':\n            func = getattr(hdfs.path, n)\n            self.assertTrue(isinstance(func(path), Number))\n\n\nclass TestIsSomething(unittest.TestCase):\n\n    def full_and_abs(self):\n        for name in 'isfull', 'isabs':\n            test = getattr(hdfs.path, name)\n            for p in 'hdfs://host:1/foo', 'file:/foo':\n                self.assertTrue(test(p))\n            self.assertFalse(test('foo'))\n        self.assertFalse(hdfs.path.isfull('/foo'))\n        self.assertTrue(hdfs.path.isabs('/foo'))\n\n    def islink(self):\n        wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)\n        wd = 'file:%s' % wd_\n        self.assertFalse(hdfs.path.islink(wd))\n        link = os.path.join(wd_, make_random_str())\n        os.symlink(wd_, link)\n        self.assertTrue(hdfs.path.islink('file:%s' % link))\n        hdfs.rm(wd)\n\n    def ismount(self):\n        self.assertFalse(hdfs.path.ismount('hdfs://host:1/foo'))\n\n\nclass TestNorm(unittest.TestCase):\n\n    def normpath(self):\n        for pre in '', 'file:', 'hdfs://host:1':\n            post = '/a/./b/c/../../foo'\n            npost = '/a/foo'\n            self.assertEqual(hdfs.path.normpath(pre + post), pre + npost)\n            self.assertEqual(hdfs.path.normpath('a/./b/c/../../foo'), 'a/foo')\n\n\nclass TestReal(unittest.TestCase):\n\n    def 
realpath(self):\n        wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)\n        wd = 'file:%s' % wd_\n        link = os.path.join(wd_, make_random_str())\n        os.symlink(wd_, link)\n        expected_path = 'file:%s' % os.path.realpath(wd_)\n        self.assertEqual(hdfs.path.realpath('file:%s' % link), expected_path)\n        hdfs.rm(wd)\n\n\nclass TestSame(unittest.TestCase):\n\n    def samefile_link(self):\n        wd_ = tempfile.mkdtemp(prefix='pydoop_', suffix=UNI_CHR)\n        wd = 'file:%s' % wd_\n        link = os.path.join(wd_, make_random_str())\n        os.symlink(wd_, link)\n        self.assertTrue(hdfs.path.samefile('file:%s' % link, 'file:%s' % wd_))\n        hdfs.rm(wd)\n\n    def samefile_rel(self):\n        p = make_random_str() + UNI_CHR\n        hdfs.dump(\"foo\\n\", p)\n        self.assertTrue(hdfs.path.samefile(p, hdfs.path.abspath(p)))\n        hdfs.rm(p)\n\n    def samefile_norm(self):\n        for pre in '', 'file:/', 'hdfs://host:1/':\n            self.assertTrue(hdfs.path.samefile(pre + 'a/b/../c', pre + 'a/c'))\n\n    def samefile_user(self):\n        if not hdfs.default_is_local():\n            self.assertTrue(hdfs.path.samefile('fn', '/user/u/fn', user='u'))\n\n\nclass TestAccess(unittest.TestCase):\n\n    def setUp(self):\n        self.path = make_random_str() + UNI_CHR\n        hdfs.mkdir(self.path)\n\n    def tearDown(self):\n        hdfs.rm(self.path)\n\n    # FIXME: far from exhaustive.  
This is a slow test\n    def __test(self, offset, user=None):\n        for mode in os.R_OK, os.W_OK, os.X_OK:\n            hdfs.chmod(self.path, mode << offset)\n            print(' * mode now: %03o' % hdfs.path.stat(self.path).st_mode)\n            self.assertTrue(hdfs.path.access(self.path, mode, user=user))\n\n    def test_owner(self):\n        self.__test(6)\n\n    def test_other(self):\n        self.__test(0, user=make_random_str())\n\n\nclass TestUtime(unittest.TestCase):\n\n    def runTest(self):\n        path = make_random_str() + UNI_CHR\n        hdfs.dump(\"foo\\n\", path)\n        st = hdfs.path.stat(path)\n        atime, mtime = [getattr(st, 'st_%stime' % _) for _ in 'am']\n        new_atime, new_mtime = atime + 100, mtime + 200\n        hdfs.path.utime(path, (new_atime, new_mtime))\n        st = hdfs.path.stat(path)\n        self.assertEqual(int(st.st_atime), int(new_atime))\n        self.assertEqual(int(st.st_mtime), int(new_mtime))\n        hdfs.rm(path)\n\n\nclass TestCallFromHdfs(unittest.TestCase):\n\n    def setUp(self):\n        self.path = make_random_str() + UNI_CHR\n        hdfs.dump(\"foo\\n\", self.path)\n\n    def tearDown(self):\n        hdfs.rm(self.path)\n\n    def test_stat(self):\n        for name in 'stat', 'lstat':\n            self.assertTrue(hasattr(hdfs, name))\n            func = getattr(hdfs, name)\n            func(self.path)\n            func(self.path, user=DEFAULT_USER)\n\n    def test_access(self):\n        self.assertTrue(hasattr(hdfs, 'access'))\n        hdfs.access(self.path, os.F_OK)\n        hdfs.access(self.path, os.F_OK, user=DEFAULT_USER)\n\n    def test_utime(self):\n        self.assertTrue(hasattr(hdfs, 'utime'))\n        hdfs.utime(self.path)\n        hdfs.utime(self.path, times=(1e9, 1e9))\n        hdfs.utime(self.path, times=(1e9, 1e9), user=DEFAULT_USER)\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestSplit('good'))\n    suite_.addTest(TestSplit('good_with_user'))\n    
suite_.addTest(TestSplit('bad'))\n    suite_.addTest(TestSplit('splitext'))\n    suite_.addTest(TestUnparse('good'))\n    suite_.addTest(TestUnparse('bad'))\n    suite_.addTest(TestJoin('simple'))\n    suite_.addTest(TestJoin('slashes'))\n    suite_.addTest(TestJoin('absolute'))\n    suite_.addTest(TestJoin('full'))\n    suite_.addTest(TestJoin('unicode_'))\n    suite_.addTest(TestAbspath('with_user'))\n    suite_.addTest(TestAbspath('without_user'))\n    suite_.addTest(TestAbspath('forced_local'))\n    suite_.addTest(TestAbspath('already_absolute'))\n    suite_.addTest(TestSplitBasenameDirname('runTest'))\n    suite_.addTest(TestExists('good'))\n    suite_.addTest(TestExpand('expanduser'))\n    suite_.addTest(TestExpand('expanduser_no_expansion'))\n    suite_.addTest(TestExpand('expandvars'))\n    suite_.addTest(TestStat('stat'))\n    suite_.addTest(TestStat('stat_on_local'))\n    suite_.addTest(TestStat('stat_on_dir'))\n    suite_.addTest(TestIsSomething('full_and_abs'))\n    suite_.addTest(TestIsSomething('islink'))\n    suite_.addTest(TestIsSomething('ismount'))\n    suite_.addTest(TestNorm('normpath'))\n    suite_.addTest(TestReal('realpath'))\n    suite_.addTest(TestSame('samefile_link'))\n    suite_.addTest(TestSame('samefile_rel'))\n    suite_.addTest(TestSame('samefile_norm'))\n    suite_.addTest(TestSame('samefile_user'))\n    suite_.addTest(TestAccess('test_owner'))\n    suite_.addTest(TestAccess('test_other'))\n    suite_.addTest(TestUtime('runTest'))\n    suite_.addTest(TestCallFromHdfs('test_stat'))\n    suite_.addTest(TestCallFromHdfs('test_access'))\n    suite_.addTest(TestCallFromHdfs('test_utime'))\n    suite_.addTest(unittest.TestLoader().loadTestsFromTestCase(TestKind))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/hdfs/try_hdfs.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\n\"\"\"\nCheck that resetting the hdfs module after changing\nos.environ['HADOOP_CONF_DIR'] works (i.e., Pydoop references the\ncorrect HDFS service).\n\nNote that it does **NOT** work if you've already instantiated an hdfs\nhandle, and this is NOT due to the caching system.\n\"\"\"\n\nfrom __future__ import print_function\nimport sys\nimport os\nimport argparse\n\nimport pydoop.hdfs as hdfs\n\n\ndef dump_status(fs):\n    print(\"(host, port, user) = %r\" % ((fs.host, fs.port, fs.user),))\n    print(\"_CACHE = %r\" % (fs._CACHE,))\n    print(\"_ALIASES = %r\" % (fs._ALIASES,))\n    print()\n\n\ndef main(argv=sys.argv[1:]):\n    parser = argparse.ArgumentParser(description=__doc__)\n    parser.add_argument(\"--conf-dir\", metavar=\"HADOOP_CONF_DIR\")\n    args = parser.parse_args(argv)\n    if args.conf_dir:\n        os.environ[\"HADOOP_CONF_DIR\"] = os.path.abspath(args.conf_dir)\n        hdfs.reset()\n    fs = hdfs.hdfs()\n    print(\"--- OPEN ---\")\n    dump_status(fs)\n    print(\"cwd:\", fs.working_directory())\n    print\n    fs.close()\n    print(\"--- CLOSED ---\")\n    dump_status(fs)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "test/mapreduce/__init__.py",
    "content": ""
  },
  {
    "path": "test/mapreduce/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_connections',\n    'test_opaque',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/mapreduce/it/crs4/pydoop/mapreduce/pipes/OpaqueRoundtrip.java",
    "content": "/* BEGIN_COPYRIGHT\n *\n * Copyright 2009-2026 CRS4.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n * use this file except in compliance with the License. You may obtain a copy\n * of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n * License for the specific language governing permissions and limitations\n * under the License.\n *\n * END_COPYRIGHT\n */\n\npackage it.crs4.pydoop.mapreduce.pipes;\n\nimport java.io.IOException;\nimport java.util.List;\nimport java.util.Properties;\n\nimport org.apache.hadoop.fs.FSDataOutputStream;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.Job;\nimport org.apache.hadoop.mapreduce.JobID;\nimport org.apache.hadoop.mapreduce.TaskAttemptID;\nimport org.apache.hadoop.mapreduce.TaskID;\nimport org.apache.hadoop.mapreduce.TaskType;\nimport org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;\nimport org.apache.hadoop.io.IntWritable;\nimport org.apache.hadoop.conf.Configuration;\n\n\n/**\n * Use PipesNonJavaInputFormat.getSplits to read opaque splits from inUri,\n * then write them out to outUri.\n */\n\npublic class OpaqueRoundtrip {\n\n  public static void main(String[] args)\n      throws IOException, InterruptedException {\n    final String inUri = args[0];\n    final String outUri = args[1];\n    JobID jobId = new JobID(\"201408272347\", 0);\n    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);\n    TaskAttemptID taID = new TaskAttemptID(taskId, 0);\n    Job job = Job.getInstance(new Configuration());\n    job.setJobID(jobId);\n    Properties props = Submitter.getPydoopProperties();\n    Configuration conf = 
job.getConfiguration();\n    conf.set(props.getProperty(\"PIPES_EXTERNALSPLITS_URI\"), inUri);\n    TaskAttemptContextImpl ctx = new TaskAttemptContextImpl(conf, taID);\n    PipesNonJavaInputFormat iformat = new PipesNonJavaInputFormat();\n    List<InputSplit> splits = iformat.getSplits(ctx);\n    Path path = new Path(outUri);\n    FileSystem fs = FileSystem.get(conf);\n    IntWritable numRecords = new IntWritable(splits.size());\n    FSDataOutputStream out = fs.create(path);\n    try {\n      numRecords.write(out);\n      for(int i = 0; i < numRecords.get(); i++) {\n        ((OpaqueSplit)splits.get(i)).write(out);\n      }\n    } finally {\n      out.close();\n    }\n    fs.close();\n  }\n\n}\n"
  },
  {
    "path": "test/mapreduce/test_connections.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport io\nimport os\nimport unittest\n\nimport pydoop.mapreduce.api as api\nimport pydoop.mapreduce.binary_protocol as bp\nimport pydoop.mapreduce.pipes as pipes\nimport pydoop.sercore as sercore\nfrom pydoop.test_utils import WDTestCase\n\nTHIS_DIR = os.path.dirname(os.path.abspath(__file__))\nM_NAME, R_NAME = \"m_task.cmd\", \"r_task.cmd\"\n\n\nclass Mapper(api.Mapper):\n\n    def map(self, context):\n        context.emit(context.key, context.value)\n\n\nclass Reducer(api.Reducer):\n\n    def reduce(self, context):\n        context.emit(context.key, sum(context.values))\n\n\n# move to test_utils?\nclass UplinkDumpReader(object):\n\n    def __init__(self, stream):\n        self.stream = stream\n\n    def close(self):\n        self.stream.close()\n\n    def __next__(self):\n        cmd = self.stream.read_vint()\n        if cmd == bp.AUTHENTICATION_RESP:\n            return cmd, self.stream.read_tuple(\"b\")\n        elif cmd == bp.OUTPUT:\n            return cmd, self.stream.read_tuple(\"bb\")\n        elif cmd == bp.PARTITIONED_OUTPUT:\n            return cmd, self.stream.write_tuple(\"ibb\")\n        elif cmd == bp.STATUS:\n            return cmd, self.stream.read_tuple(\"s\")\n        elif cmd == bp.PROGRESS:\n            return cmd, self.stream.read_tuple(\"f\")\n        elif cmd == bp.REGISTER_COUNTER:\n            return 
cmd, self.stream.read_tuple(\"iss\")\n        elif cmd == bp.INCREMENT_COUNTER:\n            return cmd, self.stream.read_tuple(\"il\")\n        elif cmd == bp.DONE:\n            raise StopIteration\n        else:\n            raise RuntimeError(\"unknown command: %d\" % cmd)\n\n    def __iter__(self):\n        return self\n\n    # py2 compat\n    def next(self):\n        return self.__next__()\n\n\nclass TestFileConnection(WDTestCase):\n\n    def test_map(self):\n        factory = pipes.Factory(Mapper)\n        self.__run_test(M_NAME, factory, private_encoding=False)\n\n    def test_reduce(self):\n        factory = pipes.Factory(Mapper, reducer_class=Reducer)\n        self.__run_test(R_NAME, factory)\n\n    def __run_test(self, name, factory, **kwargs):\n        orig_path = os.path.join(THIS_DIR, name)\n        cmd_path = os.path.join(self.wd, name)\n        with io.open(orig_path, \"rb\") as fi, io.open(cmd_path, \"wb\") as fo:\n            fo.write(fi.read())\n        os.environ[\"mapreduce.pipes.commandfile\"] = cmd_path\n        pipes.run_task(factory, **kwargs)\n        out_cmd_path = \"%s.out\" % cmd_path\n        self.assertTrue(os.path.exists(out_cmd_path))\n        with sercore.FileInStream(out_cmd_path) as stream:\n            out_cmds = set(cmd for cmd, _ in UplinkDumpReader(stream))\n        self.assertEqual(out_cmds, {bp.OUTPUT, bp.PROGRESS})\n\n\ndef suite():\n    suite_ = unittest.TestSuite()\n    suite_.addTest(TestFileConnection('test_map'))\n    suite_.addTest(TestFileConnection('test_reduce'))\n    return suite_\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/mapreduce/test_opaque.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nimport os\nimport shutil\nimport uuid\n\nimport pydoop\nfrom pydoop.hdfs import hdfs\nfrom pydoop.mapreduce.pipes import (\n    OpaqueSplit, write_opaque_splits, read_opaque_splits\n)\n\nimport pydoop.test_utils as utils\n\n_JAVA_SRC_ROOT = 'it'\n_OPAQUE_ROUNDTRIP_CLASS = 'it.crs4.pydoop.mapreduce.pipes.OpaqueRoundtrip'\n_OPAQUE_ROUNDTRIP_SRC = 'it/crs4/pydoop/mapreduce/pipes/OpaqueRoundtrip.java'\n\n\nclass TestOpaqueSplit(unittest.TestCase):\n\n    def setUp(self):\n        self.fs = hdfs()\n        self.wd = utils.make_wd(self.fs)\n\n    def tearDown(self):\n        self.fs.delete(self.wd)\n        self.fs.close()\n\n    def _make_random_path(self, where=None):\n        return \"%s/%s_%s\" % (where or self.wd, uuid.uuid4().hex, utils.UNI_CHR)\n\n    def _generate_opaque_splits(self, n):\n        return [OpaqueSplit('{}_payload'.format(_)) for _ in range(n)]\n\n    def _test_opaque(self, o, no):\n        self.assertEqual(o.payload, no.payload)\n\n    def _test_opaques(self, opaques, nopaques):\n        self.assertEqual(len(opaques), len(nopaques))\n        for o, no in zip(opaques, nopaques):\n            self._test_opaque(o, no)\n\n    def _run_java(self, in_uri, out_uri, wd):\n        this_directory = os.path.abspath(os.path.dirname(__file__))\n        shutil.copytree(os.path.join(this_directory, 
_JAVA_SRC_ROOT),\n                        os.path.join(wd, _JAVA_SRC_ROOT))\n        classpath = '.:%s:%s:%s' % (\n            wd, pydoop.jar_path(), pydoop.hadoop_classpath())\n        src = os.path.join(wd, _OPAQUE_ROUNDTRIP_SRC)\n        utils.compile_java(src, classpath)\n        utils.run_java(\n            _OPAQUE_ROUNDTRIP_CLASS, classpath, [in_uri, out_uri], wd)\n\n    def _do_java_roundtrip(self, splits, wd='/tmp'):\n        in_uri = self._make_random_path()\n        out_uri = self._make_random_path()\n        with self.fs.open_file(in_uri, 'wb') as f:\n            write_opaque_splits(splits, f)\n        self._run_java(in_uri, out_uri, wd)\n        with self.fs.open_file(out_uri, 'rb') as f:\n            nsplits = read_opaque_splits(f)\n        return nsplits\n\n    def test_opaque(self):\n        payload = {'a': 33, 'b': \"333\"}\n        o = OpaqueSplit(payload)\n        self.assertEqual(payload, o.payload)\n        fname = self._make_random_path('/tmp')\n        with open(fname, 'wb') as f:\n            o.write(f)\n        with open(fname, 'rb') as f:\n            no = OpaqueSplit.read(f)\n        self._test_opaque(o, no)\n        os.unlink(fname)\n\n    def test_write_read_opaque_splits(self):\n        n = 10\n        opaques = self._generate_opaque_splits(n)\n        fname = self._make_random_path('/tmp')\n        with open(fname, 'wb') as f:\n            write_opaque_splits(opaques, f)\n        with open(fname, 'rb') as f:\n            nopaques = read_opaque_splits(f)\n        self._test_opaques(opaques, nopaques)\n        os.unlink(fname)\n\n    def test_opaque_java_round_trip(self):\n        n = 10\n        splits = self._generate_opaque_splits(n)\n        dname = self._make_random_path('/tmp')\n        os.mkdir(dname)\n        nsplits = self._do_java_roundtrip(splits, wd=dname)\n        shutil.rmtree(dname)\n        self._test_opaques(splits, nsplits)\n\n\ndef suite():\n    return 
unittest.TestLoader().loadTestsFromTestCase(TestOpaqueSplit)\n\n\nif __name__ == '__main__':\n    _RUNNER = unittest.TextTestRunner(verbosity=2)\n    _RUNNER.run((suite()))\n"
  },
  {
    "path": "test/sercore/all_tests.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport unittest\nfrom pydoop.test_utils import get_module\n\n\nTEST_MODULE_NAMES = [\n    'test_deser',\n    'test_streams',\n]\n\n\ndef suite(path=None):\n    suites = []\n    for module in TEST_MODULE_NAMES:\n        suites.append(get_module(module, path).suite())\n    return unittest.TestSuite(suites)\n\n\nif __name__ == '__main__':\n    import sys\n    _RESULT = unittest.TextTestRunner(verbosity=2).run(suite())\n    sys.exit(not _RESULT.wasSuccessful())\n"
  },
  {
    "path": "test/sercore/test_deser.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport os\nimport shutil\nimport struct\nimport tempfile\nimport unittest\n\nimport pydoop.sercore as sercore\n\n\nclass TestFileSplit(unittest.TestCase):\n\n    def setUp(self):\n        work_dir = tempfile.mkdtemp(prefix=\"pydoop_\")\n        work_path = os.path.join(work_dir, \"foo\")\n        self.filename, self.offset, self.length = \"foobar\", 0, 100\n        with sercore.FileOutStream(work_path) as s:\n            s.write_string(self.filename)\n            s.write(struct.pack(\">q\", self.offset))\n            s.write(struct.pack(\">q\", self.length))\n        size = os.stat(work_path).st_size\n        with sercore.FileInStream(work_path) as s:\n            self.raw_split = s.read(size)\n        shutil.rmtree(work_dir)\n\n    def test_standard(self):\n        t = sercore.deserialize_file_split(self.raw_split)\n        self.assertEqual(len(t), 3)\n        self.assertEqual(t[0], self.filename)\n        self.assertEqual(t[1], self.offset)\n        self.assertEqual(t[2], self.length)\n\n    def test_errors(self):\n        with self.assertRaises(IOError):\n            sercore.deserialize_file_split(self.raw_split[:-1])\n\n\nCASES = [\n    TestFileSplit,\n]\n\n\ndef suite():\n    ret = unittest.TestSuite()\n    test_loader = unittest.TestLoader()\n    for c in CASES:\n        ret.addTest(test_loader.loadTestsFromTestCase(c))\n  
  return ret\n\n\nif __name__ == '__main__':\n    unittest.TextTestRunner(verbosity=2).run(suite())\n"
  },
  {
    "path": "test/sercore/test_streams.py",
    "content": "# BEGIN_COPYRIGHT\n#\n# Copyright 2009-2026 CRS4.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\"); you may not\n# use this file except in compliance with the License. You may obtain a copy\n# of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT\n# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the\n# License for the specific language governing permissions and limitations\n# under the License.\n#\n# END_COPYRIGHT\n\nimport io\nimport os\nimport shutil\nimport struct\nimport tempfile\nimport unittest\nimport uuid\nfrom random import randint\n\nfrom pydoop.mapreduce.binary_protocol import OUTPUT, PARTITIONED_OUTPUT\nimport pydoop.sercore as sercore\n\nINT64_MIN = -2**63\nINT64_MAX = 2**63 - 1\n\n# TODO: from pydoop.test_utils import UNI_CHR\nUNI_CHR = u'\\N{CYRILLIC CAPITAL LETTER O WITH DIAERESIS}'\n\n\nclass TestFileInStream(unittest.TestCase):\n\n    def setUp(self):\n        with io.open(__file__, \"rb\") as f:\n            self.data = f.read()\n\n    def test_from_path(self):\n        with sercore.FileInStream(__file__) as s:\n            self.__check_stream(s)\n\n    def test_from_file(self):\n        with io.open(__file__, \"rb\") as f:\n            with sercore.FileInStream(f) as s:\n                self.__check_stream(s)\n\n    def test_errors(self):\n        with self.assertRaises(IOError):\n            sercore.FileInStream(uuid.uuid4().hex)\n        with sercore.FileInStream(__file__) as s:\n            s.skip(len(self.data))\n            with self.assertRaises(IOError):\n                s.read(1)\n\n    def __check_stream(self, s):\n        self.assertEqual(s.read(10), self.data[:10])\n        s.skip(20)\n        self.assertEqual(s.read(20), self.data[30:50])\n\n\nclass TestFileOutStream(unittest.TestCase):\n\n    def 
setUp(self):\n        self.wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        self.fname = os.path.join(self.wd, \"foo\")\n        self.data = b\"abcdefgh\"\n\n    def tearDown(self):\n        shutil.rmtree(self.wd)\n\n    def test_from_path(self):\n        with sercore.FileOutStream(self.fname) as s:\n            self.__fill_stream(s)\n        self.__check_stream()\n\n    def test_from_file(self):\n        with io.open(self.fname, \"wb\") as f:\n            with sercore.FileOutStream(f) as s:\n                self.__fill_stream(s)\n        self.__check_stream()\n\n    def test_errors(self):\n        with self.assertRaises(IOError):\n            sercore.FileOutStream(os.path.join(uuid.uuid4().hex, \"foo\"))\n\n    def __fill_stream(self, s):\n        s.write(self.data)\n        s.flush()\n        s.advance(10)\n        s.write(self.data)\n\n    def __check_stream(self):\n        with io.open(self.fname, \"rb\") as f:\n            self.assertEqual(f.read(), self.data + 10 * b'\\x00' + self.data)\n\n\nclass TestSerDe(unittest.TestCase):\n\n    INT = 42\n    LONG = INT64_MAX\n    FLOAT = 3.14\n    STRING = u'BO' + UNI_CHR\n    BYTES = b'a\\x00b'  # bytes r/w methods MUST preserve null characters\n    TUPLE = INT, LONG, FLOAT, STRING, BYTES\n\n    def setUp(self):\n        self.wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        self.fname = os.path.join(self.wd, \"foo\")\n\n    def tearDown(self):\n        shutil.rmtree(self.wd)\n\n    def test_vint(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_vint(self.INT)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_vint(), self.INT)\n\n    def test_vlong(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_vlong(self.LONG)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_vlong(), self.LONG)\n\n    def test_float(self):\n        with sercore.FileOutStream(self.fname) as s:\n          
  s.write_float(self.FLOAT)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertAlmostEqual(s.read_float(), self.FLOAT, 3)\n\n    def test_string_as_string(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_string(self.STRING)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_string(), self.STRING)\n\n    def test_string_as_bytes(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_string(self.STRING)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_bytes(), self.STRING.encode(\"utf8\"))\n\n    def test_bytes_as_string(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_bytes(self.BYTES)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_string(), self.BYTES.decode(\"utf8\"))\n\n    def test_bytes_as_bytes(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_bytes(self.BYTES)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_bytes(), self.BYTES)\n\n    def test_output(self):\n        k, v = b\"key\", b\"value\"\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_output(k, v)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_vint(), OUTPUT)\n            self.assertEqual(s.read_bytes(), k)\n            self.assertEqual(s.read_bytes(), v)\n        part = 1\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_output(k, v, part)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_vint(), PARTITIONED_OUTPUT)\n            self.assertEqual(s.read_vint(), part)\n            self.assertEqual(s.read_bytes(), k)\n            self.assertEqual(s.read_bytes(), v)\n\n    def test_multi_no_tuple(self):\n        self.__fill_stream_multi()\n        
self.__check_stream_multi()\n\n    def test_multi_read_tuple(self):\n        self.__fill_stream_multi()\n        self.__check_stream_tuple()\n\n    def test_multi_write_tuple(self):\n        self.__fill_stream_tuple()\n        self.__check_stream_multi()\n\n    def test_multi_rw_tuple(self):\n        self.__fill_stream_tuple()\n        self.__check_stream_tuple()\n\n    def __fill_stream_multi(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_vint(self.INT)\n            s.write_vlong(self.LONG)\n            s.write_float(self.FLOAT)\n            s.write_string(self.STRING)\n            s.write_bytes(self.BYTES)\n\n    def __fill_stream_tuple(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_tuple(\"ilfsb\", self.TUPLE)\n\n    def __check_stream_multi(self):\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_vint(), self.INT)\n            self.assertEqual(s.read_vlong(), self.LONG)\n            self.assertAlmostEqual(s.read_float(), self.FLOAT, 3)\n            self.assertEqual(s.read_string(), self.STRING)\n            self.assertEqual(s.read_bytes(), self.BYTES)\n\n    def __check_stream_tuple(self):\n        with sercore.FileInStream(self.fname) as s:\n            t = s.read_tuple('ilfsb')\n            self.assertEqual(len(t), 5)\n            self.assertEqual(t[0], self.INT)\n            self.assertEqual(t[1], self.LONG)\n            self.assertAlmostEqual(t[2], self.FLOAT, 3)\n            self.assertEqual(t[3], self.STRING)\n            self.assertEqual(t[4], self.BYTES)\n\n    def test_errors(self):\n        type_mismatches = [\n            (\"vint\", 1.), (\"vint\", \"x\"), (\"vint\", b\"x\"),\n            (\"vlong\", 1.), (\"vlong\", \"x\"), (\"vlong\", b\"x\"),\n            (\"float\", \"x\"), (\"float\", b\"x\"),\n            (\"bytes\", 1), (\"bytes\", 1.), (\"bytes\", u\"x\"),\n            (\"string\", 1), (\"string\", 1.),\n        ]\n        with 
sercore.FileOutStream(self.fname) as s:\n            for name, val in type_mismatches:\n                meth = getattr(s, \"write_%s\" % name)\n                self.assertRaises(TypeError, meth, val)\n        self.__fill_stream_tuple()\n        with sercore.FileInStream(self.fname) as s:\n            with self.assertRaises(IOError):\n                s.read_tuple(\"ilfsbi\")  # EOF\n        with sercore.FileOutStream(self.fname) as s:\n            with self.assertRaises(ValueError):\n                s.write_tuple(\"iis\", (1, 2))  # not enough items\n\n    # \"extra\" features\n\n    def test_string_keep_zeros(self):\n        pystr = self.BYTES.decode(\"utf-8\")\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_string(pystr)\n        with sercore.FileInStream(self.fname) as s:\n            val = s.read_bytes()\n            self.assertEqual(val, self.BYTES)\n\n    def test_string_allow_bytes(self):\n        with sercore.FileOutStream(self.fname) as s:\n            s.write_string(self.BYTES)\n        with sercore.FileInStream(self.fname) as s:\n            self.assertEqual(s.read_bytes(), self.BYTES)\n\n\nclass TestCheckClosed(unittest.TestCase):\n\n    def test_instream(self):\n        with sercore.FileInStream(__file__) as stream:\n            pass\n        ops = (\n            (stream.read, (1,)),\n            (stream.read_vint, ()),\n            (stream.read_vlong, ()),\n            (stream.read_float, ()),\n            (stream.read_string, ()),\n            (stream.read_tuple, (\"ii\",)),\n            (stream.skip, (1,)),\n        )\n        self.__check(ops)\n\n    def test_outstream(self):\n        wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        fname = os.path.join(wd, \"foo\")\n        with sercore.FileOutStream(fname) as stream:\n            pass\n        ops = (\n            (stream.write, (b\"x\",)),\n            (stream.write_vint, (1,)),\n            (stream.write_vlong, (1,)),\n            (stream.write_float, (1.0,)),\n       
     (stream.write_string, (u\"x\",)),\n            (stream.write_tuple, (\"ii\", (1, 1))),\n            (stream.advance, (1,)),\n            (stream.flush, ()),\n        )\n        self.__check(ops)\n        shutil.rmtree(wd)\n\n    def test_double_close(self):\n        wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        fname = os.path.join(wd, \"foo\")\n        with sercore.FileOutStream(fname) as stream:\n            pass\n        stream.close()\n        with sercore.FileInStream(fname) as stream:\n            pass\n        stream.close()\n        with io.open(fname, \"wb\") as f:\n            with sercore.FileOutStream(f) as stream:\n                pass\n            stream.close()\n        with io.open(fname, \"rb\") as f:\n            with sercore.FileInStream(f) as stream:\n                pass\n            stream.close()\n\n    def __check(self, ops):\n        for o, args in ops:\n            self.assertRaises(ValueError, o, *args)\n\n\nclass TestHadoopTypes(unittest.TestCase):\n\n    def setUp(self):\n        self.wd = tempfile.mkdtemp(prefix=\"pydoop_\")\n        self.fname = os.path.join(self.wd, \"foo\")\n\n    def test_long_writable(self):\n        preset_data = (INT64_MIN, -100, -1, 0, 1, 100, INT64_MAX)\n        random_data = [randint(INT64_MIN, INT64_MAX) for _ in range(100)]\n        for data in preset_data, random_data:\n            with io.open(self.fname, \"wb\") as f:\n                f.write(struct.pack(\">\" + len(data) * \"q\", *data))\n            with sercore.FileInStream(self.fname) as stream:\n                for v in data:\n                    self.assertEqual(stream.read_long_writable(), v)\n        # payload entry, e.g., TextInputFormat key\n        k = 1000\n        sk = struct.pack(\">q\", k)\n        with sercore.FileOutStream(self.fname) as stream:\n            stream.write_bytes(sk)\n        with sercore.FileInStream(self.fname) as stream:\n            self.assertEqual(stream.read_vint(), len(sk))\n            
self.assertEqual(stream.read_long_writable(), k)\n\n\nCASES = [\n    TestFileInStream,\n    TestFileOutStream,\n    TestSerDe,\n    TestCheckClosed,\n    TestHadoopTypes,\n]\n\n\ndef suite():\n    ret = unittest.TestSuite()\n    test_loader = unittest.TestLoader()\n    for c in CASES:\n        ret.addTest(test_loader.loadTestsFromTestCase(c))\n    return ret\n\n\nif __name__ == '__main__':\n    unittest.TextTestRunner(verbosity=2).run(suite())\n"
  }
]