[
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\nupdates:\n  - package-ecosystem: \"docker\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"daily\"\n"
  },
  {
    "path": ".github/workflows/build-container.yml",
    "content": "name: Build and push container image\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n    branches: [ main ]\n  release:\n    types: [published]\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n\n    permissions:\n      contents: read\n      packages: write\n      attestations: write\n      id-token: write\n\n    steps:\n      - name: Check out repository\n        uses: actions/checkout@v6\n        with:\n          submodules: recursive\n\n      - name: Set up Podman\n        run: |\n          sudo apt-get update\n          sudo apt-get install -y podman\n\n      - name: Log into the container registry\n        if: github.event_name != 'pull_request'\n        run: echo \"${{ secrets.GITHUB_TOKEN }}\" | podman login ghcr.io -u ${{ github.actor }} --password-stdin\n\n      - name: Build the container image\n        run: |\n          # Container image identifiers must be all-lowercase.\n          # The two commas transform \"User/OSMExpress\" to \"user/osmexpress\".\n          IMAGE_ID=ghcr.io/${GITHUB_REPOSITORY,,}\n          SHA_TAG=${{ github.sha }}\n          LATEST_TAG=latest\n\n          # Build the container image with SHA and latest tags.\n          podman build -t ${IMAGE_ID}:${SHA_TAG} -t ${IMAGE_ID}:${LATEST_TAG} .\n\n          # If this is a release event, tag the image with the release tag.\n          if [ \"${{ github.event_name }}\" = \"release\" ]; then\n            RELEASE_TAG=${{ github.event.release.tag_name }}\n            podman tag ${IMAGE_ID}:${SHA_TAG} ${IMAGE_ID}:${RELEASE_TAG}\n          fi\n\n      - name: Push the container image to the registry\n        if: github.event_name != 'pull_request'\n        run: |\n          IMAGE_ID=ghcr.io/${GITHUB_REPOSITORY,,}\n          SHA_TAG=${{ github.sha }}\n          LATEST_TAG=latest\n\n          # Push the container image with SHA and latest tags.\n          podman push $IMAGE_ID:$SHA_TAG\n          podman push $IMAGE_ID:$LATEST_TAG\n\n          # If this is a release event, 
push the image with the release tag.\n          if [ \"${{ github.event_name }}\" = \"release\" ]; then\n            RELEASE_TAG=${{ github.event.release.tag_name }}\n            podman push $IMAGE_ID:$RELEASE_TAG\n          fi\n"
  },
  {
    "path": ".github/workflows/codeql.yml",
    "content": "name: Scan for security problems with CodeQL\n\non:\n  push:\n    branches: [ \"main\" ]\n  pull_request:\n    branches: [ \"main\" ]\n  schedule:\n    - cron: '17 4 * * 0'\n\njobs:\n  analyze:\n    name: Analyze (${{ matrix.language }})\n    runs-on: 'ubuntu-latest'\n    permissions:\n      security-events: write\n      packages: read  # required to fetch internal or private CodeQL packs\n\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n        - language: actions\n          build-mode: none\n        - language: c-cpp\n          build-mode: none\n        - language: python\n          build-mode: none\n    steps:\n    - name: Check out repository\n      uses: actions/checkout@v6\n\n    - name: Initialize CodeQL\n      uses: github/codeql-action/init@v4\n      with:\n        languages: ${{ matrix.language }}\n        build-mode: ${{ matrix.build-mode }}\n\n    - name: Perform CodeQL analysis\n      uses: github/codeql-action/analyze@v4\n      with:\n        category: \"/language:${{matrix.language}}\"\n"
  },
  {
    "path": ".gitignore",
    "content": "CMakeCache.txt\nCMakeFiles\n*.swp\n*.osmx\n*.osmx-lock\nMakefile\n*.pbf\n*.cmake\nosmxTest\nvenv\ndepends\na.out\n*.osc\n*.osc.gz\n__pycache__\n/osmx\n*.dylib\nTesting/\ncompile_commands.json\ninstall_manifest.txt\ndist/*.tgz\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"vendor/s2geometry\"]\n\tpath = vendor/s2geometry\n\turl = https://github.com/google/s2geometry.git\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required (VERSION 3.5)\nset(CMAKE_C_COMPILER \"/usr/bin/clang\")\nset(CMAKE_CXX_COMPILER \"/usr/bin/clang++\")\nproject(OSMExpress)\nset(CMAKE_CXX_FLAGS_RELEASE \"-O3\")\nset(CMAKE_CXX_FLAGS_DEBUG \"-DDEBUG -g\")\nset(CMAKE_CXX_FLAGS \"-Wno-deprecated\")\nset(CMAKE_CXX_FLAGS \"-Wno-deprecated-declarations\")\nset(CMAKE_CXX_FLAGS \"-pthread\")\nset(OSMX_VERSION \"0.2.0\")\nset(BUILD_SHARED_LIBS OFF CACHE INTERNAL \"\")\nset(ROARING_USE_CPM OFF)\nset(ENABLE_ROARING_TESTS OFF)\n\nlist(APPEND CMAKE_MODULE_PATH \"${CMAKE_SOURCE_DIR}/cmake\")\ninclude(FetchContent)\n\n# TODO: Switch to a released version after next CapnProto release (post 1.2.0).\n# Reason: https://github.com/capnproto/capnproto/issues/2353\n# Change for v1: https://github.com/capnproto/capnproto/pull/2355\n# Change for v2: https://github.com/capnproto/capnproto/pull/2354\nFetchContent_Declare(\n    CapnProto\n    GIT_REPOSITORY https://github.com/capnproto/capnproto.git\n    GIT_TAG master\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    Catch2\n    GIT_REPOSITORY https://github.com/catchorg/Catch2.git\n    GIT_TAG v3.8.1\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS 3)\n\nFetchContent_Declare(\n    cxxopts\n    GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git\n    GIT_TAG v3.3.1\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    LMDB\n    GIT_REPOSITORY https://git.openldap.org/openldap/openldap.git\n    GIT_TAG OPENLDAP_REL_ENG_2_6_10\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    nlohmann_json\n    GIT_REPOSITORY https://github.com/nlohmann/json.git\n    GIT_TAG v3.12.0\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    Osmium\n    GIT_REPOSITORY https://github.com/osmcode/libosmium.git\n    GIT_TAG v2.22.0\n    SOURCE_SUBDIR test/catch\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    Protozero\n    GIT_REPOSITORY 
https://github.com/mapbox/protozero.git\n    GIT_TAG v1.8.0\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_Declare(\n    roaring\n    GIT_REPOSITORY https://github.com/RoaringBitmap/CRoaring.git\n    GIT_TAG v4.3.6\n    EXCLUDE_FROM_ALL\n    FIND_PACKAGE_ARGS)\n\nFetchContent_MakeAvailable(\n    CapnProto Catch2 cxxopts LMDB nlohmann_json Osmium Protozero roaring)\n\nif(NOT CapnProto_FOUND)\n    add_subdirectory(${capnproto_SOURCE_DIR} EXCLUDE_FROM_ALL)\nendif()\n\nif(NOT TARGET LMDB::LMDB)\n    set(LMDB_INCLUDE_DIR ${lmdb_SOURCE_DIR}/libraries/liblmdb)\n    add_library(\n        LMDB_LMDB\n        STATIC\n            ${lmdb_SOURCE_DIR}/libraries/liblmdb/mdb.c\n            ${lmdb_SOURCE_DIR}/libraries/liblmdb/midl.c)\n    target_include_directories(LMDB_LMDB PUBLIC ${LMDB_INCLUDE_DIR})\n\n    add_library(LMDB::LMDB INTERFACE IMPORTED)\n    set_target_properties(\n        LMDB::LMDB\n        PROPERTIES\n            INTERFACE_LINK_LIBRARIES LMDB_LMDB\n            INTERFACE_INCLUDE_DIRECTORIES ${LMDB_INCLUDE_DIR})\nendif()\n\nif(NOT OSMIUM_FOUND)\n    add_library(Osmium INTERFACE)\n    include_directories(SYSTEM ${osmium_SOURCE_DIR}/include)\nendif()\n\nif(NOT Protozero_FOUND)\n    add_library(Protozero INTERFACE)\n    include_directories(SYSTEM ${protozero_SOURCE_DIR}/include)\nendif()\n\nadd_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/vendor/s2geometry EXCLUDE_FROM_ALL)\n\ninclude_directories(vendor/s2geometry/src)\ninclude_directories(include)\n\n# needed for Expat install dir\nif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD)\ninclude_directories(/usr/local/include)\nlink_directories(osmx /usr/local/lib)\nendif()\n\nset(CAPNPC_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/capnpc_generated)\nfile(MAKE_DIRECTORY ${CAPNPC_OUTPUT_DIR})\ncapnp_generate_cpp(CAPNP_SRCS CAPNP_HDRS include/osmx/messages.capnp)\n\nadd_executable(\n    osmx\n    src/cmd.cpp\n    src/storage.cpp\n    src/expand.cpp\n    src/extract.cpp\n    src/update.cpp\n    src/region.cpp\n    
${CAPNP_SRCS})\n\nadd_dependencies(osmx s2)\n\ntarget_include_directories(\n    osmx\n    PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)\n\ntarget_link_libraries(\n    osmx\n    bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB\n    nlohmann_json::nlohmann_json roaring s2 z)\n\nset_property(TARGET osmx PROPERTY CXX_STANDARD 14)\n\nadd_executable(osmxTest test/test_region.cpp src/region.cpp)\n\nset_property(TARGET osmxTest PROPERTY CXX_STANDARD 14)\n\ntarget_include_directories(\n    osmxTest\n    PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)\n\ntarget_link_libraries(\n    osmxTest\n    bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB\n    nlohmann_json::nlohmann_json roaring s2 z\n    Catch2::Catch2WithMain)\n\nenable_testing()\nadd_test(osmxTest osmxTest)\n\ninstall(TARGETS osmx DESTINATION bin)\nadd_custom_target(archive COMMAND dist/archive.sh ${OSMX_VERSION} ${CMAKE_SYSTEM_NAME})\nadd_dependencies(archive osmx)\n\nadd_library(\n    osmx-static\n    STATIC\n    src/storage.cpp\n    src/expand.cpp\n    src/extract.cpp\n    src/update.cpp\n    src/region.cpp)\n\nset_property(TARGET osmx-static PROPERTY CXX_STANDARD 14)\n\ntarget_include_directories(\n    osmx-static\n    PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)\n\ntarget_link_libraries(\n    osmx-static\n    bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB\n    nlohmann_json::nlohmann_json roaring s2 z)\n"
  },
  {
    "path": "Dockerfile",
    "content": "FROM alpine:3.22 AS builder\n\n# TODO: Add croaring-dev once available in Alpine Linux.\n# https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/87769\nRUN apk add --no-cache  \\\n    clang               \\\n    cmake               \\\n    git                 \\\n    linux-headers       \\\n    make                \\\n    python3-dev         \\\n                        \\\n    bzip2-dev           \\\n    catch2-3            \\\n    capnproto-dev       \\\n    cxxopts-dev         \\\n    expat-dev           \\\n    libosmium-dev       \\\n    lmdb-dev            \\\n    nlohmann-json       \\\n    openssl-dev         \\\n    protozero-dev       \\\n    zlib-dev\n\nWORKDIR /usr/src/osmexpress\nCOPY . /usr/src/osmexpress\n\nRUN cmake -DCMAKE_BUILD_TYPE=Release .\nRUN make -j16 && ./osmxTest && make install\n\n\nFROM alpine:3.22\n\n# cxxopts, libosmium, nlohmann-json and protozero are header-only\n# C++ libraries; catch2 is only used for testing. We do not need\n# them in the production container.\nRUN apk add --no-cache  \\\n    libbz2              \\\n    libcrypto3          \\\n    capnproto           \\\n    libexpat            \\\n    libssl3             \\\n    lmdb                \\\n    zlib\n\nCOPY --from=builder /usr/local/bin/osmx /usr/local/bin/osmx\nENTRYPOINT [ \"/usr/local/bin/osmx\" ]\n"
  },
  {
    "path": "LICENSE.md",
    "content": "Copyright 2019 Protomaps. Some source code from https://github.com/osmcode/pyosmium Copyright (c) 2014-2018, Sarah Hoffmann, All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "README.md",
    "content": "# OSM Express\n\n![Screenshot](examples/screenshot.png)\n\n[Manual](docs/MANUAL.md), [Programming Guide](docs/PROGRAMMING_GUIDE.md)\n\nOSM Express is a fast storage format for OpenStreetMap that powers [SliceOSM](https://github.com/SliceOSM). It's designed as a low level building block specific to the OSM data model; common access patterns such as random lookups by ID, in-place minutely updates, and spatial queries are efficient and simple to manage in production applications. \n\n## Features\n\n* **Random access:** Look up nodes, ways and relations and their metadata by ID; fetch member elements of ways and relations to construct geometries.\n* **Spatial indexing:** Nodes are bucketed into [S2 Geometry](http://s2geometry.io) cells. Access a region by providing a cell covering; works for nonrectangular regions.\n* **Scalable:** OSM Express works the same way for OSM data of any size, from a small city to the entire planet. The entire planet can be worked with efficiently on typical hardware such as a laptop computer.\n* **In-place updates:** Included are scripts to download minutely changesets from [planet.openstreetmap.org](https://planet.openstreetmap.org) and apply them to an .osmx database.\n* **Concurrent access:** Multiple processes can open the database file for reading simultaneously. No running server process is required. Writing minutely updates doesn't block reader access. Reads and writes are transactional. 
\n* **Portable:** An .osmx file can be read and written to from either C++ or Python.\n\n## Details\n\nOSM Express is a compact 1,500 LOC, and really a cobbling together of a few low-level libraries:\n\n* [Libosmium](https://osmcode.org/libosmium/index.html) for the reading and writing of .osm.pbf files.\n* [LMDB](https://symas.com/lmdb) for a memory-mapped ACID key-value store with fast cursor iteration.\n* [Cap'n Proto](https://capnproto.org) for in-memory and on-disk representation of OSM elements.\n* [CRoaring](https://roaringbitmap.org) for in-memory representation of ID sets as compressed bitmaps.\n* [S2 Geometry](http://s2geometry.io) for indexing of geographic coordinates.\n\n## Installation\n\n[See the manual for instructions on building from source](/docs/PROGRAMMING_GUIDE.md). \n\n## Usage\n\nOSM Express is being used in production for [SliceOSM](https://slice.openstreetmap.us) and the file format is stable.\n\n* Use the `osmx` command line tool to expand a .osm.pbf to an .osmx database and perform basic tasks such as extracting regions or querying by ID. No programming required.\n* Use the [Python library](python/) via `pip install osmx` to access an .osmx database programmatically. See the [Python Examples](python/examples) for how to create command line tools, webservers or detailed diffs based on minutely data. \n* Use the C++ library to access an .osmx database programmatically. \n\n### Command line\n\n```bash\nosmx expand planet.osm.pbf planet.osmx # converts a pbf or xml to osmx. 
Takes 5-10 hours for the planet, resulting in a ~600GB file.\nosmx extract planet.osmx extract.osm.pbf --bbox 40.7411\\,-73.9937\\,40.7486\\,-73.9821 # extract a new pbf for the given bounding box.\nosmx update planet.osmx 3648548.osc 3648548 2019-08-29T17:50:02Z --commit # applies an OsmChange diff.\nosmx query planet.osmx # Print statistics, seqnum and timestamp.\nosmx query planet.osmx way 34633854 # look up an element by ID.\n```\n\n`osmx extract` has a flag `--noUserData` intended for public facing instances which will remove the user, uid and changeset fields to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR).\n\nDetailed command line usage can be found in the [Manual](docs/MANUAL.md).\n\n### Headers\n\nThe C++ API is very rough with minimal abstraction. [examples/way_wkt.cpp](examples/way_wkt.cpp) is a short, commented C++ program that uses the headers to read a way from a .osmx file and outputs its [Well-Known Text](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) LineString geometry.\n\n```bash\n./way_wkt ../ny.osmx 34633854\nEmpire State Building\tLINESTRING (-73.9864855 40.7484833,-73.9851554 40.7479226,-73.9848259 40.7483735,-73.9861526 40.7489422,-73.9863111 40.7487242,-73.9863282 40.7487007,-73.9864684 40.7485078,-73.9864855 40.7484833)\n```\n\n[examples/bbox_wkt.cpp](examples/bbox_wkt.cpp) is a more complex example that takes a bounding box as input, and returns WKT LineStrings for ways that overlap the bbox. This overlap is an approximation based on cells and may include ways outside the bounding box.\n\nDetailed C++ usage can be found in the [Programming Guide](docs/PROGRAMMING_GUIDE.md).\n\n### Docker (experimental)\n\nA `Dockerfile` is provided but users will need to build their own container. To do so, run:\n```\ndocker build -t osmx .\n```\n\n## License and Development\n\n2-Clause BSD, see [LICENSE.md](LICENSE.md)."
  },
  {
    "path": "dist/archive.sh",
    "content": "#!/bin/bash\nset -e\nFILENAME=dist/osmexpress-$1-$2.tgz\nrm -f LICENSES\nprintf \"osmexpress\\n===========\\n\" >> LICENSES\ncat LICENSE.md >> LICENSES\nprintf \"\\ncapnproto\\n===========\\n\" >> LICENSES\ncat vendor/capnproto/LICENSE >> LICENSES\nprintf \"\\ncroaring\\n===========\\n\" >> LICENSES\ncat vendor/CRoaring/LICENSE >> LICENSES\nprintf \"\\ncxxopts\\n===========\\n\" >> LICENSES\ncat vendor/cxxopts/LICENSE >> LICENSES\nprintf \"\\njson\\n===========\\n\" >> LICENSES\ncat vendor/json/LICENSE.MIT >> LICENSES\nprintf \"\\nlibosmium\\n===========\\n\" >> LICENSES\ncat vendor/libosmium/LICENSE >> LICENSES\nprintf \"\\nlmdb\\n===========\\n\" >> LICENSES\ncat vendor/lmdb/libraries/liblmdb/LICENSE >> LICENSES\nprintf \"\\nprotozero\\n===========\\n\" >> LICENSES\ncat vendor/protozero/LICENSE.md >> LICENSES\nprintf \"\\ns2\\n===========\\n\" >> LICENSES\ncat vendor/s2geometry/LICENSE >> LICENSES\ntar -cvzf $FILENAME osmx LICENSES\nrm LICENSES\necho \"created $FILENAME\"\n"
  },
  {
    "path": "docs/MANUAL.md",
    "content": "**OSM Express** is a database file format for OpenStreetMap data (.osmx), as well as a command line tool and C++ library for reading and writing .osmx files. Find it on GitHub at [github.com/bdon/OSMExpress](https://github.com/bdon/OSMExpress)\n\n![screenshot](https://github.com/bdon/OSMExpress/blob/main/examples/screenshot.png?raw=true)\n\n*Illustration of the cell covering for a rectangular input region and its overlap with indexed OpenStreetMap geometries.*\n\n## Motivation\n\nHere are some use cases that OSM Express fits well.\n\n* You want an offline copy of OpenStreetMap, which can be updated every day, hour or minute from the main openstreetmap.org database, instead of redownloading the entire planet.\n* You want to quickly access all OSM objects in a geographical region, such as a neighborhood, city or small country.\n* You want to quickly look up OSM objects by ID, such as getting the `height` and `name` tags for a given way that represents a building, and construct geometries for ways and relations.\n* You want to embed a database that does any of the above, such as in a web application that returns OSM objects as GeoJSON.\n\n## Quick Start\n\n### Command Line\n\nFor information on how to compile the `osmx` program from source, see the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md)\n\nOnce you have the `osmx` command line program, you'll need to start with an .osm.pbf or OSM XML file. 
The Planet file is available at [planet.openstreetmap.org](https://planet.openstreetmap.org), but it's preferable to begin with something smaller to learn with.\n\nThere are numerous sites for downloading .osm.pbf extracts, including [SliceOSM](https://slice.openstreetmap.us), a service itself powered by OSM Express.\n\nExample: create an .osmx file by using the `expand` command on the .osm.pbf file:\n\n    osmx expand new_york_county.osm.pbf new_york_county.osmx\n\nThis will result in a 91 MB .osmx file.\n\nWe can access objects inside this .osmx file by ID, displaying the node IDs of its member nodes and all tags:\n\n    osmx query new_york_county.osmx way 34633854\n\n    > 402743563 402743567 402743571 402743573 2709307502 2709307499 2709307464 402743563\n    addr:city=New York City addr:housenumber=350 addr:postcode=10018 ...\n\nWe can also extract regions of the .osmx file into a new .osm.pbf file, which is useful for interoperability with other OSM tools.\n\n    osmx extract new_york_county.osmx downtown.osm.pbf --bbox 40.7411\\,-73.9937\\,40.7486\\,-73.9821\n\n### Updating\n\n`utils/osmx-update` is provided to update `.osmx` to the most recent file on a replication server using `osmx update`. For example to update a planet.osmx file with minutely updates:\n\n    python utils/osmx-update planet.osmx https://planet.openstreetmap.org/replication/minute/\n\n## Library\n\nThe OSM Express library is intentionally minimal and non-opinionated - for example, no attempt is made to transform OSM tags to a fixed schema, distinguish between polygon and linear ways, or assemble multipolygon relations into polygons. For these typical tasks it's recommended to use OSM Express as a library in your own program. Documentation and example code are available at the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md)\n\n## Other Languages\n\nAn .osmx file can be opened and queried directly in a Python program using the `osmx` Python package. 
See [Python](/docs/PROGRAMMING_GUIDE.md#python) for details.\n\nLanguages other than Python may be supported in the future by either language-specific libraries or a new C API. See [Development](#Development) if you're interested or discuss on GitHub.\n\n## Technical Details\n\n### Storage Requirements\n\nA full planet.osmx created from planet.osm.pbf (47 GB) is around 580 GB.\n\nOSM Express is optimized for fast lookups, extracts and updates, goals opposed to making the database size as compact as possible. A typical .osmx file can be 10 times the size of the corresponding .osm.pbf, because:\n\n* Relationships between parent elements and member elements are encoded in both directions, to enable lookups from node to way, way to relation, etc.\n* The storage engine (LMDB) has no built-in compression, unlike some LSM-tree storage engines such as LevelDB.\n* The `mmap`-based design of LMDB and Cap'n Proto requires that fields are word-aligned on disk, causing storage overhead.\n* Keys and values are stored in full as strings. Keys could be hardcoded in a lookup table, saving about 10% space, but this would make the database less portable.\n\nAs of 2019, fast local storage is cheap; 1 terabyte solid state drives are less than 150 USD. On managed hosting providers like AWS and Google Cloud, extra storage is affordable compared to more memory or CPU cores. \n\nIf it's necessary to optimize for storage space, an .osmx file can be stored on a filesystem with transparent compression such as ZFS or Btrfs, at the cost of CPU overhead. This can reduce planet.osmx to around 200GB.\n\n### Privacy\n\nOSM Express stores all metadata - version, timestamp, changeset, username and user ID - for all OSM objects, except for untagged nodes. 
The `osmx extract` `--noUserData` flag ignores changeset, username and user ID information for extracts, to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR).\n\n### Performance\n\nOSM Express should work with reasonable amounts of memory, less than 8 gigabytes, even for `expand` and `extract` on planet.osmx. The strongest predictor of performance is I/O latency. If benchmarking different storage environments, I/O latency can be best measured via IOPS at queue depth 1.\n\n*WIP: benchmarks*\n\n## Alternatives\n\n* [osmium-tool](https://osmcode.org/osmium-tool/index.html) for creating extracts from osm.pbf files. This is more efficient for large country or continent sized extracts, or any task where the entire dataset needs to be read.\n* [Overpass API](http://overpass-api.de) is a powerful server application for interactive querying and tag-based lookup of OSM data.\n* [conveyal/osm-lib](https://github.com/conveyal/osm-lib) is a similar design, written in Java.\n* [imposm3](https://github.com/omniscale/imposm3), [osm2pgsql](https://github.com/openstreetmap/osm2pgsql) if you want OSM data in PostgreSQL and/or want to render maps. \n\n## Concepts\n\n### File Layout\n\nThe `osmx query` command with no arguments reveals the layout of an .osmx database:\n\n    osmx query planet.osmx\n    locations: 5313351219\n    nodes: 144307630\n    ways: 590470034\n    relations: 6895065\n    cell_node: 5313351219\n    node_way: 5906888644\n    node_relation: 10242142\n    way_relation: 63350432\n    relation_relation: 497137\n\nan .osmx file is a LMDB database with 10 sub-databases. 
All keys are 64 bit integers in [host byte order](https://en.wikipedia.org/wiki/Endianness) (little-endian on most modern CPUs).\n\n* `locations`: maps OSM node IDs to Locations, which store the coordinates and version number of the node (documented below).\n* `nodes`, `ways`, `relations` map OSM object IDs to a Cap'n Proto message defined in [`include/osmx/messages.capnp`](https://github.com/bdon/OSMExpress/blob/main/include/osmx/messages.capnp).\n    - `nodes` only contains *tagged* nodes; the value for each key describes the node's tags and other metadata. Untagged nodes are included only in `locations` to save space on disk.\n    - `ways` contains all ways; the value for each key describes the way's tags, metadata, and the list of node IDs that are part of the way.\n    - `relations` contains all relations; the value for each key contains the relation's tags, metadata, and the IDs and roles of its members.\n* `cell_node` maps a level 16 [S2 cell ID](http://s2geometry.io/devguide/s2cell_hierarchy.html) to a node ID, using LMDB's `DUPSORT` to store multiple values for each key (since each S2 cell will intersect many OSM objects).\n* `node_way`, `node_relation`, `way_relation` and `relation_relation` map OSM object IDs to their parent object IDs, also using `DUPSORT` (since nodes can belong to multiple ways, ways to multiple relations, etc).\n\nFinally, the `metadata` sub-database holds arbitrary string:string values. This is used to store the replication sequence number and timestamp. \n\nIt is important to note that LMDB transactions span all sub-databases. 
This means that a read operation will retrieve the correct `timestamp` for the data it fetches, even if the database is written to while the read is happening.\n\n#### Encoding of Locations\n\nValues in the `locations` sub-database are structs with the following layout:\n\n```c\nstruct Location {\n    int32_t longitude_i;\n    int32_t latitude_i;\n    int32_t version;\n};\n```\n\nEach field is serialized in host byte order.\n\nLongitude and latitude are stored as integers. To obtain the actual longitude and latitude as decimal numbers, divide the integer value by 10000000 (1e7). This integer-based encoding is precise to within a few centimeters anywhere on Earth. The same encoding is used by [libosmium](https://docs.osmcode.org/libosmium/latest/classosmium_1_1Location.html) and by the openstreetmap.org database internally.\n\n### Spatial Indexing\n\nOSM Express avoids expensive point-in-polygon computations for spatial operations. Instead, a query region is approximated by S2 cells with maximum level 16. The level 16 is chosen as a reasonable tradeoff between covering precision and storage space.\n\n*Author's note: the S2 Covering of a region may differ depending on choice of architecture and compiler, while still being valid. Let me know if you know how to make this consistent.*\n\n## Presentations\n\n[State of the Map US 2019, Minneapolis - Video](https://2019.stateofthemap.us/program/sun/osm-express-a-spatial-file-format-for-the-planet.html)\n"
  },
  {
    "path": "docs/PROGRAMMING_GUIDE.md",
    "content": "## Building from source\n\nOSM Express uses CMake for its build scripts. It's only been tested with the Clang C++ compiler so far.\n\nMost dependencies are included as Git submodules in the `vendor/` directory, but a few stable, common libraries are expected to exist on your system, including bzip2, zlib, Expat and OpenSSL. \n\n### FreeBSD 12\n\n`sudo pkg install cmake expat`\n\n### macOS \n\nvia Homebrew: `brew install cmake bzip2 zlib openssl expat`\n\n*Additional macOS notes: the Clang compiler should be available via XCode Command Line Tools.*\n### Ubuntu 22.04\n\nvia Apt package manager: `sudo apt install cmake clang libbz2-dev libz-dev libexpat-dev libssl-dev python3-dev`\n\n### Build Instructions\n\n    git clone --recursive https://github.com/bdon/OSMExpress.git\n    cd OSMExpress\n    cmake -DCMAKE_BUILD_TYPE=Release .\n    make\n\n*macOS note: If OpenSSL is installed through Homebrew, you may need to add an option to your cmake command: `-DOPENSSL_ROOT_DIR=/usr/local/opt/openssl\\@3`\nFor macOS systems with Apple Silicon, this path is `-DOPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl\\@3`\n\n## Using the C++ Headers\n\n### Example: Way ID to WKT\n\nSee [examples/way_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/way_wkt.cpp) for a commented program.\n\n### Example: Bbox to Way WKTs\n\nSee [examples/bbox_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/way_wkt.cpp) for a commented program.\n\n## Python\n\nInstall the library with `pip install osmx` . This will also download and install the `pycapnp` and `lmdb` Python libraries.\n\nThe Python API supports only location, node, way and relation lookups at the moment. 
Example:\n\n    import osmx\n\n    env  = osmx.Environment('planet.osmx')\n    txn = osmx.Transaction(env)\n    locations = osmx.Locations(txn)\n    nodes = osmx.Nodes(txn)\n    ways = osmx.Ways(txn)\n\n    way = ways.get(123456)\n\n    for node_id in way.nodes:\n        print(locations.get(node_id))\n\n    print(osmx.tag_dict(way.tags))\n"
  },
  {
    "path": "examples/.gitignore",
    "content": "way_wkt\nbbox_wkt\n"
  },
  {
    "path": "examples/CMakeLists.txt",
    "content": "cmake_minimum_required (VERSION 3.5)\nset(CMAKE_CXX_FLAGS_RELEASE \"-O3\")\nset(CMAKE_CXX_FLAGS_DEBUG \"-DDEBUG -g\")\nset(CMAKE_CXX_FLAGS \"-Wno-deprecated\")\nset(CMAKE_CXX_FLAGS \"-Wno-deprecated-declarations\")\nset(CMAKE_CXX_FLAGS \"-pthread\")\n\ninclude_directories(../vendor/libosmium/include)\ninclude_directories(../vendor/protozero/include)\ninclude_directories(../vendor/s2geometry/src)\ninclude_directories(../vendor/CRoaring/cpp)\ninclude_directories(../vendor/CRoaring/include)\ninclude_directories(../vendor/cxxopts/include)\ninclude_directories(/usr/local/include)\ninclude_directories(../depends)\ninclude_directories(../include)\ninclude_directories(../vendor/lmdb/libraries/liblmdb)\ninclude_directories(../vendor/capnproto/c++/src)\nlink_directories(../vendor/s2geometry)\nlink_directories(/usr/local/lib)\nlink_directories(../vendor/CRoaring)\nlink_directories(../vendor/capnproto)\nlink_directories(../vendor/lmdb/libraries/liblmdb/)\nlink_directories(../vendor/capnproto/c++/src/capnp/)\nlink_directories(../vendor/capnproto/c++/src/kj/)\nlink_directories(../vendor/CRoaring/src/)\nlink_directories(${OPENSSL_ROOT_DIR}lib/)\n\nadd_executable(way_wkt way_wkt.cpp ../src/storage.cpp)\ntarget_link_libraries(way_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto)\nset_property(TARGET way_wkt PROPERTY CXX_STANDARD 14)\n\nadd_executable(bbox_wkt bbox_wkt.cpp ../src/storage.cpp)\ntarget_link_libraries(bbox_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto)\nset_property(TARGET bbox_wkt PROPERTY CXX_STANDARD 14)\n"
  },
  {
    "path": "examples/bbox_wkt.cpp",
    "content": "#include <vector>\n#include <iomanip>\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n#include \"s2/s2latlng.h\"\n#include \"s2/s2region_coverer.h\"\n#include \"s2/s2latlng_rect.h\"\n#include \"roaring/roaring64map.hh\"\n\nusing namespace std;\n\n// Example of a very simple program to get OSM objects in a region\n// and print them out as WKT.\n// see way_wkt for a simpler example.\n// This program does not handle Relations at all,\n// so it can't be used to find all Polygons in a region, since they may be Multipolygon relations.\n// Usage: ./bbox_wkt OSMX_FILE MIN_LON MIN_LAT MAX_LON MAX_LAT\n\nint main(int argc, char* argv[]) {\n  vector<string> args(argv, argv+argc);\n\n  MDB_env* env = osmx::db::createEnv(args[1]);\n  MDB_txn* txn;\n  CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));\n\n  // Create a S2LatLngRect.\n  auto lo = S2LatLng::FromDegrees(stof(args[3]),stof(args[2]));\n  auto hi = S2LatLng::FromDegrees(stof(args[5]),stof(args[4]));\n  auto bbox = S2LatLngRect{lo,hi};\n\n  // Find the cell covering for the LatLngRect,\n  // with a maximum cell level of 16.\n  // Although nodes in the database are stored at level=16,\n  // Cells with levels less than 16 will be correctly handled by the traverseCell function.\n  // This allows for more compact representations of large regions.\n\n  S2RegionCoverer::Options options;\n  options.set_max_level(16);\n  S2RegionCoverer coverer(options);\n  S2CellUnion covering = coverer.GetCovering(bbox);\n\n  cerr << \"Cell covering size: \" << covering.size() << endl;\n\n  // Get all node_ids that match the given region.\n  Roaring64Map node_ids;\n  MDB_dbi dbi;\n  MDB_cursor *cursor;\n  CHECK_LMDB(mdb_dbi_open(txn, \"cell_node\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n  CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n  for (auto cell_id : covering.cell_ids()) {\n    osmx::db::traverseCell(cursor,cell_id,node_ids);\n  }\n  mdb_cursor_close(cursor);\n\n  cerr << 
\"Nodes in region: \" << node_ids.cardinality() << endl;\n\n  // Get all way_ids that are referred to by node_ids.\n  Roaring64Map way_ids;\n  CHECK_LMDB(mdb_dbi_open(txn, \"node_way\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n  CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n  for (auto const &node_id : node_ids) {\n    osmx::db::traverseReverse(cursor,node_id,way_ids);\n  }\n  mdb_cursor_close(cursor);\n\n  cerr << \"Ways in region: \" << way_ids.cardinality() << endl;\n\n  osmx::db::Locations locations(txn);\n  osmx::db::Elements ways(txn,\"ways\");\n\n  for (auto way_id : way_ids) {\n    // Fetch a Way element by ID.\n    auto message = ways.getReader(way_id);\n    auto way = message.getRoot<Way>();\n\n    // Tags are stored as a vector of key,value.\n    // Iterate through all tags and print the value if key = name.\n    auto tags = way.getTags();\n    for (int i = 0; i < tags.size() / 2; i++) {\n      if (tags[i*2] == \"name\") cout << tags[i*2+1].cStr();\n    }\n\n    // Assemble a WKT LineString geometry.\n    cout << \"\\tLINESTRING (\";\n    cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places.\n    auto nodes = way.getNodes();\n    for (int i = 0; i < nodes.size(); i++) {\n      auto location = locations.get(nodes[i]);\n      if (i > 0) cout << \",\";\n      cout << location.coords.lon() << \" \" << location.coords.lat();\n    }\n    cout << \")\" << endl;\n  }\n\n  mdb_env_close(env); // close the database.\n}\n"
  },
  {
    "path": "examples/way_wkt.cpp",
    "content": "#include <vector>\n#include <iomanip>\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\n\n// Example of a very simple C++ program that uses osmx headers\n// to open a database, look up a way by ID, and assemble a WKT geometry from its nodes.\n// Usage: ./print_wkt OSMX_FILE WAY_ID\n\nint main(int argc, char* argv[]) {\n  vector<string> args(argv, argv+argc);\n\n  // Opening a database: create an Environment, and then a Transaction within the environment. \n  MDB_env* env = osmx::db::createEnv(args[1]);\n  MDB_txn* txn;\n  CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));\n\n  // Create a Database handle for each element type within the Transaction.\n  osmx::db::Locations locations(txn);\n  osmx::db::Elements ways(txn,\"ways\");\n\n  // Fetch a Way element by ID.\n  auto message = ways.getReader(stol(args[2]));\n  auto way = message.getRoot<Way>();\n\n  // Tags are stored as a vector of key,value.\n  // Iterate through all tags and print the value if key = name.\n  auto tags = way.getTags();\n  for (int i = 0; i < tags.size() / 2; i++) {\n    if (tags[i*2] == \"name\") cout << tags[i*2+1].cStr();\n  }\n\n  // Assemble a WKT LineString geometry.\n  cout << \"\\tLINESTRING (\";\n  cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places.\n  auto nodes = way.getNodes();\n  for (int i = 0; i < nodes.size(); i++) {\n    auto location = locations.get(nodes[i]);\n    if (i > 0) cout << \",\";\n    cout << location.coords.lon() << \" \" << location.coords.lat();\n  }\n  cout << \")\" << endl;\n\n  mdb_env_close(env); // close the database.\n}\n"
  },
  {
    "path": "include/osmx/cmd.h",
    "content": "void cmdExpand(int argc, char* argv[]);\nvoid cmdExtract(int argc, char* argv[]);\nvoid cmdUpdate(int argc, char* argv[]);\n"
  },
  {
    "path": "include/osmx/messages.capnp",
    "content": "@0xd3a7e843a9c03421;\n\nstruct Metadata {\n  version @0 :UInt32;\n  timestamp @1 :UInt64;\n  changeset @2 :UInt32;\n  uid @3 :UInt32;\n  user @4 :Text;\n}\n\nstruct Node {\n  tags @0 :List(Text);\n  metadata @1 :Metadata;\n}\n\nstruct Way {\n  nodes @0 :List(UInt64);\n  tags @1 :List(Text);\n  metadata @2 :Metadata;\n}\n\nstruct RelationMember {\n  ref @0 :UInt64;\n  type @1 :Type;\n  role @2 :Text;\n\n  enum Type {\n    node @0;\n    way @1;\n    relation @2;\n  }\n}\n\nstruct Relation {\n  tags @0 :List(Text);\n  members @1 :List(RelationMember);\n  metadata @2 :Metadata;\n}\n"
  },
  {
    "path": "include/osmx/region.h",
    "content": "#include <string>\n\n#include <nlohmann/json.hpp>\n#include \"s2/s2region.h\"\n#include \"s2/s2cell_union.h\"\n#include \"s2/s2region_coverer.h\"\n#include \"s2/s2latlng_rect.h\"\n\nclass Region {\npublic:\n\tRegion(const std::string &text, const std::string &ext);\n\tbool Contains(S2Point p);\n\tS2CellUnion GetCovering(S2RegionCoverer &coverer);\n\tS2LatLngRect GetBounds();\n\nprivate:\n\tvoid AddS2RegionFromGeometry(nlohmann::json &geometry);\n\tvoid AddS2RegionFromPolyFile(std::istringstream &file);\n\tstd::vector<std::unique_ptr<S2Region>> mRegions;\n};\n"
  },
  {
    "path": "include/osmx/storage.h",
    "content": "#pragma once\n#include \"lmdb.h\"\n#include \"osmium/osm/location.hpp\"\n#include \"kj/io.h\"\n#include \"capnp/message.h\"\n#include \"capnp/serialize.h\"\n#include \"osmx/messages.capnp.h\"\n#include \"osmx/util.h\"\n#include \"s2/s2cell_id.h\"\n#include \"roaring/roaring64map.hh\"\n\nnamespace osmx { namespace db {\n\n\nuint64_t to64(osmium::Location loc);\nosmium::Location toLoc(uint64_t val);\nMDB_env *createEnv(std::string path, bool writable = false);\n\nclass Noncopyable {\n  public:\n  Noncopyable() { }\n  Noncopyable( const Noncopyable& ) = delete;\n  Noncopyable& operator=( const Noncopyable& ) = delete;\n};\n\nclass Metadata : public Noncopyable {\n  public:\n  Metadata(MDB_txn *txn);\n  void put(const std::string &key_str, const std::string &value_str);\n  std::string get(const std::string &key_str);\n\n  private:\n  MDB_txn* mTxn;\n  MDB_dbi mDbi;\n};\n\nclass Elements : public Noncopyable {\n  public:\n  Elements(MDB_txn *txn, const std::string &name);\n  void put(uint64_t id, kj::VectorOutputStream &vos, int flags = 0);\n  void del(uint64_t id);\n  bool exists(uint64_t id);\n  capnp::FlatArrayMessageReader getReader(uint64_t id);\n\n  private:\n  MDB_txn *mTxn;\n  MDB_dbi mDbi;\n};\n\nclass Location {\n  public:\n  Location() { };\n  Location(osmium::Location l, int32_t v) : coords(l), version(v) {\n\n  }\n\n  bool is_undefined() {\n    return coords.is_undefined();\n  }\n\n  bool is_defined() {\n    return coords.is_defined();\n  }\n  osmium::Location coords;\n  int32_t version;\n};\n\nclass Locations : public Noncopyable {\n  public:\n  Locations(MDB_txn *txn);\n  void put(uint64_t id, const Location value, int flags = 0);\n  void del(uint64_t id);\n  bool exists(uint64_t id);\n  Location get(uint64_t id) const;\n\n  private:\n  MDB_txn* mTxn;\n  MDB_dbi mDbi;\n};\n\nclass Index : public Noncopyable {\n  public:\n  Index(MDB_txn *txn, const std::string &name);\n  void put(uint64_t from, uint64_t osm_id, int flags = 0);\n  void 
del(uint64_t from, uint64_t osm_id );\n\n  private:\n  MDB_dbi mDbi;\n  MDB_txn *mTxn;\n};\n\nclass IndexWriter : public Noncopyable {\n  public:\n  IndexWriter(MDB_env *env, const std::string &name);\n  void put(uint64_t from, uint64_t osm_id, int flags = 0);\n  void commit();\n\n  private:\n  MDB_env *mEnv;\n  MDB_dbi mDbi;\n  MDB_txn *mTxn;\n  std::string mName;\n  int mWrites = 0;\n};\n\nvoid traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set);\nvoid traverseReverse(MDB_cursor *cursor, uint64_t from, roaring::Roaring64Map &set);\n\n} }\n"
  },
  {
    "path": "include/osmx/util.h",
    "content": "#pragma once\n#include <chrono>\n#include <iostream>\n#include \"lmdb.h\"\n#include \"osmium/tags/taglist.hpp\"\n\n#define CHECK_LMDB(x) if (0 != x) { printf(\"%s, file %s, line %d.\\n\", mdb_strerror(x), __FILE__, __LINE__); abort(); }\n\n// a higher cell level results in more precise extracts, as the size of 1 cell is the minimum index resolution.\n#define CELL_INDEX_LEVEL 16\n\nclass Timer {\n  public:\n  Timer(std::string name) : mName(name) {\n    mStartTime = std::chrono::high_resolution_clock::now();\n    std::cout << \"Start \" << mName << std::endl;\n  }\n\n  ~Timer() {\n    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now() - mStartTime ).count();\n    std::cout << \"Finished \" << mName << \" in \" << duration/1000.0 << \" seconds.\" << std::endl;\n  }\n\n  private:\n  std::chrono::high_resolution_clock::time_point mStartTime;\n  std::string mName;\n};\n\ntemplate <typename T>\nvoid setTags(const osmium::TagList &tags, T &builder) {\n  builder.initTags(tags.size() * 2);\n  auto tagBuilder = builder.getTags();\n\n  int i = 0;\n  for (auto const &tag : tags) {\n    tagBuilder.set(i,tag.key());\n    i++;\n    tagBuilder.set(i,tag.value());\n    i++;\n  }\n}\n"
  },
  {
    "path": "python/.gitignore",
    "content": "build\ndist\n*.egg-info\n"
  },
  {
    "path": "python/README.md",
    "content": "A Python package to read OSM Express (.osmx) database files. \n\n## Installation\n\n```bash\npip install osmx\n```\n\n## Usage\n\n[examples/read_way.py](examples/read_way.py) : Simple program: given a way ID, print the coordinates of its member nodes, its metadata and all the relations it directly belongs to.\n\n[examples/web_server.py](examples/web_server.py) Uses only the Python standard library; starts an HTTP server that takes a url like /way/WAY_ID and returns a GeoJSON feature for that OSM object. Shows example of how to descend into relation members. \n\n[examples/augmented_diff.py](examples/augmented_diff.py) Creates an [augmented diff](https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs) similar to those implemented by Overpass API, but limited to a single OsmChange (.osc) replication sequence file. Requires that the OSMX database represents the replication sequence state directly before that of the .OSC file.\n"
  },
  {
    "path": "python/examples/augmented_diff.py",
    "content": "from collections import namedtuple\nfrom datetime import datetime\nimport copy\nimport sys\nimport xml.etree.ElementTree as ET\nimport xml.dom.minidom\nimport osmx\n\n# generates an augmented diff for an OSC (OsmChange) file.\n# see https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs\n# this is intended to be run before the OSC file is applied to the osmx file.\n\nif len(sys.argv) < 4:\n    print(\"Usage: augmented_diff.py OSMX_FILE OSC_FILE OUTPUT\")\n    exit(1)\n\n# 1st pass:\n# populate the collection of actions\n# create dictionary from osm_type/osm_id to action\n# e.g. node/12345 > Node()\nAction = namedtuple('Action',['type','element'])\nactions = {}\n\nosc = ET.parse(sys.argv[2]).getroot()\nfor block in osc:\n    for e in block:\n        action_key = e.tag + \"/\" + e.get(\"id\")\n        # Always ensure we're updating to the latest version of an object for the diff\n        if action_key in actions:\n            newest_version = int(actions[action_key].element.get(\"version\"))\n            e_version = int(e.get(\"version\"))\n            if e_version < newest_version:\n                print(\"Found element {}, version {} is less than previously visited version {}\"\n                      .format(action_key, e_version, newest_version))\n                continue\n        actions[action_key] = Action(block.tag,e)\n\n\naction_list = [v for k,v in actions.items()]\n\nenv = osmx.Environment(sys.argv[1])\nwith osmx.Transaction(env) as txn:\n    locations = osmx.Locations(txn)\n    nodes = osmx.Nodes(txn)\n    ways = osmx.Ways(txn)\n    relations = osmx.Relations(txn)\n\n    def not_in_db(elem):\n        elem_id = int(elem.get('id'))\n        if elem.tag == 'node':\n            return not locations.get(elem_id)\n        elif elem.tag == 'way':\n            return not ways.get(elem_id)\n        else:\n            return not relations.get(elem_id)\n\n    def get_lat_lon(ref, use_new):\n        if use_new and ('node/' + ref in 
actions):\n            node = actions['node/' + ref]\n            return (node.element.get('lon'),node.element.get('lat'))\n        else:\n            ll = locations.get(ref)\n            return (str(ll[1]),str(ll[0]))\n\n    def set_old_metadata(elem):\n        elem_id = int(elem.get('id'))\n        if elem.tag == 'node':\n            o = nodes.get(elem_id)\n        elif elem.tag == 'way':\n            o = ways.get(elem_id)\n        else:\n            o = relations.get(elem_id)\n        if o:\n            elem.set('version',str(o.metadata.version))\n            elem.set('user',str(o.metadata.user))\n            elem.set('uid',str(o.metadata.uid))\n            # convert to ISO8601 timestamp\n            timestamp = o.metadata.timestamp\n            formatted = datetime.utcfromtimestamp(timestamp).isoformat()\n            elem.set('timestamp',formatted + 'Z')\n            elem.set('changeset',str(o.metadata.changeset))\n        else:\n            # tagless nodes\n            try:\n                version = locations.get(elem_id)[2]\n            except TypeError:\n                # If loc is None here, it typically means that a node was created and\n                # then deleted within the diff interval. 
In the future we should\n                # remove these operations from the diff entirely.\n                print(\"No old loc found for tagless node {}\".format(elem_id))\n                version = \"?\"\n            elem.set('version',str(version))\n            elem.set('user','?')\n            elem.set('uid','?')\n            elem.set('timestamp','?')\n            elem.set('changeset','?')\n\n    # 2nd pass\n    # create an XML tree of actions with old and new sub-elements\n\n    o = ET.Element('osm')\n    o.set(\"version\",\"0.6\")\n    o.set(\"generator\",\"Overpass API not used, but achavi detects it at the start of string; OSMExpress/python/examples/augmented_diff.py\")\n\n    for action in action_list:\n        a = ET.SubElement(o,'action')\n        a.set('type',action.type)\n        old = ET.SubElement(a,'old')\n        new = ET.SubElement(a,'new')\n        if action.type == 'create':\n            new.append(action.element)\n        elif action.type == 'delete':\n            # get the old metadata\n            modified = copy.deepcopy(action.element)\n            set_old_metadata(action.element)\n            old.append(action.element)\n\n            modified.set('visible','false')\n            for child in list(modified):\n                modified.remove(child)\n            # TODO the Geofabrik deleted elements seem to have the old metadata and old version numbers\n            # check if this is true of planet replication files\n            new.append(modified)\n        else:\n            obj_id = action.element.get('id')\n            if not_in_db(action.element):\n                # Typically occurs when:\n                #  1. TODO: An element is deleted but then restored later,\n                #     which should remain a modify operation. This will be difficult\n                #     because objects are not retained in OSMX when deleted in OSM.\n                #  2. 
OK: An element was created and then modified within the diff interval\n                print(\"Could not find {0} {1} in db, changing to create\".format(action.element.tag,action.element.get('id')))\n                new.append(action.element)\n                a.set('type','create')\n            else:\n                prev_version = ET.SubElement(old,action.element.tag)\n                prev_version.set('id',obj_id)\n                set_old_metadata(prev_version)\n                if action.element.tag == 'node':\n                    ll = get_lat_lon(obj_id,False)\n                    prev_version.set('lon',ll[0])\n                    prev_version.set('lat',ll[1])\n                elif action.element.tag == 'way':\n                    way = ways.get(obj_id)\n                    for n in way.nodes:\n                        node = ET.SubElement(prev_version,'nd')\n                        node.set('ref',str(n))\n                    it = iter(way.tags)\n                    for t in it:\n                        tag = ET.SubElement(prev_version,'tag')\n                        tag.set('k',t)\n                        tag.set('v',next(it))\n                else:\n                    relation = relations.get(obj_id)\n                    for m in relation.members:\n                        member = ET.SubElement(prev_version,'member')\n                        member.set('ref',str(m.ref))\n                        member.set('role',m.role)\n                        member.set('type',str(m.type))\n                    it = iter(relation.tags)\n                    for t in it:\n                        tag = ET.SubElement(prev_version,'tag')\n                        tag.set('k',t)\n                        tag.set('v',next(it))\n                new.append(action.element)\n\n    # 3rd pass\n    # Augment the created \"old\" and \"new\" elements\n    def augment_nd(nd,use_new):\n        ll = get_lat_lon(nd.get('ref'),use_new)\n        nd.set('lon',ll[0])\n        nd.set('lat',ll[1])\n\n   
 def augment_member(mem,use_new):\n        if mem.get('type') == 'way':\n            ref = mem.get('ref')\n            if use_new and ('way/' + ref in actions):\n                way = actions['way/' + ref]\n                for child in way.element:\n                    if child.tag == 'nd':\n                        ll = get_lat_lon(child.get('ref'),use_new)\n                        nd = ET.SubElement(mem,'nd')\n                        nd.set('lon',ll[0])\n                        nd.set('lat',ll[1])\n            else:\n                for node_id in ways.get(ref).nodes:\n                    ll = get_lat_lon(str(node_id),use_new)\n                    nd = ET.SubElement(mem,'nd')\n                    nd.set('lon',ll[0])\n                    nd.set('lat',ll[1])\n        elif mem.get('type') == 'node':\n            ll = get_lat_lon(mem.get('ref'),use_new)\n            mem.set('lon',ll[0])\n            mem.set('lat',ll[1])\n\n    def augment(elem,use_new):\n        if len(elem) == 0:\n            return\n        if elem[0].tag == 'way':\n            for child in elem[0]:\n                if child.tag == 'nd':\n                    augment_nd(child,use_new)\n        elif elem[0].tag == 'relation':\n            for child in elem[0]:\n                if child.tag == 'member':\n                    augment_member(child,use_new)\n\n    for elem in o:\n        try:\n            augment(elem[0],False)\n            augment(elem[1],True)\n        except (TypeError, AttributeError):\n            print(\"Changed {0} {1} is incomplete in db\".format(elem[1][0].tag, elem[1][0].get('id')))\n\n\n    # 4th pass:\n    # find changes that propagate to referencing elements:\n    # when a node's location changes, that propagates to any ways it belongs to, relations it belongs to\n    # and also any relations that the way belongs to\n    # when a way's member list changes, it propagates to any relations it belongs to\n    node_way = osmx.NodeWay(txn)\n    node_relation = 
osmx.NodeRelation(txn)\n    way_relation = osmx.WayRelation(txn)\n\n    affected_ways = set()\n    affected_relations = set()\n    for elem in o:\n        if elem.get('type') == 'modify':\n            if elem[0][0].tag == 'node':\n                old_loc = (elem[0][0].get('lat'),elem[0][0].get('lon'))\n                new_loc = (elem[1][0].get('lat'),elem[1][0].get('lon'))\n                if old_loc != new_loc:\n                    node_id = elem[0][0].get('id')\n                    for rel in node_relation.get(node_id):\n                        if 'relation/' + str(rel) not in actions:\n                            affected_relations.add(rel)\n                    for way in node_way.get(node_id):\n                        if 'way/' + str(way) not in actions:\n                            affected_ways.add(way)\n                            for rel in way_relation.get(way):\n                                if 'relation/' + str(rel) not in actions:\n                                    affected_relations.add(rel)\n\n            elif elem[0][0].tag == 'way':\n                old_way = [nd.get('ref') for nd in elem[0][0] if nd.tag == 'nd']\n                new_way = [nd.get('ref') for nd in elem[1][0] if nd.tag == 'nd']\n                if old_way != new_way:\n                    way_id = elem[0][0].get('id')\n                    for rel in way_relation.get(way_id):\n                        if 'relation/' + str(rel) not in actions:\n                            affected_relations.add(rel)\n\n    for w in affected_ways:\n        a = ET.SubElement(o,'action')\n        a.set('type','modify')\n        old = ET.SubElement(a,'old')\n        way_element = ET.SubElement(old,'way')\n        way_element.set('id',str(w))\n        set_old_metadata(way_element)\n        way = ways.get(w)\n        for n in way.nodes:\n            node = ET.SubElement(way_element,'nd')\n            node.set('ref',str(n))\n        it = iter(way.tags)\n        for t in it:\n            tag = 
ET.SubElement(way_element,'tag')\n            tag.set('k',t)\n            tag.set('v',next(it))\n\n        new = ET.SubElement(a,'new')\n        new_elem = copy.deepcopy(way_element)\n        new.append(new_elem)\n        augment(old,False)\n        augment(new,True)\n\n    for r in affected_relations:\n        old = ET.Element('old')\n        relation_element = ET.SubElement(old,'relation')\n        relation_element.set('id',str(r))\n        set_old_metadata(relation_element)\n        relation = relations.get(r)\n\n        for m in relation.members:\n            member = ET.SubElement(relation_element,'member')\n            member.set('ref',str(m.ref))\n            member.set('role',m.role)\n            member.set('type',str(m.type))\n        it = iter(relation.tags)\n        for t in it:\n            tag = ET.SubElement(relation_element,'tag')\n            tag.set('k',t)\n            tag.set('v',next(it))\n\n        new_elem = copy.deepcopy(relation_element)\n        new = ET.Element('new')\n        new.append(new_elem)\n        try:\n            augment(old,False)\n            augment(new,True)\n            a = ET.SubElement(o,'action')\n            a.set('type','modify')\n            a.append(old)\n            a.append(new)\n        except (TypeError, AttributeError):\n            print(\"Affected relation {0} is incomplete in db\".format(r))\n\n# 5th pass: add bounding boxes\n\nclass Bounds:\n    def __init__(self):\n        self.minx = 180\n        self.maxx = -180\n        self.miny = 90\n        self.maxy = -90\n\n    def add(self,x,y):\n        if x < self.minx:\n            self.minx = x\n        if x > self.maxx:\n            self.maxx = x\n        if y < self.miny:\n            self.miny = y\n        if y > self.maxy:\n            self.maxy = y\n\n    def elem(self):\n        e = ET.Element('bounds')\n        e.set('minlat',str(self.miny))\n        e.set('minlon',str(self.minx))\n        e.set('maxlat',str(self.maxy))\n        
e.set('maxlon',str(self.maxx))\n        return e\n\nfor child in o:\n    if len(child[0]) > 0:\n        osm_obj = child[0][0]\n        nds = osm_obj.findall('.//nd')\n        if nds:\n            bounds = Bounds()\n            for nd in nds:\n                bounds.add(float(nd.get('lon')),float(nd.get('lat')))\n            osm_obj.insert(0,bounds.elem())\n\n# 6th pass\n# sort by node, way, relation\n# within each, sorted by increasing ID\ndef sort_by_type(x):\n    if x[1][0].tag == 'node':\n        return 1\n    elif x[1][0].tag == 'way':\n        return 2\n    return 3\n\no[:] = sorted(o, key=lambda x:int(x[1][0].get('id')))\no[:] = sorted(o, key=sort_by_type)\n\nnote = ET.Element('note')\nnote.text = \"The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.\"\no.insert(0,note)\n\n# pretty print helper\n# http://effbot.org/zone/element-lib.htm#prettyprint\ndef indent(elem, level=0):\n    i = \"\\n\" + level*\"  \"\n    if len(elem):\n        if not elem.text or not elem.text.strip():\n            elem.text = i + \"  \"\n        if not elem.tail or not elem.tail.strip():\n            elem.tail = i\n        for elem in elem:\n            indent(elem, level+1)\n        if not elem.tail or not elem.tail.strip():\n            elem.tail = i\n    else:\n        if level and (not elem.tail or not elem.tail.strip()):\n            elem.tail = i\n\nindent(o)\nET.ElementTree(o).write(sys.argv[3])\n"
  },
  {
    "path": "python/examples/read_way.py",
    "content": "import sys\nimport osmx\n\nif len(sys.argv) <= 1:\n    print(\"Usage: read_way.py OSMX_FILE WAY_ID\")\n    exit(1)\n\nenv  = osmx.Environment(sys.argv[1])\nwith osmx.Transaction(env) as txn:\n    locations = osmx.Locations(txn)\n    nodes = osmx.Nodes(txn)\n    ways = osmx.Ways(txn)\n    way_relation = osmx.WayRelation(txn)\n\n    way_id = sys.argv[2]\n    way = ways.get(way_id)\n\n    for node_id in way.nodes:\n        print(locations.get(node_id))\n\n    print(osmx.tag_dict(way.tags))\n    print(way.metadata)\n    print(way_relation.get(way_id))\n"
  },
  {
    "path": "python/examples/web_server.py",
    "content": "import json\nimport sys\nfrom http.server import BaseHTTPRequestHandler, HTTPServer\nimport osmx\n\nif len(sys.argv) <= 1:\n    print(\"Usage: web_server.py OSMX_FILE\")\n\nenv  = osmx.Environment(sys.argv[1])\n\n# simple implementation of OSM GeoJSON API using osmx + Python standard library.\n# not production ready!\n\nclass Handler(BaseHTTPRequestHandler):\n    def do_GET(self):\n        parts = self.path.split(\"/\")\n        if len(parts) < 3:\n            self.send_response(400)\n            self.wfile.write(\"bad request\".encode('utf-8'))\n            return\n\n        self.send_response(200)\n        self.send_header('Content-type','application/json')\n        self.end_headers()\n\n        osm_id = parts[2]\n        resp = {'type':'Feature','properties':{}}\n        with osmx.Transaction(env) as txn:\n            locations = osmx.Locations(txn)\n\n            def coord(node_id):\n                loc = locations.get(node_id)\n                return (loc[1],loc[0])\n\n            nodes = osmx.Nodes(txn)\n            if parts[1] == \"node\":\n                node = nodes.get(osm_id)\n                if node:\n                    for k,v in osmx.tag_dict(node.tags).items():\n                        resp['properties'][k] = v\n\n                resp['geometry'] = {'type':'Point','coordinates':coord(osm_id)}\n            elif parts[1] == \"way\":\n                ways = osmx.Ways(txn)\n                way = ways.get(osm_id)\n                for k,v in osmx.tag_dict(way.tags).items():\n                    resp['properties'][k] = v\n\n                coords = [coord(node_id) for node_id in way.nodes]\n                resp['geometry'] = {'type':'LineString','coordinates':coords}\n            elif parts[1] == \"relation\":\n                ways = osmx.Ways(txn)\n                relations = osmx.Relations(txn)\n                relation = relations.get(osm_id)\n                for k,v in osmx.tag_dict(relation.tags).items():\n                    
resp['properties'][k] = v\n\n                geometries = []\n                def add_relation_geoms(r):\n                    for member in r.members:\n                        if member.type == 'node':\n                            geometries.append({'type':'Point','coordinates':locations.get(member.ref)})\n                        if member.type == 'way':\n                            way = ways.get(member.ref)\n                            coords = [coord(node_id) for node_id in way.nodes]\n                            geometries.append({'type':'LineString','coordinates':coords})\n                        if member.type == 'relation':\n                            add_relation_geoms(relations.get(member.ref))\n\n                add_relation_geoms(relation)\n                resp['geometry'] = {'type':'GeometryCollection','geometries':geometries}\n\n        self.wfile.write(json.dumps(resp).encode('utf-8'))\n\nprint('Server listening on port 8000...')\nhttpd = HTTPServer(('', 8000), Handler)\nhttpd.serve_forever()\n"
  },
  {
    "path": "python/osmx/__init__.py",
    "content": "from .osmx import *"
  },
  {
    "path": "python/osmx/messages.capnp",
    "content": "@0xd3a7e843a9c03421;\n\nstruct Metadata {\n  version @0 :UInt32;\n  timestamp @1 :UInt64;\n  changeset @2 :UInt32;\n  uid @3 :UInt32;\n  user @4 :Text;\n}\n\nstruct Node {\n  tags @0 :List(Text);\n  metadata @1 :Metadata;\n}\n\nstruct Way {\n  nodes @0 :List(UInt64);\n  tags @1 :List(Text);\n  metadata @2 :Metadata;\n}\n\nstruct RelationMember {\n  ref @0 :UInt64;\n  type @1 :Type;\n  role @2 :Text;\n\n  enum Type {\n    node @0;\n    way @1;\n    relation @2;\n  }\n}\n\nstruct Relation {\n  tags @0 :List(Text);\n  members @1 :List(RelationMember);\n  metadata @2 :Metadata;\n}\n"
  },
  {
    "path": "python/osmx/osmx.py",
    "content": "import sys\nimport os\nimport lmdb\nimport capnp\n\ncapnp.remove_import_hook()\nmessages_capnp = capnp.load(os.path.join(os.path.dirname(__file__), 'messages.capnp'))\n\ndef tag_dict(tag_list):\n    it = enumerate(tag_list)\n    d = {}\n    for x in it:\n        d[x[1]] = next(it)[1]\n    return d\n\nclass Environment:\n    def __init__(self,fname):\n        self._handle = lmdb.Environment(fname,max_dbs=10,readonly=True,readahead=False,subdir=False)\n\nclass Transaction:\n    def __init__(self,env):\n        self.env = env\n        self._handle = lmdb.Transaction(self.env._handle, buffers=True)\n\n    def __enter__(self,*args,**kwargs):\n        self._handle.__enter__(*args,**kwargs)\n        return self\n\n    def __exit__(self,*args,**kwargs):\n        self._handle.__exit__(*args,**kwargs)\n\nclass Index:\n    def __init__(self):\n        pass\n\nclass Index:\n    def __init__(self,txn,name):\n        self.txn = txn\n        self._handle = txn.env._handle.open_db(name,txn=txn._handle,integerkey=True,create=False,dupsort=True,integerdup=True,dupfixed=True)\n\n    def get(self,obj_id):\n        cursor = self.txn._handle.cursor(self._handle)\n        cursor.set_key(int(obj_id).to_bytes(8,byteorder=sys.byteorder))\n        retval = [int.from_bytes(data,byteorder=sys.byteorder,signed=False) for data in cursor.iternext_dup()]\n        cursor.close()\n        return retval\n\nclass Table:\n    def __init__(self,txn,name):\n        self.txn = txn\n        self._handle = txn.env._handle.open_db(name,txn=txn._handle,integerkey=True,create=False)\n\n    def _get_bytes(self,elem_id):\n        return self.txn._handle.get(int(elem_id).to_bytes(8,byteorder=sys.byteorder),db=self._handle)\n\nclass Locations(Table):\n    def __init__(self,txn):\n        super().__init__(txn,b'locations')\n\n    def get(self,node_id):\n        msg = self._get_bytes(node_id)\n        if not msg:\n            return None\n        return (\n            
int.from_bytes(msg[4:8],byteorder=sys.byteorder,signed=True) / 10000000,\n            int.from_bytes(msg[0:4],byteorder=sys.byteorder,signed=True) / 10000000,\n            int.from_bytes(msg[8:12],byteorder=sys.byteorder,signed=False)\n            )\n\nclass Nodes(Table):\n    def __init__(self,txn):\n        super().__init__(txn,b'nodes')\n\n    def get(self,node_id):\n        msg = self._get_bytes(node_id)\n        if not msg:\n            return None\n        return messages_capnp.Node.from_bytes(msg)\n\nclass Ways(Table):\n    def __init__(self,txn):\n        super().__init__(txn,b'ways')\n\n    def get(self,way_id):\n        msg = self._get_bytes(way_id)\n        if not msg:\n            return None\n        return messages_capnp.Way.from_bytes(msg)\n\nclass Relations(Table):\n    def __init__(self,txn):\n        super().__init__(txn,b'relations')\n\n    def get(self,relation_id):\n        msg = self._get_bytes(relation_id)\n        if not msg:\n            return None\n        return messages_capnp.Relation.from_bytes(msg)\n\nclass NodeWay(Index):\n    def __init__(self,txn):\n        super().__init__(txn,b'node_way')\n\nclass NodeRelation(Index):\n    def __init__(self,txn):\n        super().__init__(txn,b'node_relation')\n\nclass WayRelation(Index):\n    def __init__(self,txn):\n        super().__init__(txn,b'way_relation')\n\nclass RelationRelation(Index):\n    def __init__(self,txn):\n        super().__init__(txn,b'relation_relation')\n"
  },
  {
    "path": "python/setup.py",
    "content": "import setuptools\n\nwith open(\"README.md\", \"r\") as fh:\n    long_description = fh.read()\n\nrequirements = [\n    'lmdb~=1.4.1',\n    'pycapnp~=2.0.0',\n]\n\nsetuptools.setup(\n    name='osmx',\n    version='0.0.5',\n    author=\"Brandon Liu\",\n    author_email='brandon@protomaps.com',\n    description='Read OSM Express (.osmx) database files.',\n    license=\"BSD-2-Clause\",\n    long_description=long_description,\n    long_description_content_type=\"text/markdown\",\n    url=\"https://github.com/bdon/OSMExpress\",\n    packages=setuptools.find_packages(),\n    classifiers=[\n        \"Programming Language :: Python :: 3\",\n        \"License :: OSI Approved :: BSD License\",\n        \"Operating System :: OS Independent\",\n    ],\n    install_requires = requirements,\n    requires_python='>=3.0',\n    package_data={'osmx':['messages.capnp']}\n)\n"
  },
  {
    "path": "src/cmd.cpp",
    "content": "#include <vector>\n#include \"osmx/storage.h\"\n#include \"osmx/cmd.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\nusing namespace osmx;\n\nvoid printHelp() {\n  cout << \"Usage: osmx COMMAND [ARG...]\" << endl << endl;\n  cout << \"COMMANDS:\" << endl;\n  cout << \" expand   Convert an OSM PBF or XML to an osmx database.\" << endl;\n  cout << \" extract  Create a regional extract PBF from an osmx database.\" << endl;\n  cout << \" update   Apply an OSM changeset to an osmx database.\" << endl;\n  cout << \" query    Look up objects by ID in an osmx database.\" << endl;\n  exit(1);\n}\n\nvoid printQueryHelp() {\n  cout << \"USAGE:\" << endl;\n  cout << \" osmx query OSMX_FILE [OPTIONS]\" << endl << endl;\n  cout << \"EXAMPLES:\" << endl;\n  cout << \" osmx query planet.osmx\" << endl;\n  cout << \" osmx query planet.osmx way 123456\" << endl << endl;\n  cout << \"OPTIONS:\" << endl;\n  cout << \" none specified: print table statistics.\" << endl;\n  cout << \" [node,way,relation] ID: print OSM object\" << endl;\n  cout << \" timestamp: print data timestamp\" << endl;\n  cout << \" seqnum: print replication seqence number\" << endl;\n  exit(1);\n}\n\nint main(int argc, char* argv[]) {\n  vector<string> args(argv, argv+argc);\n  auto db_cmds = {\"stat\",\"node\",\"way\",\"relation\"};\n  if (argc < 2) {\n    printHelp();\n  }\n  if (args[1] == \"expand\") {\n    cmdExpand(argc,argv);\n  } else if (args[1] == \"extract\") {\n    cmdExtract(argc,argv);\n  } else if (args[1] == \"update\") {\n    cmdUpdate(argc,argv);\n  } else if (args[1] == \"query\") {\n    if (args.size() == 2) {\n      printQueryHelp();\n    }\n    MDB_env* env = db::createEnv(args[2]);\n    MDB_txn* txn;\n    CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));\n\n    if (args.size() >= 4) {\n      if (args[3] == \"node\") {\n        auto id = stol(args[4]);\n        auto location = db::Locations(txn).get(id);\n        cout << location.coords << endl;\n        auto tags 
= db::Elements(txn,\"nodes\").getReader(id).getRoot<Node>().getTags();\n        for (int i = 0; i < tags.size() / 2; i++) {\n          cout << tags[i*2].cStr() << \"=\" << tags[i*2+1].cStr() << \"\\n\";\n        }\n      } else if (args[3] == \"way\") {\n        db::Elements ways(txn,\"ways\");\n        auto message = ways.getReader(stol(args[4]));\n        auto way = message.getRoot<Way>();\n        for (auto node_id : way.getNodes()) {\n          cout << node_id << \" \";\n        }\n        cout << endl;\n        auto tags = way.getTags();\n        for (int i = 0; i < tags.size() / 2; i++) {\n          cout << tags[i*2].cStr() << \"=\" << tags[i*2+1].cStr() << \" \";\n        }\n        cout << endl;\n      } else if (args[3] == \"relation\") {\n        db::Elements relations(txn,\"relations\");\n        uint64_t relation_id = stol(args[4]);\n        auto message = relations.getReader(relation_id);\n        auto relation = message.getRoot<Relation>();\n        auto tags = relation.getTags();\n        for (int i = 0; i < tags.size() / 2; i++) {\n          cout << tags[i*2].cStr() << \"=\" << tags[i*2+1].cStr() << \" \";\n        }\n        auto members = relation.getMembers();\n        for (auto const &member : members) {\n          cout << member.getRef() << endl;\n        }\n      } else if (args[3] == \"timestamp\") {\n        db::Metadata metadata(txn);\n        cout << metadata.get(\"osmosis_replication_timestamp\") << endl;\n      } else if (args[3] == \"seqnum\") {\n        db::Metadata metadata(txn);\n        cout << metadata.get(\"osmosis_replication_sequence_number\") << endl;\n      } else {\n        printQueryHelp();\n      }\n    } else {\n      auto tables = {\"locations\",\"nodes\",\"ways\",\"relations\",\"cell_node\",\"node_way\",\"node_relation\",\"way_relation\",\"relation_relation\"};\n      for (auto const &table : tables) {\n        MDB_dbi dbi;\n        CHECK_LMDB(mdb_dbi_open(txn, table, MDB_INTEGERKEY, &dbi));\n        MDB_stat stat;\n     
   CHECK_LMDB(mdb_stat(txn,dbi,&stat));\n        cout << table << \": \" << stat.ms_entries << endl;\n      }\n\n      db::Metadata metadata(txn);\n      cout << \"Timestamp: \" << metadata.get(\"osmosis_replication_timestamp\") << endl;\n      cout << \"Sequence #: \" << metadata.get(\"osmosis_replication_sequence_number\") << endl;\n    }\n\n    mdb_env_sync(env,true);\n    mdb_env_close(env);\n  } else {\n    printHelp();\n  }\n}\n"
  },
  {
    "path": "src/expand.cpp",
    "content": "#include <iomanip>\n#include <fstream>\n#include \"osmium/handler.hpp\"\n#include \"osmium/visitor.hpp\"\n#include \"osmium/io/any_input.hpp\"\n#include \"osmium/util/progress_bar.hpp\"\n#include \"osmium/io/reader_with_progress_bar.hpp\"\n#include \"cxxopts.hpp\"\n#include \"kj/io.h\"\n#include \"capnp/message.h\"\n#include \"capnp/serialize.h\"\n#include \"s2/s2latlng.h\"\n#include \"s2/s2cell_id.h\"\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n#include \"osmx/messages.capnp.h\"\n\nusing namespace std;\nusing namespace osmx;\n\n\ntypedef std::pair<uint64_t, uint64_t> Pair; \ntypedef std::pair<Pair, uint64_t> pqelem; \n\nclass SortReader {\n  public:\n  SortReader(std::string filename) : mStream(filename, std::ios::in | std::ios::binary) { }\n\n  bool getNext() {\n    mStream.read((char *)&entry,sizeof(uint64_t) *2);\n    if (mStream.eof()) return false;\n    return true;\n  }\n\n  Pair entry;\n\n  private:\n  std::ifstream mStream;\n};\n\nclass Sorter {\nint MAX_RUN_SIZE = 64000000; // about 1 GB\npublic:\n  Sorter(std::string tempDir,std::string name) : mTempDir(tempDir), mName(name) { \n    mStorage.reserve(MAX_RUN_SIZE);\n  }\n\n  void put(uint64_t from, uint64_t to) {\n    mStorage.push_back(std::make_pair(from,to));\n    if (mStorage.size() > MAX_RUN_SIZE) persist();\n  }\n\n  void put(S2CellId from, uint64_t to) {\n    put(from.id(),to);\n  }\n\n  void persist() {\n    if (mStorage.size() == 0) return;\n    sort(mStorage.begin(),mStorage.end());\n    int runNumber = mSavedRuns.size();\n    std::ofstream stream;\n    std::stringstream fname;\n    fname << mTempDir << \"/\" << std::setw(2) << std::setfill('0') << mName << \"_\" << std::setw(3) << std::setfill('0') << runNumber << \".run\";\n    stream.open(fname.str(),std::ios::binary);\n    for (auto const &entry: mStorage) {\n      stream.write((char *)&entry.first,sizeof(uint64_t));\n      stream.write((char *)&entry.second,sizeof(uint64_t));\n    }\n    stream.close();\n    
mStorage.clear();\n    mStorage.reserve(MAX_RUN_SIZE);\n    mSavedRuns.push_back(fname.str());\n  }\n\n  void writeDb(MDB_env *env) {\n    persist();\n\n    Timer timer(\"External sort \" + mName);\n    osmium::ProgressBar progress{MAX_RUN_SIZE * mSavedRuns.size(), osmium::isatty(2)};\n    int read = 0;\n    std::priority_queue<pqelem, std::vector<pqelem>, std::greater<pqelem>> q;\n    std::vector<SortReader> readers;\n    db::IndexWriter index(env,mName);\n\n    for (int i = 0; i < mSavedRuns.size(); i++) {\n      readers.emplace_back(mSavedRuns[i]);  \n      if (readers[i].getNext()) q.push(make_pair(readers[i].entry, i));\n    }\n\n    Pair last;\n\n    while (q.size() > 0) {\n      pqelem pair = q.top();\n      auto idx = pair.second;\n      if (pair.first != last) {\n        if (pair.first.first != last.first) index.put(pair.first.first,pair.first.second,MDB_APPEND);\n        else index.put(pair.first.first,pair.first.second,MDB_APPENDDUP);\n      }\n      q.pop();\n      if (readers[idx].getNext()) q.push(make_pair(readers[idx].entry, idx));\n      progress.update(read++);\n      last = pair.first;\n    }\n\n    index.commit();\n\n    progress.done();\n\n    for (auto const &run : mSavedRuns) {\n      remove(run.c_str());\n    }\n  }\n\nprivate:\n  Sorter( const Sorter& ) = delete;\n  Sorter& operator=( const Sorter& ) = delete;\n  std::vector<std::pair<uint64_t,uint64_t>> mStorage;\n  int mRunNumber = 0;\n  std::vector<std::string> mSavedRuns;\n  std::string mTempDir;\n  std::string mName;\n};\n\nclass Handler: public osmium::handler::Handler {\n  public:\n  Handler(MDB_env *env, MDB_txn *txn,string tempDir) : \n    mEnv(env),\n    mTxn(txn),\n    mCellNode(tempDir,\"cell_node\"), \n    mLocations(txn), \n    mNodes(txn,\"nodes\"),\n    mWays(txn,\"ways\"),\n    mRelations(txn,\"relations\"),\n    mNodeWay(tempDir,\"node_way\"),\n    mNodeRelation(tempDir,\"node_relation\"),\n    mWayRelation(tempDir,\"way_relation\"),\n    
mRelationRelation(tempDir,\"relation_relation\")\n  {\n  }\n\n  ~Handler() {\n    CHECK_LMDB(mdb_txn_commit(mTxn));\n    mCellNode.writeDb(mEnv);\n    mNodeWay.writeDb(mEnv);\n    mNodeRelation.writeDb(mEnv);\n    mWayRelation.writeDb(mEnv);\n    mRelationRelation.writeDb(mEnv);\n  }\n\n  void node(const osmium::Node& node) {\n    mLocations.put(node.id(), db::Location{node.location(),(int32_t)node.version()},MDB_APPEND);\n    auto loc = node.location();\n    auto ll = S2LatLng::FromDegrees(loc.lat(),loc.lon());\n    auto cell = S2CellId(ll).parent(CELL_INDEX_LEVEL);\n    mCellNode.put(cell,node.id());\n\n    if (node.tags().size() > 0) {\n      ::capnp::MallocMessageBuilder message;\n      Node::Builder nodeMsg = message.initRoot<Node>();\n      setTags<Node::Builder>(node.tags(),nodeMsg);\n      auto metadata = nodeMsg.initMetadata();\n      metadata.setVersion(node.version());\n      metadata.setTimestamp(node.timestamp().seconds_since_epoch());\n      metadata.setChangeset(node.changeset());\n      metadata.setUid(node.uid());\n      metadata.setUser(node.user());\n      kj::VectorOutputStream output;\n      capnp::writeMessage(output,message);\n      mNodes.put(node.id(),output,MDB_APPEND);\n    }\n  }\n\n  void way(const osmium::Way& way) {\n  \tauto const &nodes = way.nodes();\n    ::capnp::MallocMessageBuilder message;\n    Way::Builder wayMsg = message.initRoot<Way>();\n    wayMsg.initNodes(nodes.size());\n    int i = 0;\n    for (int i = 0; i < nodes.size(); i++) {\n       wayMsg.getNodes().set(i,nodes[i].ref());\n       mNodeWay.put(nodes[i].ref(),way.id());\n    }\n    setTags<Way::Builder>(way.tags(),wayMsg);\n    auto metadata = wayMsg.initMetadata();\n    metadata.setVersion(way.version());\n    metadata.setTimestamp(way.timestamp().seconds_since_epoch());\n    metadata.setChangeset(way.changeset());\n    metadata.setUid(way.uid());\n    metadata.setUser(way.user());\n    kj::VectorOutputStream output;\n    capnp::writeMessage(output,message);\n    
mWays.put(way.id(),output,MDB_APPEND);\n  }\n\n  void relation(const osmium::Relation& relation) {\n    ::capnp::MallocMessageBuilder message;\n    Relation::Builder relationMsg = message.initRoot<Relation>();\n    setTags<Relation::Builder>(relation.tags(),relationMsg);\n    auto members = relationMsg.initMembers(relation.members().size());\n    int i = 0;\n    for (auto const &member : relation.members()) {\n      members[i].setRef(member.ref());\n      members[i].setRole(member.role());\n      if (member.type() == osmium::item_type::node) {\n        members[i].setType(RelationMember::Type::NODE);\n        mNodeRelation.put(member.ref(),relation.id());\n      }\n      else if (member.type() == osmium::item_type::way) {\n        members[i].setType(RelationMember::Type::WAY);\n        mWayRelation.put(member.ref(),relation.id());\n      }\n      else if (member.type() == osmium::item_type::relation) {\n        members[i].setType(RelationMember::Type::RELATION);\n        mRelationRelation.put(member.ref(),relation.id());\n      }\n      i++;\n    }\n    auto metadata = relationMsg.initMetadata();\n    metadata.setVersion(relation.version());\n    metadata.setTimestamp(relation.timestamp().seconds_since_epoch());\n    metadata.setChangeset(relation.changeset());\n    metadata.setUid(relation.uid());\n    metadata.setUser(relation.user());\n    kj::VectorOutputStream output;\n    capnp::writeMessage(output,message);\n    mRelations.put(relation.id(),output,MDB_APPEND);\n  }\n\n  private:\n  MDB_env* mEnv;\n  MDB_txn* mTxn;\n  Sorter mCellNode;\n  db::Locations mLocations;\n\n  db::Elements mNodes;\n  db::Elements mWays;\n  db::Elements mRelations;\n\n  Sorter mNodeWay;\n  Sorter mNodeRelation;\n  Sorter mWayRelation;\n  Sorter mRelationRelation;\n};\n\nvoid cmdExpand(int argc, char* argv[]) {\n  cxxopts::Options options(\"Expand\", \"Expand a .osm.pbf into an .osmx file\");\n  options.add_options()\n    (\"v,verbose\", \"Verbose output\")\n    (\"cmd\", \"Command to 
run\", cxxopts::value<string>())\n    (\"input\", \"Input .pbf\", cxxopts::value<string>())\n    (\"output\", \"Output .osmx\", cxxopts::value<string>())\n  ;\n  options.parse_positional({\"cmd\",\"input\", \"output\"});\n  auto result = options.parse(argc, argv);\n\n  if (result.count(\"input\") == 0 || result.count(\"output\") == 0) {\n    cout << \"Usage: osmx expand OSM_FILE OSMX_FILE [OPTIONS]\" << endl << endl;\n    cout << \"OSM_FILE must be an OSM XML or PBF.\" << endl << endl;\n    cout << \"EXAMPLE:\" << endl;\n    cout << \" osmx expand planet_latest.osm.pbf planet.osmx\" << endl << endl;\n    cout << \"OPTIONS:\" << endl;\n    cout << \" --v,--verbose: verbose output.\" << endl;\n    exit(1);\n  }\n\n  string input =result[\"input\"].as<string>();\n  string output = result[\"output\"].as<string>();\n\n  Timer timer(\"convert\");\n  MDB_env* env = db::createEnv(output,true);\n  MDB_txn* txn;\n  CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn));\n\n  const osmium::io::File input_file{input};\n  osmium::io::ReaderWithProgressBar reader{true, input_file, osmium::osm_entity_bits::object};\n\n  db::Metadata metadata(txn);\n  auto header = reader.header();\n\n  for (auto option : header) {\n    cout << option.first << \" \" << option.second << endl;\n  }\n  cout << \"Box: \" << header.box() << endl;\n  cout << \"Timestamp: \" << header.get(\"osmosis_replication_timestamp\") << endl;\n  cout << \"Sequence#: \" << header.get(\"osmosis_replication_sequence_number\") << endl;\n  metadata.put(\"osmosis_replication_timestamp\",header.get(\"osmosis_replication_timestamp\"));\n  metadata.put(\"osmosis_replication_sequence_number\",header.get(\"osmosis_replication_sequence_number\"));\n  metadata.put(\"import_filename\",input);\n  string tempDir = output + \"-temp\";\n  assert(mkdir(tempDir.c_str(),S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == 0);\n\n  {\n    Timer insert(\"insert\");\n    Handler handler(env,txn,tempDir);\n    osmium::apply(reader, handler);\n  }\n\n  
assert(rmdir(tempDir.c_str()) == 0);\n}\n"
  },
  {
    "path": "src/extract.cpp",
    "content": "#include <string>\n#include <fstream>\n#include \"s2/s2latlng.h\"\n#include \"s2/s2region_coverer.h\"\n#include \"s2/s2latlng_rect.h\"\n#include \"osmium/io/any_output.hpp\"\n#include \"osmium/util/progress_bar.hpp\"\n#include \"osmium/memory/callback_buffer.hpp\"\n#include \"osmium/builder/attr.hpp\"\n#include \"osmium/builder/osm_object_builder.hpp\"\n#include \"cxxopts.hpp\"\n#include \"nlohmann/json.hpp\"\n#include \"osmx/storage.h\"\n#include \"osmx/region.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\nusing namespace osmx;\n\nstruct ExportProgress {\n  string timestamp = \"\";\n  uint64_t cells_total = 0;\n  uint64_t cells_prog = 0;\n  uint64_t nodes_total = 0;\n  uint64_t nodes_prog = 0;\n  uint64_t elems_total = 0;\n  uint64_t elems_prog = 0;\n\n  void print() {\n    cout << \"{\\\"Timestamp\\\":\\\"\" << timestamp << \"\\\",\\\"CellsTotal\\\":\" << cells_total << \",\\\"CellsProg\\\":\" << cells_prog << \",\\\"NodesTotal\\\":\" << nodes_total << \",\\\"NodesProg\\\":\" << nodes_prog << \",\\\"ElemsTotal\\\":\" << elems_total << \",\\\"ElemsProg\\\":\" << elems_prog << \"}\" << endl;\n  }\n};\n\nclass ProgressSection {\n\npublic:\n  ProgressSection(ExportProgress &expprog, uint64_t &total, uint64_t &prog, uint64_t total_to_set, bool jsonOutput) : expprog(expprog), total(total), prog(prog), progressbar(total_to_set, osmium::isatty(2) && !jsonOutput), jsonOutput(jsonOutput) {\n    total = total_to_set;\n  }\n\n  ~ProgressSection() {\n    progressbar.done();\n  }\n\n  void tick() {\n    prog++;\n    if (prog - last_prog > (total / 100)) {\n      if (jsonOutput) expprog.print();\n      else progressbar.update(prog);\n      last_prog = prog;\n    }\n  }\n\nprivate:\n    osmium::ProgressBar progressbar;\n    uint64_t& prog;\n    uint64_t& total;\n    bool jsonOutput;\n    ExportProgress &expprog;\n    uint64_t last_prog = 0;\n};\n\nstatic bool endsWith(const std::string& str, const std::string& suffix)\n{\n    return str.size() >= 
suffix.size() && 0 == str.compare(str.size()-suffix.size(), suffix.size(), suffix);\n}\n\n// must be --bbox, --disc, --poly or --json\n// or --region\nvoid cmdExtract(int argc, char * argv[]) {\n  cxxopts::Options cmd_options(\"Extract\", \"Create an .osm.pbf from an .osmx file.\");\n  cmd_options.add_options()\n    (\"v,verbose\", \"Verbose output\")\n    (\"noUserData\", \"Don't include changeset,uid,user fields (GDPR compliance)\")\n    (\"jsonOutput\", \"JSON progress output\")\n    (\"cmd\", \"Command to run\", cxxopts::value<string>())\n    (\"osmx\", \"Input .osmx\", cxxopts::value<string>())\n    (\"output\", \"Output file, pbf or xml\", cxxopts::value<string>())\n    (\"bbox\", \"rectangle in minLat,minLon,maxLat,maxLon\", cxxopts::value<string>())\n    (\"disc\", \"disc in centerLat,centerLon,radiusDegrees\", cxxopts::value<string>())\n    (\"geojson\",\"geoJson of region\", cxxopts::value<string>())\n    (\"poly\",\"osmosis .poly of region\", cxxopts::value<string>())\n    (\"region\",\"file for region with extension .bbox, .disc, .json or .poly\", cxxopts::value<string>())\n    (\"expand\",\"buffer at this cell level\",cxxopts::value<int>())\n  ;\n  cmd_options.parse_positional({\"cmd\",\"osmx\",\"output\"});\n  auto result = cmd_options.parse(argc, argv);\n\n  if (result.count(\"osmx\") == 0 || result.count(\"output\") == 0) {\n    cout << \"Usage: osmx extract OSMX_FILE OUTPUT_FILE [OPTIONS]\" << endl << endl;\n    cout << \"EXAMPLE:\" << endl;\n    cout << \" osmx extract planet.osmx extract.osm.pbf --region region.json\" << endl << endl;\n    cout << \"OPTIONS:\" << endl;\n    cout << \" --v,--verbose: verbose output.\" << endl;\n    cout << \" --jsonOutput: log progress as JSON messages.\" << endl;\n    cout << \" --bbox MIN_LAT,MIN_LON,MAX_LAT,MAX_LON: region is lat/lon bbox\" << endl;\n    cout << \" --disc CENTER_LAT,CENTER_LON,R_DEGREES: region is disc\" << endl;\n    cout << \" --geojson GEOJSON: region is an areal GeoJSON feature or 
geometry\" << endl;\n    cout << \" --poly POLY: region is an Osmosis polygon\" << endl;\n    cout << \" --region FILE: text file with .bbox, .disc, .json or .poly extension\" << endl;\n    cout << \" --expand CELL_LEVEL: buffer region with cells at this level, <= 16\" << endl;\n    exit(1);\n  }\n\n  auto startTime = std::chrono::high_resolution_clock::now();\n  ExportProgress prog;\n  string err;\n\n  bool jsonOutput = result.count(\"jsonOutput\") > 0;\n  if (jsonOutput) prog.print();\n\n  bool includeUserData = result.count(\"noUserData\") == 0;\n\n  std::unique_ptr<Region> region;\n  if (result.count(\"bbox\")) region = std::make_unique<Region>(result[\"bbox\"].as<string>(),\"bbox\");\n  else if (result.count(\"disc\")) region = std::make_unique<Region>(result[\"disc\"].as<string>(),\"disc\");\n  else if (result.count(\"geojson\")) region = std::make_unique<Region>(result[\"geojson\"].as<string>(),\"geojson\");\n  else if (result.count(\"poly\")) region = std::make_unique<Region>(result[\"poly\"].as<string>(),\"poly\");\n  else if (result.count(\"region\")) {\n    auto fname = result[\"region\"].as<string>();\n    std::ifstream t(fname);\n    std::stringstream buffer;\n    buffer << t.rdbuf();\n    if (endsWith(fname,\"bbox\")) region = std::make_unique<Region>(buffer.str(),\"bbox\");\n    if (endsWith(fname,\"disc\")) region = std::make_unique<Region>(buffer.str(),\"disc\");\n    if (endsWith(fname,\"json\")) region = std::make_unique<Region>(buffer.str(),\"geojson\");\n    if (endsWith(fname,\"poly\")) region = std::make_unique<Region>(buffer.str(),\"poly\");\n  } else {\n    cout << \"No region specified.\" << endl;\n    exit(0);\n  }\n\n  S2RegionCoverer::Options options;\n  options.set_max_cells(1024);\n  options.set_max_level(CELL_INDEX_LEVEL);\n  S2RegionCoverer coverer(options);\n  S2CellUnion covering = region->GetCovering(coverer);\n\n  if (result.count(\"expand\")) {\n    int expand = result[\"expand\"].as<int>();\n    if (expand >= 0 && expand <= 
16) {\n      covering.Expand(expand);\n    }\n  }\n\n  if (!jsonOutput) {\n    cout << \"Query cells: \" << covering.cell_ids().size() << endl;\n  }\n\n  roaring::Roaring64Map node_ids;\n  roaring::Roaring64Map way_ids;\n  roaring::Roaring64Map relation_ids;\n\n  MDB_env* env = db::createEnv(result[\"osmx\"].as<string>(),false);\n  MDB_txn* txn;\n  CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));\n\n  db::Metadata metadata(txn);\n  auto timestamp = metadata.get(\"osmosis_replication_timestamp\");\n  prog.timestamp = timestamp;\n  if (!jsonOutput) {\n    cout << \"Snapshot timestamp is \" << prog.timestamp  << endl;\n  }\n\n  {\n    ProgressSection section(prog,prog.cells_total,prog.cells_prog,covering.size(),jsonOutput);\n    MDB_dbi dbi;\n    MDB_cursor *cursor;\n    CHECK_LMDB(mdb_dbi_open(txn, \"cell_node\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n    for (auto cell_id : covering.cell_ids()) {\n      db::traverseCell(cursor,cell_id,node_ids);\n      section.tick();\n    }\n    mdb_cursor_close(cursor);\n  }\n\n  {\n    ProgressSection section(prog,prog.nodes_total,prog.nodes_prog,node_ids.cardinality(),jsonOutput);\n    MDB_dbi dbi;\n    MDB_cursor *cursor;\n    CHECK_LMDB(mdb_dbi_open(txn, \"node_way\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n    for (auto const &node_id : node_ids) {\n      db::traverseReverse(cursor,node_id,way_ids);\n      section.tick();\n    }\n  }\n\n\n  // find all Relations that these nodes or Ways are a member of.\n  {\n    MDB_dbi dbi;\n    MDB_cursor *cursor;\n    CHECK_LMDB(mdb_dbi_open(txn, \"node_relation\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n    for (auto const &node_id : node_ids) {\n      db::traverseReverse(cursor,node_id,relation_ids);\n    }\n  }\n\n  {\n    MDB_dbi 
dbi;\n    MDB_cursor *cursor;\n    CHECK_LMDB(mdb_dbi_open(txn, \"way_relation\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n    for (auto const &way_id : way_ids) {\n      db::traverseReverse(cursor,way_id,relation_ids);\n    }\n  }\n\n  {\n    MDB_dbi dbi;\n    MDB_cursor *cursor;\n    CHECK_LMDB(mdb_dbi_open(txn, \"relation_relation\", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));\n    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));\n    roaring::Roaring64Map discovered_relations;\n    roaring::Roaring64Map discovered_relations_2;\n\n    for (auto const &relation_id : relation_ids) {\n      db::traverseReverse(cursor,relation_id,discovered_relations);\n    }\n\n    relation_ids |= discovered_relations;\n\n    while(true) {\n      for (auto const &relation_id : discovered_relations) {\n        db::traverseReverse(cursor,relation_id,discovered_relations_2);\n      }\n      int num_discovered = 0;\n      for (auto discovered_relation_id : discovered_relations_2) {\n        if (relation_ids.addChecked(discovered_relation_id)) num_discovered++;\n      }\n      if (num_discovered == 0) break;\n      discovered_relations = discovered_relations_2;\n      discovered_relations_2.clear();\n    }\n  }\n\n  if (!jsonOutput) cout << \"Relations: \" << relation_ids.cardinality() << endl;\n  db::Elements ways(txn,\"ways\");\n  db::Elements relations(txn,\"relations\");\n\n  // make it Multipolygon-complete: go through all Relations, finding any that have tag type=multipolygon, and add to Ways\n\n  for (auto relation_id : relation_ids) {\n    auto reader = relations.getReader(relation_id);\n    Relation::Reader relation = reader.getRoot<Relation>();\n    auto tags = relation.getTags();\n    for (int i = 0; i < tags.size() / 2; i++) {\n      if (tags[i*2] == \"type\" && tags[i*2+1] == \"multipolygon\") {\n        for (auto const &member : relation.getMembers()) {\n          if 
(member.getType() == RelationMember::Type::WAY) {\n            auto ref = member.getRef();\n            // check if the way exists, because this may be an extract\n            if (ways.exists(ref)) way_ids.add(member.getRef());\n          }\n        }\n      }\n    }\n  }\n\n  if (!jsonOutput) cout << \"Ways: \" << way_ids.cardinality() << endl;\n\n  // make it Way-complete: go through all Ways and add in any missing Nodes.\n\n  {\n    for (auto way_id : way_ids) {\n      auto reader = ways.getReader(way_id);\n      Way::Reader way = reader.getRoot<Way>();\n      for (auto node_id : way.getNodes()) {\n        node_ids.add(node_id);\n      }\n    }\n  }\n\n  if (!jsonOutput) cout << \"Nodes: \" << node_ids.cardinality() << endl;\n\n  // start Write\n\n  osmium::io::Header header;\n  header.set(\"generator\", \"osmx\");\n  header.set(\"timestamp\", timestamp);\n  header.set(\"osmosis_replication_timestamp\", timestamp);\n\n  auto bounds = region->GetBounds();\n\n\n  // the box header is used by some applications,\n  // for example: zooming to an overview in QGIS.\n  // however, osmium only supports writing one PBF box header and it must be in the -180 to 180 lng, -90 to 90 lat range.\n  // valid input regions can cross the antimeridian, but the output header box is omitted as it can't represent the input.\n  if (bounds.lng_lo().degrees() < bounds.lng_hi().degrees()) {\n    header.add_box(osmium::Box(bounds.lng_lo().degrees(),bounds.lat_lo().degrees(),bounds.lng_hi().degrees(),bounds.lat_hi().degrees()));\n  }\n  osmium::io::Writer writer{result[\"output\"].as<string>(), header, osmium::io::overwrite::allow};\n  osmium::memory::CallbackBuffer cb;\n  cb.set_callback([&](osmium::memory::Buffer&& buffer) {\n    writer(std::move(buffer));\n  });\n\n  {\n    ProgressSection section(prog,prog.elems_total,prog.elems_prog,node_ids.cardinality() + way_ids.cardinality() + relation_ids.cardinality(),jsonOutput);\n\n    {\n      db::Locations location_index(txn);\n      
db::Elements nodes_table(txn,\"nodes\");\n      for (auto node_id : node_ids) {\n        section.tick();\n        auto loc = location_index.get(node_id);\n        if (loc.is_undefined()) continue;\n\n        {\n          using namespace osmium::builder::attr; \n          osmium::builder::NodeBuilder node_builder{cb.buffer()};\n          node_builder.set_id(node_id);\n          node_builder.set_location(loc.coords);\n          node_builder.set_version(loc.version);\n\n          if (!nodes_table.exists(node_id)) continue;\n          auto reader = nodes_table.getReader(node_id);\n          Node::Reader node = reader.getRoot<Node>();\n          auto metadata = node.getMetadata();\n          node_builder.set_timestamp(metadata.getTimestamp());\n          if (includeUserData) {\n            node_builder.set_changeset(metadata.getChangeset());\n            node_builder.set_user(metadata.getUser());\n            node_builder.set_uid(metadata.getUid());\n          }\n\n          auto tags = node.getTags();\n          osmium::builder::TagListBuilder tag_builder{node_builder};\n          for (int i = 0; i < tags.size() / 2; i++) {\n            tag_builder.add_tag(tags[i*2],tags[i*2+1]);\n          }\n        }\n        cb.buffer().commit();\n        cb.possibly_flush();\n      }\n    }\n    \n    // Writing ways pass\n    {\n      for (auto way_id : way_ids) {\n        section.tick();\n        auto reader = ways.getReader(way_id);\n        Way::Reader way = reader.getRoot<Way>();\n\n        {\n          using namespace osmium::builder::attr; \n          osmium::builder::WayBuilder way_builder{cb.buffer()};\n          way_builder.set_id(way_id);\n          auto metadata = way.getMetadata();\n          way_builder.set_version(metadata.getVersion());\n          way_builder.set_timestamp(metadata.getTimestamp());\n          if (includeUserData) {\n            way_builder.set_changeset(metadata.getChangeset());\n            way_builder.set_user(metadata.getUser());\n            
way_builder.set_uid(metadata.getUid());\n          }\n\n          {\n            osmium::builder::WayNodeListBuilder way_node_list_builder{way_builder};\n            for (auto node_id : way.getNodes()) {\n              way_node_list_builder.add_node_ref(node_id);\n            }\n          }\n\n          auto tags = way.getTags();\n          osmium::builder::TagListBuilder tag_builder{way_builder};\n          for (int i = 0; i < tags.size() / 2; i++) {\n            tag_builder.add_tag(tags[i*2],tags[i*2+1]);\n          }\n        }\n        cb.buffer().commit();\n        cb.possibly_flush();\n      }\n    }\n\n    {\n      for (auto relation_id : relation_ids) {\n        section.tick();\n        auto reader = relations.getReader(relation_id);\n        Relation::Reader relation = reader.getRoot<Relation>();\n\n        {\n          using namespace osmium::builder::attr; \n          osmium::builder::RelationBuilder relation_builder{cb.buffer()};\n          relation_builder.set_id(relation_id);\n\n          auto metadata = relation.getMetadata();\n          relation_builder.set_version(metadata.getVersion());\n          relation_builder.set_timestamp(metadata.getTimestamp());\n          if (includeUserData) {\n            relation_builder.set_changeset(metadata.getChangeset());\n            relation_builder.set_user(metadata.getUser());\n            relation_builder.set_uid(metadata.getUid());\n          }\n\n          {\n            osmium::builder::RelationMemberListBuilder relation_member_list_builder{relation_builder};\n            for (auto const &member : relation.getMembers()) {\n              if (member.getType() == RelationMember::Type::NODE) {\n                relation_member_list_builder.add_member(osmium::item_type::node,member.getRef(),member.getRole());\n              } else if (member.getType() == RelationMember::Type::WAY) {\n                relation_member_list_builder.add_member(osmium::item_type::way,member.getRef(),member.getRole());\n              } 
else {\n                relation_member_list_builder.add_member(osmium::item_type::relation,member.getRef(),member.getRole());\n              }\n            }\n          }\n\n          auto tags = relation.getTags();\n          osmium::builder::TagListBuilder tag_builder{relation_builder};\n          for (int i = 0; i < tags.size() / 2; i++) {\n            tag_builder.add_tag(tags[i*2],tags[i*2+1]);\n          }\n        }\n        cb.buffer().commit();\n        cb.possibly_flush();\n      }\n    }\n  }\n\n\n  cb.flush();\n  writer.close();\n  mdb_env_close(env);\n  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now() - startTime ).count();\n  if (!jsonOutput) cout << \"Finished export in \" << duration/1000.0 << \" seconds.\" << endl;\n}\n"
  },
  {
    "path": "src/region.cpp",
    "content": "#include <sstream>\n#include <iostream>\n#include \"s2/s2latlng.h\"\n#include \"s2/s2latlng_rect.h\"\n#include \"s2/s2cap.h\"\n#include \"s2/s2polygon.h\"\n#include \"s2/s2loop.h\"\n#include \"osmx/region.h\"\n\nstatic inline void rtrim(std::string &s) {\n    s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) {\n        return !std::isspace(ch);\n    }).base(), s.end());\n}\n\nstd::unique_ptr<S2Polygon> S2PolyFromCoordinates(nlohmann::json &coordinates) {\n    std::vector<std::unique_ptr<S2Loop>> loopRegions;\n    for (auto loop : coordinates) {\n        std::vector<S2Point> points;\n\n        // ignore the last repeated point\n        for (int i = 0; i < loop.size() - 1; i++) {\n            double lon = loop[i][0].get<double>();\n            double lat = loop[i][1].get<double>();\n            points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint());\n        }\n\n        auto loopRegion = std::make_unique<S2Loop>(points);\n        loopRegion->Normalize();\n        loopRegions.push_back(std::move(loopRegion));\n    };\n    return std::make_unique<S2Polygon>(std::move(loopRegions));\n}\n\nvoid Region::AddS2RegionFromGeometry(nlohmann::json &geometry) {\n    if (geometry[\"type\"] == \"Polygon\") {\n        auto p = S2PolyFromCoordinates(geometry[\"coordinates\"]);\n        mRegions.push_back(std::move(p));\n    } else if (geometry[\"type\"] == \"MultiPolygon\") {\n        for (auto polygon : geometry[\"coordinates\"]) {\n            auto p = S2PolyFromCoordinates(polygon);\n            mRegions.push_back(std::move(p));\n        }\n    }\n}\n\nvoid Region::AddS2RegionFromPolyFile(std::istringstream &file) {\n    std::vector<S2Point> points;\n    std::string line;\n\n    while (std::getline(file, line)) {\n        rtrim(line);\n        double lat, lon;\n        // END of polygon\n        if (line == \"END\") {\n            break;\n        } else {\n            std::istringstream iss(line);\n            iss >> lon;\n            iss >> 
lat;\n            points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint());\n        }\n    }\n\n    // Guard against an empty ring before de-duplicating the closing point.\n    if (points.size() > 1 && points[0] == points[points.size() - 1]) points.pop_back();\n\n    auto loop = std::make_unique<S2Loop>(points);\n    loop->Normalize();\n    mRegions.push_back(std::move(loop));\n}\n\nRegion::Region(const std::string &text, const std::string &ext) {\n    if (ext == \"bbox\") {\n        double minLat,minLon,maxLat,maxLon;\n        std::sscanf(text.c_str(), \"%lf,%lf,%lf,%lf\",&minLat,&minLon,&maxLat,&maxLon);\n        auto lo = S2LatLng::FromDegrees(minLat,minLon).Normalized();\n        auto hi = S2LatLng::FromDegrees(maxLat,maxLon).Normalized();\n        mRegions.push_back(std::make_unique<S2LatLngRect>(lo,hi));\n    } else if (ext == \"disc\") {\n        double lat,lon,radius;\n        std::sscanf(text.c_str(), \"%lf,%lf,%lf\",&lat,&lon,&radius);\n        auto center = S2LatLng::FromDegrees(lat,lon).Normalized();\n        auto angle = S1Angle::Degrees(radius);\n        mRegions.push_back(std::make_unique<S2Cap>(center.ToPoint(),angle));\n    } else if (ext == \"poly\") {\n        std::istringstream f(text);\n        std::string line;\n\n        // discard the first line\n        std::getline(f,line);\n\n        // this will either parse name of next polygon\n        // or END at end of file \n        while (std::getline(f, line)) {\n            // END of file\n            if (line == \"END\") {\n                break;\n            }\n            AddS2RegionFromPolyFile(f);\n        }\n    } else if (ext == \"geojson\") {\n        auto json = nlohmann::json::parse(text);\n        if (json[\"type\"] == \"Polygon\" || json[\"type\"] == \"MultiPolygon\") {\n            AddS2RegionFromGeometry(json);\n        } else if (json[\"type\"] == \"GeometryCollection\") {\n            // RFC 7946: the members live in the \"geometries\" array, and each\n            // member geometry (not the collection itself) must be added.\n            for (auto geometry : json[\"geometries\"]) {\n                AddS2RegionFromGeometry(geometry);\n            }\n        } else if (json[\"type\"] == \"Feature\") {\n            
AddS2RegionFromGeometry(json[\"geometry\"]);\n        } else if (json[\"type\"] == \"FeatureCollection\") {\n            for (auto feature : json[\"features\"]) {\n                AddS2RegionFromGeometry(feature[\"geometry\"]);\n            }\n        }\n    } else {\n        std::cerr << \"Unknown ext\" << std::endl;\n        assert(false);\n    }\n}\n\nbool Region::Contains(S2Point p) {\n    for (auto const &region : mRegions) {\n        if (region->Contains(p)) return true;\n    }\n    return false;\n}\n\nS2CellUnion Region::GetCovering(S2RegionCoverer &coverer) {\n    S2CellUnion retval;\n    for (auto const &region : mRegions) {\n        retval = retval.Union(coverer.GetCovering(*region));\n    }\n    return retval;\n}\n\nS2LatLngRect Region::GetBounds() {\n    auto const &firstRegion = mRegions[0];\n    auto lat_min = firstRegion->GetRectBound().lat_lo();\n    auto lat_max = firstRegion->GetRectBound().lat_hi();\n    auto lng_min = firstRegion->GetRectBound().lng_lo();\n    auto lng_max = firstRegion->GetRectBound().lng_hi();\n\n    for (size_t i = 1; i < mRegions.size(); i++) {\n        auto const &r = mRegions[i];\n        auto lat_lo = r->GetRectBound().lat_lo();\n        auto lat_hi = r->GetRectBound().lat_hi();\n        auto lng_lo = r->GetRectBound().lng_lo();\n        auto lng_hi = r->GetRectBound().lng_hi();\n        if (lat_lo < lat_min) lat_min = lat_lo;\n        if (lat_hi > lat_max) lat_max = lat_hi;\n        if (lng_lo < lng_min) lng_min = lng_lo;\n        if (lng_hi > lng_max) lng_max = lng_hi;\n    }\n\n    return S2LatLngRect(S2LatLng(lat_min,lng_min),S2LatLng(lat_max,lng_max));\n}\n"
  },
  {
    "path": "src/storage.cpp",
    "content": "#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n\nnamespace osmx { namespace db {\n\n\nMDB_env *createEnv(std::string path, bool writable) {\n  MDB_env* env;\n  CHECK_LMDB(mdb_env_create(&env));\n\n  // the maximum size of any LMDB dataset. \n  // 2TB is a safe number for just OSM data as of 02/2023\n  // only affects the size of virtual memory, not real memory.\n  mdb_env_set_mapsize(env,2UL * 1024UL * 1024UL * 1024UL * 1024UL);\n  mdb_env_set_maxdbs(env,10);\n  int flags = 0;\n  if (!writable) flags |= MDB_RDONLY;\n  CHECK_LMDB(mdb_env_open(env, path.c_str(),MDB_NOSUBDIR | MDB_NORDAHEAD | MDB_NOSYNC | flags, 0664));\n  return env;\n}\n\nMetadata::Metadata(MDB_txn *txn) : mTxn(txn) {\n  CHECK_LMDB(mdb_dbi_open(mTxn, \"metadata\", MDB_CREATE, &mDbi));\n}\n\nvoid Metadata::put(const std::string &key_str, const std::string &value_str) {\n    MDB_val key, data;\n    key.mv_size = key_str.size();\n    key.mv_data = (void *)key_str.data();\n    data.mv_size = value_str.size();\n    data.mv_data = (void *)value_str.data();\n    CHECK_LMDB(mdb_put(mTxn,mDbi, &key, &data, 0));\n}\n\nstd::string Metadata::get(const std::string &key_str) {\n    MDB_val key, data;\n    key.mv_size = key_str.size();\n    key.mv_data = (void *)key_str.data();\n    auto retval = mdb_get(mTxn,mDbi, &key, &data);\n    if (retval == 0) return std::string((const char *)data.mv_data,data.mv_size);\n    else return \"\";\n}\n\nElements::Elements(MDB_txn *txn, const std::string &name) : mTxn(txn) {\n  CHECK_LMDB(mdb_dbi_open(txn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE, &mDbi));\n}\n\nvoid Elements::put(uint64_t id, kj::VectorOutputStream &vos, int flags) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  data.mv_size = vos.getArray().size();\n  data.mv_data = (void *)vos.getArray().begin();\n  CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags));\n}\n\nvoid Elements::del(uint64_t id) {\n  MDB_val key, data;\n  key.mv_size = 
sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  mdb_del(mTxn, mDbi, &key, &data);\n}\n\nbool Elements::exists(uint64_t id) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  return mdb_get(mTxn,mDbi,&key,&data) == 0;\n}\n\ncapnp::FlatArrayMessageReader Elements::getReader(uint64_t id) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  CHECK_LMDB(mdb_get(mTxn,mDbi,&key,&data));\n  auto arr = kj::ArrayPtr<const capnp::word>((const capnp::word *)data.mv_data,data.mv_size);\n  return capnp::FlatArrayMessageReader(arr);\n}\n\nLocations::Locations(MDB_txn *txn) : mTxn(txn) {\n    CHECK_LMDB(mdb_dbi_open(mTxn, \"locations\", MDB_INTEGERKEY | MDB_CREATE, &mDbi));\n}\n\nvoid Locations::put(uint64_t id, const Location value, int flags) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n\n  int32_t buf[3];\n  buf[0] = value.coords.x();\n  buf[1] = value.coords.y();\n  buf[2] = value.version;\n\n  data.mv_size = sizeof(uint32_t) * 3;\n  data.mv_data = (void *)&buf;\n  CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags));\n}\n\nvoid Locations::del(uint64_t id) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  mdb_del(mTxn,mDbi,&key,&data);\n}\n\nLocation Locations::get(uint64_t id) const {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  int retval = mdb_get(mTxn, mDbi, &key, &data);\n  if (retval == MDB_NOTFOUND) return Location{};\n  CHECK_LMDB(retval);\n  int32_t *buf = (int32_t *)data.mv_data;\n  return Location{osmium::Location(buf[0],buf[1]),buf[2]};\n}\n\nbool Locations::exists(uint64_t id) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&id;\n  int retval = mdb_get(mTxn, mDbi, &key, &data);\n  return retval != MDB_NOTFOUND;\n}\n\nIndex::Index(MDB_txn *txn, const std::string &name) : mTxn(txn) {\n  CHECK_LMDB(mdb_dbi_open(txn, 
name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));\n}\n\nvoid Index::put(uint64_t from, uint64_t osm_id, int flags) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&from;\n  data.mv_size = sizeof(uint64_t);\n  data.mv_data = (void *)&osm_id;\n  CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags));\n}\n\nvoid Index::del(uint64_t from, uint64_t osm_id ) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&from;\n  data.mv_size = sizeof(uint64_t);\n  data.mv_data = (void *)&osm_id;\n  mdb_del(mTxn,mDbi,&key,&data);\n}\n\nIndexWriter::IndexWriter(MDB_env *env, const std::string &name) : mEnv(env), mName(name) {\n  CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &mTxn));\n  CHECK_LMDB(mdb_dbi_open(mTxn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));\n}\n\nvoid IndexWriter::put(uint64_t from, uint64_t osm_id, int flags) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&from;\n  data.mv_size = sizeof(uint64_t);\n  data.mv_data = (void *)&osm_id;\n  CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags));\n  if (mWrites++ == 8000000) {\n    CHECK_LMDB(mdb_txn_commit(mTxn));\n    CHECK_LMDB(mdb_txn_begin(mEnv, NULL, 0, &mTxn));\n    CHECK_LMDB(mdb_dbi_open(mTxn, mName.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));\n    mWrites = 0;\n  }\n}\n\nvoid IndexWriter::commit() {\n  CHECK_LMDB(mdb_txn_commit(mTxn));\n}\n\nvoid traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set) {\n  S2CellId start = cell_id.child_begin(CELL_INDEX_LEVEL);\n  S2CellId end = cell_id.child_end(CELL_INDEX_LEVEL);\n  MDB_val key, data;\n  key.mv_size = sizeof(S2CellId);\n  key.mv_data = (void *)&start;\n\n  // reading past end of db\n  if (mdb_cursor_get(cursor,&key,&data,MDB_SET_RANGE) != 0) return;\n  while (*((S2CellId *)key.mv_data) < end) {\n    int 
retval_values = mdb_cursor_get(cursor,&key,&data,MDB_GET_MULTIPLE);\n    while (0 == retval_values) {\n      for (int i = 0; i < data.mv_size/sizeof(uint64_t); i++) {\n        uint64_t *d = (uint64_t*)data.mv_data;\n        set.add(*(d+i));\n      }\n      retval_values = mdb_cursor_get(cursor,&key,&data,MDB_NEXT_MULTIPLE);\n    }\n    // reached end of db\n    if (mdb_cursor_get(cursor,&key,&data,MDB_NEXT_NODUP) != 0) return;\n  }\n}\n\nvoid traverseReverse(MDB_cursor *cursor,uint64_t from, roaring::Roaring64Map &set) {\n  MDB_val key, data;\n  key.mv_size = sizeof(uint64_t);\n  key.mv_data = (void *)&from;\n\n  if (mdb_cursor_get(cursor,&key,&data,MDB_SET) != 0) return;\n  int retval_values = mdb_cursor_get(cursor,&key,&data,MDB_GET_MULTIPLE);\n  while (0 == retval_values) {\n    for (int i = 0; i < data.mv_size/sizeof(uint64_t); i++) {\n      uint64_t *d = (uint64_t*)data.mv_data;\n      uint64_t to_id = *(d+i);\n      set.add(to_id);\n    }\n    retval_values = mdb_cursor_get(cursor,&key,&data,MDB_NEXT_MULTIPLE);\n  }\n}\n\n}}\n\n"
  },
  {
    "path": "src/update.cpp",
    "content": "#include <iostream>\n#include <cassert>\n#include <set>\n#include \"cxxopts.hpp\"\n#include \"osmium/handler.hpp\"\n#include \"osmium/io/any_input.hpp\"\n#include \"osmium/visitor.hpp\"\n#include \"osmium/util/progress_bar.hpp\"\n#include \"roaring/roaring.hh\"\n\n// Historically, OSMExpress had vendored its dependencies, but we move away\n// from this. At the moment, the S2 geometry library is the last remaining\n// dependency still being vendored. Our (rather ancient) bundled version\n// of S2 tests for a number of implementations of the Standard C library.\n// If S2 happens to recognize the library, it includes <byteswap.h>\n// (unless it is being compiled by a Microsoft or Apple compiler);\n// otherwise, S2 falls back to its own byteswap implementation.\n// What S2 does (or rather did, in the old version we happen\n// to bundle) isn't so great; it would have been better for S2 to use\n// a standards-conforming way of byteswapping, test for its presence,\n// and only use the fallback if that test fails. But that's how it is.\n//\n// Anyhow, the (equally ancient) version of CRoaring, another library\n// that we previously vendored into OSMExpress, was polluting the\n// C macro namespace in a way that made our bundled version of S2\n// believe to be on a C library it knew about. Therefore, at the time\n// when OSMExpress still vendored that old version of CRoaring, the\n// bundled version of S2 would always include <byteswap.h> instead\n// of (re-)defining it, even if S2 did not recognize the C library.\n//\n// As we upgraded CRoaring to a newer version, which does not pollute\n// the C macro namespace anymore, the C preprocessor would now execute\n// the fallback path in the S2 headers when compiling with a Standard C\n// library that our old version of S2 does not recognize. 
This caused\n// compilation errors on Alpine Linux, which uses musl, a very lightweight\n// but fully standards-conforming implementation of the Standard C library.\n//\n// The following hack prevents our vendored old version of S2 from\n// supplying its own byteswap functions. On Bionic (and also other\n// modern libc implementations, including musl), it is sufficient to\n// include <byteswap.h>. Note we cannot explicitly test for musl here,\n// because musl does not define a __MUSL__ macro. (They don't want to,\n// since such a macro would not be standards-conforming; whether it's\n// really helpful to be so puristic has been the subject of much debate).\n//\n// TODO: Remove this hack once we stop vendoring the S2 geometry library.\n// https://github.com/bdon/OSMExpress/issues/20\n#if !defined(_MSC_VER) && !defined(__APPLE__) && !defined(__GLIBC__) \\\n    && !defined(__BIONIC__) && !defined(__ASYLO__)\n#define __BIONIC__ 1\n#endif\n\n#include \"s2/s2latlng.h\"\n#include \"s2/s2cell_union.h\"\n\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\nusing namespace osmx;\n\nclass DataUpdate : public osmium::handler::Handler {\n  public:\n  DataUpdate(MDB_txn *txn) : \n  mTxn(txn), \n  mLocations(txn), \n  mNodes(txn,\"nodes\"), \n  mWays(txn,\"ways\"), \n  mRelations(txn,\"relations\"),\n  mCellNode(txn,\"cell_node\"),\n  mNodeWay(txn,\"node_way\"),\n  mNodeRelation(txn,\"node_relation\"),\n  mWayRelation(txn,\"way_relation\"),\n  mRelationRelation(txn, \"relation_relation\")  {\n  }\n\n  // update location, node, cell_location tables\n  void node(const osmium::Node& node) {\n    uint64_t id = node.id();\n    db::Location prev_location = mLocations.get(id);\n    db::Location new_location = db::Location{node.location(),(int32_t)node.version()};\n    uint64_t prev_cell;\n    if (prev_location.is_defined()) prev_cell = S2CellId(S2LatLng::FromDegrees(prev_location.coords.lat(),prev_location.coords.lon())).parent(CELL_INDEX_LEVEL).id();\n\n    
if (!node.visible()) {\n      mLocations.del(id);\n      mNodes.del(id);\n      // prev_cell is only initialized when the node previously existed;\n      // using it otherwise would read an uninitialized value.\n      if (prev_location.is_defined()) mCellNode.del(prev_cell,id);\n      return;\n    } else {\n      mLocations.put(id,new_location);\n      if (node.tags().size() > 0) {\n        ::capnp::MallocMessageBuilder message;\n        Node::Builder nodeMsg = message.initRoot<Node>();\n        setTags<Node::Builder>(node.tags(),nodeMsg);\n        auto metadata = nodeMsg.initMetadata();\n        metadata.setVersion(node.version());\n        metadata.setTimestamp(node.timestamp().seconds_since_epoch());\n        metadata.setChangeset(node.changeset());\n        metadata.setUid(node.uid());\n        metadata.setUser(node.user());\n        kj::VectorOutputStream output;\n        capnp::writeMessage(output,message);\n        mNodes.put(id,output);\n      } else {\n        mNodes.del(id); \n      }\n    }\n\n    uint64_t new_cell = S2CellId(S2LatLng::FromDegrees(new_location.coords.lat(),new_location.coords.lon())).parent(CELL_INDEX_LEVEL).id();\n    if (!prev_location.is_defined()) {\n      mCellNode.put(new_cell,id);\n      return;\n    }\n\n    if (prev_cell != new_cell) {\n      mCellNode.del(prev_cell,id);\n      mCellNode.put(new_cell,id);\n    }\n  }\n\n  // update way, node_way tables\n  void way(const osmium::Way &way) {\n    uint64_t id = way.id();\n\n    set<uint64_t> prev_nodes;\n    set<uint64_t> new_nodes;\n\n    if (mWays.exists(id)) {\n      auto reader = mWays.getReader(id);\n      Way::Reader way = reader.getRoot<Way>();\n      for (auto const &node_id : way.getNodes()) {\n        prev_nodes.insert(node_id);\n      }\n    }\n\n    if (!way.visible()) {\n      mWays.del(id);\n    } else {\n      auto const &nodes = way.nodes();\n      ::capnp::MallocMessageBuilder message;\n      Way::Builder wayMsg = message.initRoot<Way>();\n      wayMsg.initNodes(nodes.size());\n      int i = 0;\n      for (int i = 0; i < nodes.size(); i++) {\n        wayMsg.getNodes().set(i,nodes[i].ref());\n        
new_nodes.insert(nodes[i].ref());\n      }\n      setTags<Way::Builder>(way.tags(),wayMsg);\n      auto metadata = wayMsg.initMetadata();\n      metadata.setVersion(way.version());\n      metadata.setTimestamp(way.timestamp().seconds_since_epoch());\n      metadata.setChangeset(way.changeset());\n      metadata.setUid(way.uid());\n      metadata.setUser(way.user());\n      kj::VectorOutputStream output;\n      capnp::writeMessage(output,message);\n      mWays.put(id,output);\n    }\n\n    if (!way.visible()) {\n      for (uint64_t node_id : prev_nodes) mNodeWay.del(node_id,id);\n    } else {\n      for (uint64_t node_id : prev_nodes) {\n        if (new_nodes.count(node_id) == 0) mNodeWay.del(node_id,id);\n      }\n      for (uint64_t node_id : new_nodes) {\n        if (prev_nodes.count(node_id) == 0) mNodeWay.put(node_id,id);\n      }\n    }\n  }\n\n  // update relation, node_relation, way_relation and relation_relation tables\n  void relation(const osmium::Relation &relation) {\n    uint64_t id = relation.id();\n\n    set<uint64_t> prev_nodes;\n    set<uint64_t> prev_ways;\n    set<uint64_t> prev_relations;\n    set<uint64_t> new_nodes;\n    set<uint64_t> new_ways;\n    set<uint64_t> new_relations;\n\n    if (mRelations.exists(id)) {\n      auto reader = mRelations.getReader(id);\n      Relation::Reader relation = reader.getRoot<Relation>();\n      for (auto const &member : relation.getMembers()) {\n        if (member.getType() == RelationMember::Type::NODE) {\n          prev_nodes.insert(member.getRef());\n        } else if (member.getType() == RelationMember::Type::WAY) {\n          prev_ways.insert(member.getRef());\n        } else {\n          prev_relations.insert(member.getRef());\n        }\n      }\n    }\n\n    if (!relation.visible()) {\n      mRelations.del(relation.id());\n    } else {\n      ::capnp::MallocMessageBuilder message;\n      Relation::Builder relationMsg = message.initRoot<Relation>();\n      
setTags<Relation::Builder>(relation.tags(),relationMsg);\n      auto members = relationMsg.initMembers(relation.members().size());\n      int i = 0;\n      for (auto const &member : relation.members()) {\n        members[i].setRef(member.ref());\n        members[i].setRole(member.role());\n        if (member.type() == osmium::item_type::node) {\n          new_nodes.insert(member.ref());\n          members[i].setType(RelationMember::Type::NODE);\n        }\n        else if (member.type() == osmium::item_type::way) {\n          new_ways.insert(member.ref());\n          members[i].setType(RelationMember::Type::WAY);\n        }\n        else if (member.type() == osmium::item_type::relation) {\n          new_relations.insert(member.ref());\n          members[i].setType(RelationMember::Type::RELATION);\n        }\n        i++;\n      }\n      auto metadata = relationMsg.initMetadata();\n      metadata.setVersion(relation.version());\n      metadata.setTimestamp(relation.timestamp().seconds_since_epoch());\n      metadata.setChangeset(relation.changeset());\n      metadata.setUid(relation.uid());\n      metadata.setUser(relation.user());\n      kj::VectorOutputStream output;\n      capnp::writeMessage(output,message);\n      mRelations.put(relation.id(),output);\n    }\n\n    if (!relation.visible()) {\n      for (uint64_t node_id : prev_nodes) mNodeRelation.del(node_id,id);\n      for (uint64_t way_id : prev_ways) mWayRelation.del(way_id,id);\n      for (uint64_t relation_id : prev_relations) mRelationRelation.del(relation_id,id);\n    } else {\n      for (uint64_t node_id : prev_nodes) {\n        if (new_nodes.count(node_id) == 0) mNodeRelation.del(node_id,id);\n      }\n      for (uint64_t node_id : new_nodes) {\n        if (prev_nodes.count(node_id) == 0) mNodeRelation.put(node_id,id);\n      }\n      for (uint64_t way_id : prev_ways) {\n        if (new_ways.count(way_id) == 0) mWayRelation.del(way_id,id);\n      }\n      for (uint64_t way_id : new_ways) {\n        if 
(prev_ways.count(way_id) == 0) mWayRelation.put(way_id,id);\n      }\n      for (uint64_t relation_id : prev_relations) {\n        if (new_relations.count(relation_id) == 0) mRelationRelation.del(relation_id,id);\n      }\n      for (uint64_t relation_id : new_relations) {\n        if (prev_relations.count(relation_id) == 0) mRelationRelation.put(relation_id,id);\n      }\n    }\n  }\n\n  private:\n  MDB_txn *mTxn;\n  db::Locations mLocations;\n  db::Elements mNodes;\n  db::Elements mWays;\n  db::Elements mRelations;\n  db::Index mNodeWay;\n  db::Index mNodeRelation;\n  db::Index mWayRelation;\n  db::Index mRelationRelation;\n  db::Index mCellNode;\n};\n\nvoid cmdUpdate(int argc, char* argv[]) {\n  cxxopts::Options cmdoptions(\"Update\", \"Update an .osmx file with a .osc diff.\");\n  cmdoptions.add_options()\n    (\"v,verbose\", \"Verbose output\")\n    (\"commit\", \"Commit the update\")\n    (\"cmd\", \"Command to run\", cxxopts::value<string>())\n    (\"osmx\", \".osmx to update\", cxxopts::value<string>())\n    (\"osc\", \".osc to apply\", cxxopts::value<string>())\n    (\"seqnum\", \"The sequence number of the .osc\", cxxopts::value<string>())\n    (\"timestamp\", \"The timestamp of the .osc\", cxxopts::value<string>())\n  ;\n\n  cmdoptions.parse_positional({\"cmd\",\"osmx\",\"osc\",\"seqnum\",\"timestamp\"});\n  auto result = cmdoptions.parse(argc, argv);\n\n  if (result.count(\"osmx\") == 0 || result.count(\"osc\") == 0 || \\\n    result.count(\"seqnum\") == 0 || result.count(\"timestamp\") == 0) {\n    cout << \"Usage: osmx update OSMX_FILE OSC_FILE SEQNUM TIMESTAMP [OPTIONS]\" << endl;\n    cout << \"Applies OSC_FILE and saves SEQNUM and TIMESTAMP into the metadata table.\" << endl << endl;\n    cout << \"EXAMPLE:\" << endl;\n    cout << \" osmx update planet.osmx 123456.osc 123456 2019-09-05T00:00:00Z --commit\" << endl << endl;\n    cout << \"OPTIONS:\" << endl;\n    cout << \" --v,--verbose: verbose output.\" << endl;\n    cout << \" --commit: Actually 
commit the transaction; otherwise runs the update and rolls back.\" << endl;\n    exit(1);\n  }\n\n  string osmx = result[\"osmx\"].as<string>();\n  string osc = result[\"osc\"].as<string>();\n  bool verbose = result.count(\"verbose\") > 0;\n  auto startTime = std::chrono::high_resolution_clock::now();\n\n  MDB_env* env = db::createEnv(osmx,true);\n  MDB_txn* txn;\n  CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn));\n\n  string old_seqnum = \"UNKNOWN\";\n  auto new_seqnum = result[\"seqnum\"].as<string>();\n  auto new_timestamp = result[\"timestamp\"].as<string>();\n  db::Metadata metadata(txn);\n  if (verbose) cout << \"Timestamp: \" << metadata.get(\"osmosis_replication_timestamp\") << endl;\n  old_seqnum = metadata.get(\"osmosis_replication_sequence_number\");\n\n  if (verbose) cout << \"Starting update from \" << old_seqnum << \" to \" << new_seqnum << endl;\n  const osmium::io::File input_file{osc};\n\n  osmium::io::Reader reader{input_file, osmium::osm_entity_bits::object};\n  DataUpdate data_update(txn);\n  osmium::apply(reader, data_update);\n  \n  auto duration = (std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now() - startTime ).count()) / 1000.0;\n\n  if (result.count(\"commit\") > 0) {\n    {\n      metadata.put(\"osmosis_replication_sequence_number\",new_seqnum);\n      metadata.put(\"osmosis_replication_timestamp\",new_timestamp);\n    }\n    CHECK_LMDB(mdb_txn_commit(txn));\n    cout << \"Committed: \";\n  } else {\n    mdb_txn_abort(txn);\n    cout << \"Aborted: \";\n  }\n  cout << old_seqnum << \" -> \" << new_seqnum << \" in \" << duration << \" seconds.\" << endl;\n  mdb_env_sync(env,true);\n  mdb_env_close(env);\n}\n\n"
  },
  {
    "path": "test/test_region.cpp",
    "content": "#include \"catch2/catch_test_macros.hpp\"\n#include \"s2/s2latlng.h\"\n#include \"osmx/region.h\"\n\nusing namespace std;\n\n// osmium header format is like this: Box: (-79.82402,40.439216,-71.660801,45.07133)\n\n// small:  {\\\"bbox\\\":[40.7411\\,-73.9937\\,40.7486\\,-73.9821]}\n// big:    {\\\"bbox\\\":[40.6762\\,-74.0543\\,40.8093\\,-73.8603]}\n// radius: {\"center\":[40.7411,-73.9937],\"radius\":25.5}\n// indo:  {\\\"bbox\\\":[-12.039321\\,94.394531\\,8.407168\\,142.418292]}\n\n// bbox should be minLat,minLon,maxLat,maxLon (opposite of GeoJSON)\nTEST_CASE(\"rectangular bbox\") {\n  SECTION(\"basic bbox\") {\n    string bbox = \"-1.0,-1.0,1.0,1.0\";\n    Region s{bbox,\"bbox\"};\n    REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n    REQUIRE(s.Contains(S2LatLng::FromDegrees(0.9,0.9).ToPoint()));\n  }\n}\n\nTEST_CASE(\"disc\") {\n  SECTION(\"basic disc\") {\n    string disc = \"0.0,0.0,1.0\";\n    Region s{disc,\"disc\"};\n    REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n    REQUIRE(!s.Contains(S2LatLng::FromDegrees(0.9,0.9).ToPoint()));\n  }\n}\n\nTEST_CASE(\"geojson polygon\") {\n    SECTION(\"polygon geometry\") {\n        string json = R\"json({\n  \"type\": \"Polygon\",\n  \"coordinates\": [\n    [\n      [-1.0,-1.0],\n      [-1.0,1.0],\n      [1.0,1.0],\n      [1.0,-1.0],\n      [-1.0,-1.0]\n    ]\n  ]\n})json\";\n        Region s{json,\"geojson\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));\n    }\n\n    SECTION(\"polygon with a hole\") {\n        string json = R\"json({\n  \"type\": \"Polygon\",\n  \"coordinates\": [\n    [\n      [-2.0,-2.0],\n      [-2.0,2.0],\n      [2.0,2.0],\n      [2.0,-2.0],\n      [-2.0,-2.0]\n    ],\n    [\n      [-1.0,-1.0],\n      [-1.0,1.0],\n      [1.0,1.0],\n      [1.0,-1.0],\n      [-1.0,-1.0]\n    ]\n  ]\n})json\";\n        Region s{json,\"geojson\"};\n        
REQUIRE(s.Contains(S2LatLng::FromDegrees(1.5,1.5).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(0.0,0.0).ToPoint()));\n    }\n\n    SECTION(\"multipolygon geometry\") {\n        string json = R\"json({\n  \"type\": \"MultiPolygon\",\n  \"coordinates\": [\n    [[\n      [0.0,0.0],\n      [1.0,0.0],\n      [1.0,1.0],\n      [0.0,1.0],\n      [0.0,0.0]\n    ]],\n    [[\n      [2.0,2.0],\n      [3.0,2.0],\n      [3.0,3.0],\n      [2.0,3.0],\n      [2.0,2.0]\n    ]]\n  ]\n})json\";\n        Region s{json,\"geojson\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,0.5).ToPoint()));\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(2.5,2.5).ToPoint()));\n        auto bounds = s.GetBounds();\n        REQUIRE(bounds.lat_lo().degrees() <= 0.0);\n        REQUIRE(bounds.lat_hi().degrees() >= 3.0);\n        REQUIRE(bounds.lng_lo().degrees() <= 0.0);\n        REQUIRE(bounds.lng_hi().degrees() >= 3.0);\n    }\n\n    SECTION(\"bounds beyond antimeridian\") {\n        string json = R\"json({\n  \"type\": \"Polygon\",\n  \"coordinates\": [\n    [\n      [180.0,-1.0],\n      [180.0,1.0],\n      [181.0,1.0],\n      [181.0,-1.0],\n      [180.0,-1.0]\n    ]\n  ]\n})json\";\n        Region s{json,\"geojson\"};\n        auto bounds = s.GetBounds();\n        REQUIRE(bounds.lng_lo().degrees() == 180.0);\n        REQUIRE(bounds.lng_hi().degrees() <= -178.9); // hacky precision\n        REQUIRE(bounds.lng_hi().degrees() >= -179.1);\n    }\n}\n\n// .poly in Lon, Lat order\nTEST_CASE(\"osmosis .poly\") {\n    SECTION(\"simple polygon\") {\n        string poly = R\"poly(basic\nfirst_area\n    0.2e+01 0.1e+01\n    0.2e+01 -0.1e+01\n    -0.2e+01    -0.1e+01\n    -0.2e+01    0.1e+01\nEND\nEND\n)poly\";\n        Region s{poly,\"poly\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,3.0).ToPoint()));\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,1.5).ToPoint()));\n    }\n\n    
SECTION(\"different whitespace, opposite orientation\") {\n        string poly = R\"poly(basic\nfirst_area\n    0.1E+01 0.1E+01\n    -0.1E+01    0.1E+01\n    -0.1E+01    -0.1E+01\n    0.1E+01 -0.1E+01\nEND\nEND\n)poly\";\n        Region s{poly,\"poly\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));\n    }\n\n    SECTION(\"repeated last point\") {\n        string poly = R\"poly(basic\nfirst_area\n    0.1e+01 0.1e+01\n    0.1e+01 -0.1e+01\n    -0.1e+01    -0.1e+01\n    -0.1e+01    0.1e+01\n    0.1e+01 0.1e+01\nEND\nEND\n)poly\";\n        Region s{poly,\"poly\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));\n    }\n\n    SECTION(\"multiple outer loops\") {\n        string poly = R\"poly(basic\nfirst_area\n    0.1E+01 0.1E+01\n    -0.1E+01    0.1E+01\n    -0.1E+01    -0.1E+01\n    0.1E+01 -0.1E+01\nEND\nsecond_area\n    0.4E+01 0.4E+01\n    0.3E+01    0.4E+01\n    0.3E+01    0.3E+01\n    0.4E+01 0.3E+01\nEND\nEND\n)poly\";\n        Region s{poly,\"poly\"};\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));\n        REQUIRE(s.Contains(S2LatLng::FromDegrees(3.5,3.5).ToPoint()));\n        REQUIRE(!s.Contains(S2LatLng::FromDegrees(1.0,1.0).ToPoint()));\n    }\n\n    SECTION(\"loop with hole\") {\n\n    }\n}\n"
  },
  {
    "path": "utils/osmx-update",
    "content": "#!/usr/bin/env python\n\nfrom datetime import datetime, timezone\nimport subprocess\nimport tempfile\nimport os\nimport sys\nimport fcntl\n\nfrom server import ReplicationServer\n\n# expects osmx to be on the PATH.\nosmx = 'osmx'\n\n# Open the lock file before entering the try block so the finally clause\n# can always reference it; if open() itself fails we must not reach the\n# unlock call with an unbound name.\nfile = open('/tmp/osmx.lock','w')\ntry:\n  fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)\n\n  s = ReplicationServer(sys.argv[2])\n\n  # OSMX always uses minutely timestamps internally - try integrating daily\n  seqnum = subprocess.check_output([osmx,'query',sys.argv[1],'seqnum'])\n\n  if not seqnum.strip():\n    timestamp = subprocess.check_output([osmx,'query',sys.argv[1],'timestamp'])\n    timestamp = timestamp.decode('utf-8').strip()\n    timestamp = datetime.strptime(timestamp, \"%Y-%m-%dT%H:%M:%SZ\")\n    timestamp = timestamp.replace(tzinfo=timezone.utc)\n    print('Timestamp is {0}'.format(timestamp))\n    seqnum = s.timestamp_to_sequence(timestamp)\n\n  seqnum = int(seqnum)\n\n  print('Sequence number is {0}'.format(seqnum))\n\n  latest = s.get_state_info().sequence\n  print(\"Latest is {0}\".format(latest))\n\n  current_id = seqnum + 1\n  while current_id <= latest:\n    fd, path = tempfile.mkstemp(suffix='.osc.gz')\n    with open(fd,'wb') as f:\n      f.write(s.get_diff_block(current_id))\n    info = s.get_state_info(current_id)\n    timestamp = info.timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')\n    subprocess.check_call([osmx,'update',sys.argv[1],path,str(current_id),timestamp,'--commit'])\n    os.unlink(path)\n    current_id = current_id + 1\n\nexcept BlockingIOError:\n  print(\"Process is running - exiting.\")\nfinally:\n  fcntl.lockf(file, fcntl.LOCK_UN)\n  file.close()\n\n"
  },
  {
    "path": "utils/server.py",
    "content": "\"\"\" Helper functions to communicate with replication servers.\nderived from https://github.com/osmcode/pyosmium\n\"\"\"\n\nimport sys\nimport urllib.request as urlrequest\nimport urllib.error as urlerror\nimport datetime as dt\nfrom collections import namedtuple\nfrom math import ceil\n\nOsmosisState = namedtuple('OsmosisState', ['sequence', 'timestamp'])\nDownloadResult = namedtuple('DownloadResult', ['id', 'newest'])\n\nimport logging\n\nlog = logging.getLogger()\n\nclass ReplicationServer(object):\n    def __init__(self, url, diff_type='osc.gz'):\n        self.baseurl = url\n        self.diff_type = diff_type\n\n    def open_url(self, url):\n        return urlrequest.urlopen(url,None,10)\n\n    def timestamp_to_sequence(self, timestamp, balanced_search=False):\n        \"\"\" Get the sequence number of the replication file that contains the\n            given timestamp. The search algorithm is optimised for replication\n            servers that publish updates in regular intervals. For servers\n            with irregular change file publication dates 'balanced_search`\n            should be set to true so that a standard binary search for the\n            sequence will be used. 
The default is good for all known\n            OSM replication services.\n        \"\"\"\n\n        # get the current timestamp from the server\n        upper = self.get_state_info()\n\n        if upper is None:\n            return None\n        if timestamp >= upper.timestamp or upper.sequence <= 0:\n            return upper.sequence\n\n        # find a state file that is before the required timestamp\n        lower = None\n        lowerid = 0\n        while lower is None:\n            log.info(\"Trying with Id %s\" % lowerid)\n            lower = self.get_state_info(lowerid)\n\n            if lower is not None and lower.timestamp >= timestamp:\n                if lower.sequence == 0 or lower.sequence + 1 >= upper.sequence:\n                    return lower.sequence\n                upper = lower\n                lower = None\n                lowerid = 0\n\n            if lower is None:\n                # no lower yet, so try a higher id (binary search wise)\n                newid = int((lowerid + upper.sequence) / 2)\n                if newid <= lowerid:\n                    # nothing suitable found, so upper is probably the best we can do\n                    return upper.sequence\n                lowerid = newid\n\n        # Now do a binary search between upper and lower.\n        # We could be clever here and compute the most likely state file\n        # by interpolating over the timestamps but that creates a whole ton of\n        # special cases that need to be handled correctly.\n        while True:\n            if balanced_search:\n                base_splitid = int((lower.sequence + upper.sequence) / 2)\n            else:\n                ts_int = (upper.timestamp - lower.timestamp).total_seconds()\n                seq_int = upper.sequence - lower.sequence\n                goal = (timestamp - lower.timestamp).total_seconds()\n                base_splitid = lower.sequence + ceil(goal * seq_int / ts_int)\n                if base_splitid >= upper.sequence:\n  
                  base_splitid = upper.sequence - 1\n            split = self.get_state_info(base_splitid)\n\n            if split is None:\n                # file missing, search the next towards lower\n                splitid = base_splitid - 1\n                while split is None and splitid > lower.sequence:\n                    split = self.get_state_info(splitid)\n                    splitid -= 1\n            if split is None:\n                # still nothing? search towards upper\n                splitid = base_splitid + 1\n                while split is None and splitid < upper.sequence:\n                    split = self.get_state_info(splitid)\n                    splitid += 1\n            if split is None:\n                # still nothing? Then lower has to do\n                return lower.sequence\n\n            # set new boundary\n            if split.timestamp < timestamp:\n                lower = split\n            else:\n                upper = split\n\n            if lower.sequence + 1 >= upper.sequence:\n                return lower.sequence\n\n\n    def get_state_info(self, seq=None):\n        \"\"\" Downloads and returns the state information for the given\n            sequence. 
If the download is successful, a namedtuple with\n            `sequence` and `timestamp` is returned, otherwise the function\n            returns `None`.\n        \"\"\"\n        try:\n            response = self.open_url(self.get_state_url(seq))\n        except Exception as err:\n            logging.error(err)\n            return None\n\n        ts = None\n        seq = None\n        line = response.readline()\n        while line:\n            line = line.decode('utf-8')\n            if '#' in line:\n                line = line[0:line.index('#')]\n            line = line.strip()\n            if line:\n                kv = line.split('=', 1)\n                if len(kv) != 2:\n                    return None\n                if kv[0] == 'sequenceNumber':\n                    seq = int(kv[1])\n                elif kv[0] == 'timestamp':\n                    ts = dt.datetime.strptime(kv[1], \"%Y-%m-%dT%H\\\\:%M\\\\:%SZ\")\n                    if sys.version_info >= (3,0):\n                        ts = ts.replace(tzinfo=dt.timezone.utc)\n            line = response.readline()\n\n        return OsmosisState(sequence=seq, timestamp=ts)\n\n    def get_diff_block(self, seq):\n        \"\"\" Downloads the diff with the given sequence number and returns\n            it as a byte sequence. Throws a :code:`urllib.error.HTTPError`\n            (or :code:`urllib2.HTTPError` in python2)\n            if the file cannot be downloaded.\n        \"\"\"\n        return self.open_url(self.get_diff_url(seq)).read()\n\n\n    def get_state_url(self, seq):\n        \"\"\" Returns the URL of the state.txt files for a given sequence id.\n\n            If seq is `None` the URL for the latest state info is returned,\n            i.e. 
the state file in the root directory of the replication\n            service.\n        \"\"\"\n        if seq is None:\n            return self.baseurl + '/state.txt'\n\n        return '%s/%03i/%03i/%03i.state.txt' % (self.baseurl,\n                     seq // 1000000, (seq % 1000000) // 1000, seq % 1000)\n\n\n    def get_diff_url(self, seq):\n        \"\"\" Returns the URL to the diff file for the given sequence id.\n        \"\"\"\n        return '%s/%03i/%03i/%03i.%s' % (self.baseurl,\n                     seq // 1000000, (seq % 1000000) // 1000, seq % 1000,\n                     self.diff_type)\n\n"
  }
]