Repository: protomaps/OSMExpress Branch: main Commit: 045a515132e9 Files: 39 Total size: 128.2 KB Directory structure: gitextract_2jzln99k/ ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── build-container.yml │ └── codeql.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE.md ├── README.md ├── dist/ │ └── archive.sh ├── docs/ │ ├── MANUAL.md │ └── PROGRAMMING_GUIDE.md ├── examples/ │ ├── .gitignore │ ├── CMakeLists.txt │ ├── bbox_wkt.cpp │ └── way_wkt.cpp ├── include/ │ └── osmx/ │ ├── cmd.h │ ├── messages.capnp │ ├── region.h │ ├── storage.h │ └── util.h ├── python/ │ ├── .gitignore │ ├── README.md │ ├── examples/ │ │ ├── augmented_diff.py │ │ ├── read_way.py │ │ └── web_server.py │ ├── osmx/ │ │ ├── __init__.py │ │ ├── messages.capnp │ │ └── osmx.py │ └── setup.py ├── src/ │ ├── cmd.cpp │ ├── expand.cpp │ ├── extract.cpp │ ├── region.cpp │ ├── storage.cpp │ └── update.cpp ├── test/ │ └── test_region.cpp └── utils/ ├── osmx-update └── server.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "docker" directory: "/" schedule: interval: "daily" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" ================================================ FILE: .github/workflows/build-container.yml ================================================ name: Build and push container image on: push: branches: [ main ] pull_request: branches: [ main ] release: types: [published] jobs: build: runs-on: ubuntu-latest permissions: contents: read packages: write attestations: write id-token: write steps: - name: Check out repository uses: actions/checkout@v6 with: submodules: recursive - name: Set up Podman run: | sudo apt-get update sudo apt-get install -y podman - name: Log into the container 
registry if: github.event_name != 'pull_request' run: echo "${{ secrets.GITHUB_TOKEN }}" | podman login ghcr.io -u ${{ github.actor }} --password-stdin - name: Build the container image run: | # Container image identifiers must be all-lowercase. # The two commas transform "User/OSMExpress" to "user/osmexpress". IMAGE_ID=ghcr.io/${GITHUB_REPOSITORY,,} SHA_TAG=${{ github.sha }} LATEST_TAG=latest # Build the container image with SHA and latest tags. podman build -t ${IMAGE_ID}:${SHA_TAG} -t ${IMAGE_ID}:${LATEST_TAG} . # If this is a release event, tag the image with the release tag. if [ "${{ github.event_name }}" = "release" ]; then RELEASE_TAG=${{ github.event.release.tag_name }} podman tag ${IMAGE_ID}:${SHA_TAG} ${IMAGE_ID}:${RELEASE_TAG} fi - name: Push the container image to the registry if: github.event_name != 'pull_request' run: | IMAGE_ID=ghcr.io/${GITHUB_REPOSITORY,,} SHA_TAG=${{ github.sha }} LATEST_TAG=latest # Push the container image with SHA and latest tags. podman push $IMAGE_ID:$SHA_TAG podman push $IMAGE_ID:$LATEST_TAG # If this is a release event, push the image with the release tag. 
if [ "${{ github.event_name }}" = "release" ]; then RELEASE_TAG=${{ github.event.release.tag_name }} podman push $IMAGE_ID:$RELEASE_TAG fi ================================================ FILE: .github/workflows/codeql.yml ================================================ name: Scan for security problems with CodeQL on: push: branches: [ "main" ] pull_request: branches: [ "main" ] schedule: - cron: '17 4 * * 0' jobs: analyze: name: Analyze (${{ matrix.language }}) runs-on: 'ubuntu-latest' permissions: security-events: write packages: read # required to fetch internal or private CodeQL packs strategy: fail-fast: false matrix: include: - language: actions build-mode: none - language: c-cpp build-mode: none - language: python build-mode: none steps: - name: Check out repository uses: actions/checkout@v6 - name: Initialize CodeQL uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} - name: Perform CodeQL analysis uses: github/codeql-action/analyze@v4 with: category: "/language:${{matrix.language}}" ================================================ FILE: .gitignore ================================================ CMakeCache.txt CMakeFiles *.swp *.osmx *.osmx-lock Makefile *.pbf *.cmake osmxTest venv depends a.out *.osc *.osc.gz __pycache__ /osmx *.dylib Testing/ compile_commands.json install_manifest.txt dist/*.tgz ================================================ FILE: .gitmodules ================================================ [submodule "vendor/s2geometry"] path = vendor/s2geometry url = https://github.com/google/s2geometry.git ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required (VERSION 3.5) set(CMAKE_C_COMPILER "/usr/bin/clang") set(CMAKE_CXX_COMPILER "/usr/bin/clang++") project(OSMExpress) set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -g") set(CMAKE_CXX_FLAGS "-Wno-deprecated") 
set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations") set(CMAKE_CXX_FLAGS "-pthread") set(OSMX_VERSION "0.2.0") set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "") set(ROARING_USE_CPM OFF) set(ENABLE_ROARING_TESTS OFF) list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") include(FetchContent) # TODO: Switch to a released version after next CapnProto release (post 1.2.0). # Reason: https://github.com/capnproto/capnproto/issues/2353 # Change for v1: https://github.com/capnproto/capnproto/pull/2355 # Change for v2: https://github.com/capnproto/capnproto/pull/2354 FetchContent_Declare( CapnProto GIT_REPOSITORY https://github.com/capnproto/capnproto.git GIT_TAG master EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git GIT_TAG v3.8.1 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS 3) FetchContent_Declare( cxxopts GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git GIT_TAG v3.3.1 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( LMDB GIT_REPOSITORY https://git.openldap.org/openldap/openldap.git GIT_TAG OPENLDAP_REL_ENG_2_6_10 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( nlohmann_json GIT_REPOSITORY https://github.com/nlohmann/json.git GIT_TAG v3.12.0 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( Osmium GIT_REPOSITORY https://github.com/osmcode/libosmium.git GIT_TAG v2.22.0 SOURCE_SUBDIR test/catch EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( Protozero GIT_REPOSITORY https://github.com/mapbox/protozero.git GIT_TAG v1.8.0 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_Declare( roaring GIT_REPOSITORY https://github.com/RoaringBitmap/CRoaring.git GIT_TAG v4.3.6 EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS) FetchContent_MakeAvailable( CapnProto Catch2 cxxopts LMDB nlohmann_json Osmium Protozero roaring) if(NOT CapnProto_FOUND) add_subdirectory(${capnproto_SOURCE_DIR} EXCLUDE_FROM_ALL) endif() if(NOT TARGET LMDB::LMDB) set(LMDB_INCLUDE_DIR ${lmdb_SOURCE_DIR}/libraries/liblmdb) 
add_library( LMDB_LMDB STATIC ${lmdb_SOURCE_DIR}/libraries/liblmdb/mdb.c ${lmdb_SOURCE_DIR}/libraries/liblmdb/midl.c) target_include_directories(LMDB_LMDB PUBLIC ${LMDB_INCLUDE_DIR}) add_library(LMDB::LMDB INTERFACE IMPORTED) set_target_properties( LMDB::LMDB PROPERTIES INTERFACE_LINK_LIBRARIES LMDB_LMDB INTERFACE_INCLUDE_DIRECTORIES ${LMDB_INCLUDE_DIR}) endif() if(NOT OSMIUM_FOUND) add_library(Osmium INTERFACE) include_directories(SYSTEM ${osmium_SOURCE_DIR}/include) endif() if(NOT Protozero_FOUND) add_library(Protozero INTERFACE) include_directories(SYSTEM ${protozero_SOURCE_DIR}/include) endif() add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/vendor/s2geometry EXCLUDE_FROM_ALL) include_directories(vendor/s2geometry/src) include_directories(include) # needed for Expat install dir if(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) include_directories(/usr/local/include) link_directories(osmx /usr/local/lib) endif() set(CAPNPC_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/capnpc_generated) file(MAKE_DIRECTORY ${CAPNPC_OUTPUT_DIR}) capnp_generate_cpp(CAPNP_SRCS CAPNP_HDRS include/osmx/messages.capnp) add_executable( osmx src/cmd.cpp src/storage.cpp src/expand.cpp src/extract.cpp src/update.cpp src/region.cpp ${CAPNP_SRCS}) add_dependencies(osmx s2) target_include_directories( osmx PUBLIC include ${CAPNPC_OUTPUT_DIR}/include) target_link_libraries( osmx bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB nlohmann_json::nlohmann_json roaring s2 z) set_property(TARGET osmx PROPERTY CXX_STANDARD 14) add_executable(osmxTest test/test_region.cpp src/region.cpp) set_property(TARGET osmxTest PROPERTY CXX_STANDARD 14) target_include_directories( osmxTest PUBLIC include ${CAPNPC_OUTPUT_DIR}/include) target_link_libraries( osmxTest bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB nlohmann_json::nlohmann_json roaring s2 z Catch2::Catch2WithMain) enable_testing() add_test(osmxTest osmxTest) install(TARGETS osmx DESTINATION bin) add_custom_target(archive COMMAND dist/archive.sh 
${OSMX_VERSION} ${CMAKE_SYSTEM_NAME}) add_dependencies(archive osmx) add_library( osmx-static STATIC src/storage.cpp src/expand.cpp src/extract.cpp src/update.cpp src/region.cpp) set_property(TARGET osmx-static PROPERTY CXX_STANDARD 14) target_include_directories( osmx-static PUBLIC include ${CAPNPC_OUTPUT_DIR}/include) target_link_libraries( osmx-static bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB nlohmann_json::nlohmann_json roaring s2 z) ================================================ FILE: Dockerfile ================================================ FROM alpine:3.22 AS builder # TODO: Add croaring-dev once available in Alpine Linux. # https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/87769 RUN apk add --no-cache \ clang \ cmake \ git \ linux-headers \ make \ python3-dev \ \ bzip2-dev \ catch2-3 \ capnproto-dev \ cxxopts-dev \ expat-dev \ libosmium-dev \ lmdb-dev \ nlohmann-json \ openssl-dev \ protozero-dev \ zlib-dev WORKDIR /usr/src/osmexpress COPY . /usr/src/osmexpress RUN cmake -DCMAKE_BUILD_TYPE=Release . RUN make -j16 && ./osmxTest && make install FROM alpine:3.22 # cxxopts, libosmium, nlohmann-json and protozero are header-only # C++ libraries; catch2 is only used for testing. We do not need # them in the production container. RUN apk add --no-cache \ libbz2 \ libcrypto3 \ capnproto \ libexpat \ libssl3 \ lmdb \ zlib COPY --from=builder /usr/local/bin/osmx /usr/local/bin/osmx ENTRYPOINT [ "/usr/local/bin/osmx" ] ================================================ FILE: LICENSE.md ================================================ Copyright 2019 Protomaps. Some source code from https://github.com/osmcode/pyosmium Copyright (c) 2014-2018, Sarah Hoffmann, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # OSM Express ![Screenshot](examples/screenshot.png) [Manual](docs/MANUAL.md), [Programming Guide](docs/PROGRAMMING_GUIDE.md) OSM Express is a fast storage format for OpenStreetMap that powers [SliceOSM](https://github.com/SliceOSM). It's designed as a low level building block specific to the OSM data model; common access patterns such as random lookups by ID, in-place minutely updates, and spatial queries are efficient and simple to manage in production applications. ## Features * **Random access:** Look up nodes, ways and relations and their metadata by ID; fetch member elements of ways and relations to construct geometries. * **Spatial indexing:** Nodes are bucketed into [S2 Geometry](http://s2geometry.io) cells. 
Access a region by providing a cell covering; works for nonrectangular regions. * **Scalable:** OSM Express works the same way for OSM data of any size, from a small city to the entire planet. The entire planet can be worked with efficiently on typical hardware such as a laptop computer. * **In-place updates:** Included are scripts to download minutely changesets from [planet.openstreetmap.org](https://planet.openstreetmap.org) and apply them to an .osmx database. * **Concurrent access:** Multiple processes can open the database file for reading simultaneously. No running server process is required. Writing minutely updates doesn't block reader access. Reads and writes are transactional. * **Portable:** An .osmx file can be read and written to from either C++ or Python. ## Details OSM Express is a compact 1,500 LOC, and really a cobbling together of a few low-level libraries: * [Libosmium](https://osmcode.org/libosmium/index.html) for the reading and writing of .osm.pbf files. * [LMDB](https://symas.com/lmdb) for a memory-mapped ACID key-value store with fast cursor iteration. * [Cap'n Proto](https://capnproto.org) for in-memory and on-disk representation of OSM elements. * [CRoaring](https://roaringbitmap.org) for in-memory representation of ID sets as compressed bitmaps. * [S2 Geometry](http://s2geometry.io) for indexing of geographic coordinates. ## Installation [See the Programming Guide for instructions on building from source](/docs/PROGRAMMING_GUIDE.md). ## Usage OSM Express is being used in production for [SliceOSM](https://slice.openstreetmap.us) and the file format is stable. * Use the `osmx` command line tool to expand a .osm.pbf to an .osmx database and perform basic tasks such as extracting regions or querying by ID. No programming required. * Use the [Python library](python/) via `pip install osmx` to access an .osmx database programmatically.
See the [Python Examples](python/examples) for how to create command line tools, webservers or detailed diffs based on minutely data. * Use the C++ library to access an .osmx database programmatically. ### Command line ```bash osmx expand planet.osm.pbf planet.osmx # converts a pbf or xml to osmx. Takes 5-10 hours for the planet, resulting in a ~600GB file. osmx extract planet.osmx extract.osm.pbf --bbox 40.7411\,-73.9937\,40.7486\,-73.9821 # extract a new pbf for the given bounding box. osmx update planet.osmx 3648548.osc 3648548 2019-08-29T17:50:02Z --commit # applies an OsmChange diff. osmx query planet.osmx # Print statistics, seqnum and timestamp. osmx query planet.osmx way 34633854 # look up an element by ID. ``` `osmx extract` has a flag `--noUserData` intended for public facing instances which will remove the user, uid and changeset fields to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR). Detailed command line usage can be found in the [Manual](docs/MANUAL.md). ### Headers The C++ API is very rough with minimal abstraction. [examples/way_wkt.cpp](examples/way_wkt.cpp) is a short, commented C++ program that uses the headers to read a way from a .osmx file and outputs its [Well-Known Text](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) LineString geometry. ```bash ./way_wkt ../ny.osmx 34633854 Empire State Building LINESTRING (-73.9864855 40.7484833,-73.9851554 40.7479226,-73.9848259 40.7483735,-73.9861526 40.7489422,-73.9863111 40.7487242,-73.9863282 40.7487007,-73.9864684 40.7485078,-73.9864855 40.7484833) ``` [examples/bbox_wkt.cpp](examples/bbox_wkt.cpp) is a more complex example that takes a bounding box as input, and returns WKT LineStrings for ways that overlap the bbox. This overlap is an approximation based on cells and may include ways outside the bounding box. Detailed C++ usage can be found in the [Programming Guide](docs/PROGRAMMING_GUIDE.md).
### Docker (experimental) A `Dockerfile` is provided but users will need to build their own container. To do so, run: ``` docker build -t osmx . ``` ## License and Development 2-Clause BSD, see [LICENSE.md](LICENSE.md). ================================================ FILE: dist/archive.sh ================================================ #!/bin/bash set -e FILENAME=dist/osmexpress-$1-$2.tgz rm -f LICENSES printf "osmexpress\n===========\n" >> LICENSES cat LICENSE.md >> LICENSES printf "\ncapnproto\n===========\n" >> LICENSES cat vendor/capnproto/LICENSE >> LICENSES printf "\ncroaring\n===========\n" >> LICENSES cat vendor/CRoaring/LICENSE >> LICENSES printf "\ncxxopts\n===========\n" >> LICENSES cat vendor/cxxopts/LICENSE >> LICENSES printf "\njson\n===========\n" >> LICENSES cat vendor/json/LICENSE.MIT >> LICENSES printf "\nlibosmium\n===========\n" >> LICENSES cat vendor/libosmium/LICENSE >> LICENSES printf "\nlmdb\n===========\n" >> LICENSES cat vendor/lmdb/libraries/liblmdb/LICENSE >> LICENSES printf "\nprotozero\n===========\n" >> LICENSES cat vendor/protozero/LICENSE.md >> LICENSES printf "\ns2\n===========\n" >> LICENSES cat vendor/s2geometry/LICENSE >> LICENSES tar -cvzf $FILENAME osmx LICENSES rm LICENSES echo "created $FILENAME" ================================================ FILE: docs/MANUAL.md ================================================ **OSM Express** is a database file format for OpenStreetMap data (.osmx), as well as a command line tool and C++ library for reading and writing .osmx files. Find it on GitHub at [github.com/bdon/OSMExpress](https://github.com/bdon/OSMExpress) ![screenshot](https://github.com/bdon/OSMExpress/blob/main/examples/screenshot.png?raw=true) *Illustration of the cell covering for a rectangular input region and its overlap with indexed OpenStreetMap geometries.* ## Motivation Here are some use cases that OSM Express fits well. 
* You want an offline copy of OpenStreetMap, which can be updated every day, hour or minute from the main openstreetmap.org database, instead of redownloading the entire planet. * You want to quickly access all OSM objects in a geographical region, such as a neighborhood, city or small country. * You want to quickly look up OSM objects by ID, such as getting the `height` and `name` tags for a given way that represents a building, and construct geometries for ways and relations. * You want to embed a database that does any of the above, such as in a web application that returns OSM objects as GeoJSON. ## Quick Start ### Command Line For information on how to compile the `osmx` program from source, see the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md) Once you have the `osmx` command line program, you'll need to start with an .osm.pbf or OSM XML file. The Planet file is available at [planet.openstreetmap.org](https://planet.openstreetmap.org), but it's preferable to begin with something smaller to learn with. There are numerous sites for downloading .osm.pbf extracts, including [SliceOSM](https://slice.openstreetmap.us), a service itself powered by OSM Express. Example: create an .osmx file by using the `expand` command on the .osm.pbf file: osmx expand new_york_county.osm.pbf new_york_county.osmx This will result in a 91 MB .osmx file. We can access objects inside this .osmx file by ID, displaying the node IDs of its member nodes and all tags: osmx query new_york_county.osmx way 34633854 > 402743563 402743567 402743571 402743573 2709307502 2709307499 2709307464 402743563 addr:city=New York City addr:housenumber=350 addr:postcode=10018 ... We can also extract regions of the .osmx file into a new .osm.pbf file, which is useful for interoperability with other OSM tools.
osmx extract new_york_county.osmx downtown.osm.pbf --bbox 40.7411\,-73.9937\,40.7486\,-73.9821 ### Updating `utils/osmx-update` is provided to update `.osmx` to the most recent file on a replication server using `osmx update`. For example to update a planet.osmx file with minutely updates: python utils/osmx-update planet.osmx https://planet.openstreetmap.org/replication/minute/ ## Library The OSM Express library is intentionally minimal and non-opinionated - for example, no attempt is made to transform OSM tags to a fixed schema, distinguish between polygon and linear ways, or assemble multipolygon relations into polygons. For these typical tasks it's recommended to use OSM Express as a library in your own program. Documentation and example code are available at the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md) ## Other Languages An .osmx file can be opened and queried directly in a Python program using the `osmx` Python package. See [Python](/docs/PROGRAMMING_GUIDE.md#python) for details. Languages other than Python may be supported in the future by either language-specific libraries or a new C API. See [Development](#Development) if you're interested or discuss on GitHub. ## Technical Details ### Storage Requirements A full planet.osmx created from planet.osm.pbf (47 GB) is around 580 GB. OSM Express is optimized for fast lookups, extracts and updates, goals opposed to making the database size as compact as possible. A typical .osmx file can be 10 times the size of the corresponding .osm.pbf, because: * Relationships between parent elements and member elements are encoded in both directions, to enable lookups from node to way, way to relation, etc. * The storage engine (LMDB) has no built-in compression, unlike some LSM-tree storage engines such as LevelDB. * The `mmap`-based design of LMDB and Cap'n Proto requires that fields are word-aligned on disk, causing storage overhead. * Keys and values are stored in full as strings.
Keys could be hardcoded in a lookup table, saving about 10% space, but this would make the database less portable. As of 2019, fast local storage is cheap; 1 terabyte solid state drives are less than 150 USD. On managed hosting providers like AWS and Google Cloud, extra storage is affordable compared to more memory or CPU cores. If it's necessary to optimize for storage space, an .osmx file can be stored on a filesystem with transparent compression such as ZFS or Btrfs, at the cost of CPU overhead. This can reduce planet.osmx to around 200GB. ### Privacy OSM Express stores all metadata - version, timestamp, changeset, username and user ID - for all OSM objects, except for untagged nodes. The `osmx extract` `--noUserData` flag ignores changeset, username and user ID information for extracts, to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR). ### Performance OSM Express should work with reasonable amounts of memory, less than 8 gigabytes, even for `expand` and `extract` on planet.osmx. The strongest predictor of performance is I/O latency. If benchmarking different storage environments, I/O latency can be best measured via IOPS at queue depth 1. *WIP: benchmarks* ## Alternatives * [osmium-tool](https://osmcode.org/osmium-tool/index.html) for creating extracts from osm.pbf files. This is more efficient for large country or continent sized extracts, or any task where the entire dataset needs to be read. * [Overpass API](http://overpass-api.de) is a powerful server application for interactive querying and tag-based lookup of OSM data. * [conveyal/osm-lib](https://github.com/conveyal/osm-lib) is a similar design, written in Java. * [imposm3](https://github.com/omniscale/imposm3), [osm2pgsql](https://github.com/openstreetmap/osm2pgsql) if you want OSM data in PostgreSQL and/or want to render maps. 
## Concepts ### File Layout The `osmx query` command with no arguments reveals the layout of an .osmx database: osmx query planet.osmx locations: 5313351219 nodes: 144307630 ways: 590470034 relations: 6895065 cell_node: 5313351219 node_way: 5906888644 node_relation: 10242142 way_relation: 63350432 relation_relation: 497137 An .osmx file is an LMDB database with 10 sub-databases. All keys are 64 bit integers in [host byte order](https://en.wikipedia.org/wiki/Endianness) (little-endian on most modern CPUs). * `locations`: maps OSM node IDs to Locations, which store the coordinates and version number of the node (documented below). * `nodes`, `ways`, `relations` map OSM object IDs to a Cap'n Proto message defined in [`include/osmx/messages.capnp`](https://github.com/bdon/OSMExpress/blob/main/include/osmx/messages.capnp). - `nodes` only contains *tagged* nodes; the value for each key describes the node's tags and other metadata. Untagged nodes are included only in `locations` to save space on disk. - `ways` contains all ways; the value for each key describes the way's tags, metadata, and the list of node IDs that are part of the way. - `relations` contains all relations; the value for each key contains the relation's tags, metadata, and the IDs and roles of its members. * `cell_node` maps a level 16 [S2 cell ID](http://s2geometry.io/devguide/s2cell_hierarchy.html) to a node ID, using LMDB's `DUPSORT` to store multiple values for each key (since each S2 cell will intersect many OSM objects). * `node_way`, `node_relation`, `way_relation` and `relation_relation` map OSM object IDs to their parent object IDs, also using `DUPSORT` (since nodes can belong to multiple ways, ways to multiple relations, etc). Finally, the `metadata` sub-database holds arbitrary string:string values. This is used to store the replication sequence number and timestamp. It is important to note that LMDB transactions span all sub-databases.
This means that a read operation will retrieve the correct `timestamp` for the data it fetches, even if the database is written to while the read is happening. #### Encoding of Locations Values in the `locations` sub-database are structs with the following layout: ```c struct Location { int32_t longitude_i; int32_t latitude_i; int32_t version; }; ``` Each field is serialized in host byte order. Longitude and latitude are stored as integers. To obtain the actual longitude and latitude as decimal numbers, divide the integer value by 10000000 (1e7). This integer-based encoding is precise to within a few centimeters anywhere on Earth. The same encoding is used by [libosmium](https://docs.osmcode.org/libosmium/latest/classosmium_1_1Location.html) and by the openstreetmap.org database internally. ### Spatial Indexing OSM Express avoids expensive point-in-polygon computations for spatial operations. Instead, a query region is approximated by S2 cells with maximum level 16. The level 16 is chosen as a reasonable tradeoff between covering precision and storage space. *Author's note: the S2 Covering of a region may differ depending on choice of architecture and compiler, while still being valid. Let me know if you know how to make this consistent.* ## Presentations [State of the Map US 2019, Minneapolis - Video](https://2019.stateofthemap.us/program/sun/osm-express-a-spatial-file-format-for-the-planet.html) ================================================ FILE: docs/PROGRAMMING_GUIDE.md ================================================ ## Building from source OSM Express uses CMake for its build scripts. It's only been tested with the Clang C++ compiler so far. Most dependencies are included as Git submodules in the `vendor/` directory, but a few stable, common libraries are expected to exist on your system, including bzip2, zlib, Expat and OpenSSL. 
### FreeBSD 12 `sudo pkg install cmake expat` ### macOS via Homebrew: `brew install cmake bzip2 zlib openssl expat` *Additional macOS notes: the Clang compiler should be available via XCode Command Line Tools.* ### Ubuntu 22.04 via Apt package manager: `sudo apt install cmake clang libbz2-dev libz-dev libexpat-dev libssl-dev python3-dev` ### Build Instructions git clone --recursive https://github.com/bdon/OSMExpress.git cd OSMExpress cmake -DCMAKE_BUILD_TYPE=Release . make *macOS note: If OpenSSL is installed through Homebrew, you may need to add an option to your cmake command: `-DOPENSSL_ROOT_DIR=/usr/local/opt/openssl\@3` For macOS systems with Apple Silicon, this path is `-DOPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl\@3` ## Using the C++ Headers ### Example: Way ID to WKT See [examples/way_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/way_wkt.cpp) for a commented program. ### Example: Bbox to Way WKTs See [examples/bbox_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/bbox_wkt.cpp) for a commented program. ## Python Install the library with `pip install osmx` . This will also download and install the `pycapnp` and `lmdb` Python libraries. The Python API supports only location, node, way and relation lookups at the moment.
Example: import osmx env = osmx.Environment('planet.osmx') txn = osmx.Transaction(env) locations = osmx.Locations(txn) nodes = osmx.Nodes(txn) ways = osmx.Ways(txn) way = ways.get(123456) for node_id in way.nodes: print(locations.get(node_id)) print(osmx.tag_dict(way.tags)) ================================================ FILE: examples/.gitignore ================================================ way_wkt bbox_wkt ================================================ FILE: examples/CMakeLists.txt ================================================ cmake_minimum_required (VERSION 3.5) set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -g") set(CMAKE_CXX_FLAGS "-Wno-deprecated") set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations") set(CMAKE_CXX_FLAGS "-pthread") include_directories(../vendor/libosmium/include) include_directories(../vendor/protozero/include) include_directories(../vendor/s2geometry/src) include_directories(../vendor/CRoaring/cpp) include_directories(../vendor/CRoaring/include) include_directories(../vendor/cxxopts/include) include_directories(/usr/local/include) include_directories(../depends) include_directories(../include) include_directories(../vendor/lmdb/libraries/liblmdb) include_directories(../vendor/capnproto/c++/src) link_directories(../vendor/s2geometry) link_directories(/usr/local/lib) link_directories(../vendor/CRoaring) link_directories(../vendor/capnproto) link_directories(../vendor/lmdb/libraries/liblmdb/) link_directories(../vendor/capnproto/c++/src/capnp/) link_directories(../vendor/capnproto/c++/src/kj/) link_directories(../vendor/CRoaring/src/) link_directories(${OPENSSL_ROOT_DIR}lib/) add_executable(way_wkt way_wkt.cpp ../src/storage.cpp) target_link_libraries(way_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto) set_property(TARGET way_wkt PROPERTY CXX_STANDARD 14) add_executable(bbox_wkt bbox_wkt.cpp ../src/storage.cpp) target_link_libraries(bbox_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto) set_property(TARGET 
bbox_wkt PROPERTY CXX_STANDARD 14) ================================================ FILE: examples/bbox_wkt.cpp ================================================ #include #include #include "osmx/storage.h" #include "osmx/util.h" #include "s2/s2latlng.h" #include "s2/s2region_coverer.h" #include "s2/s2latlng_rect.h" #include "roaring/roaring64map.hh" using namespace std; // Example of a very simple program to get OSM objects in a region // and print them out as WKT. // see way_wkt for a simpler example. // This program does not handle Relations at all, // so it can't be used to find all Polygons in a region, since they may be Multipolygon relations. // Usage: ./bbox_wkt OSMX_FILE MIN_LON MIN_LAT MAX_LON MAX_LAT int main(int argc, char* argv[]) { vector args(argv, argv+argc); MDB_env* env = osmx::db::createEnv(args[1]); MDB_txn* txn; CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn)); // Create a S2LatLngRect. auto lo = S2LatLng::FromDegrees(stof(args[3]),stof(args[2])); auto hi = S2LatLng::FromDegrees(stof(args[5]),stof(args[4])); auto bbox = S2LatLngRect{lo,hi}; // Find the cell covering for the LatLngRect, // with a maximum cell level of 16. // Although nodes in the database are stored at level=16, // Cells with levels less than 16 will be correctly handled by the traverseCell function. // This allows for more compact representations of large regions. S2RegionCoverer::Options options; options.set_max_level(16); S2RegionCoverer coverer(options); S2CellUnion covering = coverer.GetCovering(bbox); cerr << "Cell covering size: " << covering.size() << endl; // Get all node_ids that match the given region. 
// node_ids is passed by reference to traverseCell once per cell of the covering.
Roaring64Map node_ids;
MDB_dbi dbi;
MDB_cursor *cursor;
// "cell_node" is opened as an integer-keyed table with sorted, fixed-size
// integer duplicates.
CHECK_LMDB(mdb_dbi_open(txn, "cell_node", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
for (auto cell_id : covering.cell_ids()) {
  osmx::db::traverseCell(cursor,cell_id,node_ids);
}
mdb_cursor_close(cursor);
cerr << "Nodes in region: " << node_ids.cardinality() << endl;

// Get all way_ids that are referred to by node_ids.
Roaring64Map way_ids;
// dbi and cursor are reused for the "node_way" table, opened with the same flags.
CHECK_LMDB(mdb_dbi_open(txn, "node_way", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
for (auto const &node_id : node_ids) {
  osmx::db::traverseReverse(cursor,node_id,way_ids);
}
mdb_cursor_close(cursor);
cerr << "Ways in region: " << way_ids.cardinality() << endl;

// Print one line per way: the optional name, a tab, then the WKT geometry.
osmx::db::Locations locations(txn);
osmx::db::Elements ways(txn,"ways");
for (auto way_id : way_ids) {
  // Fetch a Way element by ID.
  auto message = ways.getReader(way_id);
  // NOTE(review): getRoot() presumably carried a template argument that was
  // lost in this copy of the file -- confirm against upstream.
  auto way = message.getRoot();

  // Tags are stored as a vector of key,value.
  // Iterate through all tags and print the value if key = name.
  auto tags = way.getTags();
  for (int i = 0; i < tags.size() / 2; i++) {
    if (tags[i*2] == "name") cout << tags[i*2+1].cStr();
  }

  // Assemble a WKT LineString geometry.
  cout << "\tLINESTRING (";
  cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places.
  auto nodes = way.getNodes();
  for (int i = 0; i < nodes.size(); i++) {
    auto location = locations.get(nodes[i]);
    // a comma precedes every vertex except the first; each vertex is "lon lat"
    if (i > 0) cout << ",";
    cout << location.coords.lon() << " " << location.coords.lat();
  }
  cout << ")" << endl;
}

mdb_env_close(env); // close the database.
} ================================================ FILE: examples/way_wkt.cpp ================================================ #include #include #include "osmx/storage.h" #include "osmx/util.h" using namespace std; // Example of a very simple C++ program that uses osmx headers // to open a database, look up a way by ID, and assemble a WKT geometry from its nodes. // Usage: ./print_wkt OSMX_FILE WAY_ID int main(int argc, char* argv[]) { vector args(argv, argv+argc); // Opening a database: create an Environment, and then a Transaction within the environment. MDB_env* env = osmx::db::createEnv(args[1]); MDB_txn* txn; CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn)); // Create a Database handle for each element type within the Transaction. osmx::db::Locations locations(txn); osmx::db::Elements ways(txn,"ways"); // Fetch a Way element by ID. auto message = ways.getReader(stol(args[2])); auto way = message.getRoot(); // Tags are stored as a vector of key,value. // Iterate through all tags and print the value if key = name. auto tags = way.getTags(); for (int i = 0; i < tags.size() / 2; i++) { if (tags[i*2] == "name") cout << tags[i*2+1].cStr(); } // Assemble a WKT LineString geometry. cout << "\tLINESTRING ("; cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places. auto nodes = way.getNodes(); for (int i = 0; i < nodes.size(); i++) { auto location = locations.get(nodes[i]); if (i > 0) cout << ","; cout << location.coords.lon() << " " << location.coords.lat(); } cout << ")" << endl; mdb_env_close(env); // close the database. 
} ================================================ FILE: include/osmx/cmd.h ================================================ void cmdExpand(int argc, char* argv[]); void cmdExtract(int argc, char* argv[]); void cmdUpdate(int argc, char* argv[]); ================================================ FILE: include/osmx/messages.capnp ================================================ @0xd3a7e843a9c03421; struct Metadata { version @0 :UInt32; timestamp @1 :UInt64; changeset @2 :UInt32; uid @3 :UInt32; user @4 :Text; } struct Node { tags @0 :List(Text); metadata @1 :Metadata; } struct Way { nodes @0 :List(UInt64); tags @1 :List(Text); metadata @2 :Metadata; } struct RelationMember { ref @0 :UInt64; type @1 :Type; role @2 :Text; enum Type { node @0; way @1; relation @2; } } struct Relation { tags @0 :List(Text); members @1 :List(RelationMember); metadata @2 :Metadata; } ================================================ FILE: include/osmx/region.h ================================================ #include #include #include "s2/s2region.h" #include "s2/s2cell_union.h" #include "s2/s2region_coverer.h" #include "s2/s2latlng_rect.h" class Region { public: Region(const std::string &text, const std::string &ext); bool Contains(S2Point p); S2CellUnion GetCovering(S2RegionCoverer &coverer); S2LatLngRect GetBounds(); private: void AddS2RegionFromGeometry(nlohmann::json &geometry); void AddS2RegionFromPolyFile(std::istringstream &file); std::vector> mRegions; }; ================================================ FILE: include/osmx/storage.h ================================================ #pragma once #include "lmdb.h" #include "osmium/osm/location.hpp" #include "kj/io.h" #include "capnp/message.h" #include "capnp/serialize.h" #include "osmx/messages.capnp.h" #include "osmx/util.h" #include "s2/s2cell_id.h" #include "roaring/roaring64map.hh" namespace osmx { namespace db { uint64_t to64(osmium::Location loc); osmium::Location toLoc(uint64_t val); MDB_env *createEnv(std::string path, bool 
writable = false); class Noncopyable { public: Noncopyable() { } Noncopyable( const Noncopyable& ) = delete; Noncopyable& operator=( const Noncopyable& ) = delete; }; class Metadata : public Noncopyable { public: Metadata(MDB_txn *txn); void put(const std::string &key_str, const std::string &value_str); std::string get(const std::string &key_str); private: MDB_txn* mTxn; MDB_dbi mDbi; }; class Elements : public Noncopyable { public: Elements(MDB_txn *txn, const std::string &name); void put(uint64_t id, kj::VectorOutputStream &vos, int flags = 0); void del(uint64_t id); bool exists(uint64_t id); capnp::FlatArrayMessageReader getReader(uint64_t id); private: MDB_txn *mTxn; MDB_dbi mDbi; }; class Location { public: Location() { }; Location(osmium::Location l, int32_t v) : coords(l), version(v) { } bool is_undefined() { return coords.is_undefined(); } bool is_defined() { return coords.is_defined(); } osmium::Location coords; int32_t version; }; class Locations : public Noncopyable { public: Locations(MDB_txn *txn); void put(uint64_t id, const Location value, int flags = 0); void del(uint64_t id); bool exists(uint64_t id); Location get(uint64_t id) const; private: MDB_txn* mTxn; MDB_dbi mDbi; }; class Index : public Noncopyable { public: Index(MDB_txn *txn, const std::string &name); void put(uint64_t from, uint64_t osm_id, int flags = 0); void del(uint64_t from, uint64_t osm_id ); private: MDB_dbi mDbi; MDB_txn *mTxn; }; class IndexWriter : public Noncopyable { public: IndexWriter(MDB_env *env, const std::string &name); void put(uint64_t from, uint64_t osm_id, int flags = 0); void commit(); private: MDB_env *mEnv; MDB_dbi mDbi; MDB_txn *mTxn; std::string mName; int mWrites = 0; }; void traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set); void traverseReverse(MDB_cursor *cursor, uint64_t from, roaring::Roaring64Map &set); } } ================================================ FILE: include/osmx/util.h 
================================================ #pragma once #include #include #include "lmdb.h" #include "osmium/tags/taglist.hpp" #define CHECK_LMDB(x) if (0 != x) { printf("%s, file %s, line %d.\n", mdb_strerror(x), __FILE__, __LINE__); abort(); } // a higher cell level results in more precise extracts, as the size of 1 cell is the minimum index resolution. #define CELL_INDEX_LEVEL 16 class Timer { public: Timer(std::string name) : mName(name) { mStartTime = std::chrono::high_resolution_clock::now(); std::cout << "Start " << mName << std::endl; } ~Timer() { auto duration = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - mStartTime ).count(); std::cout << "Finished " << mName << " in " << duration/1000.0 << " seconds." << std::endl; } private: std::chrono::high_resolution_clock::time_point mStartTime; std::string mName; }; template void setTags(const osmium::TagList &tags, T &builder) { builder.initTags(tags.size() * 2); auto tagBuilder = builder.getTags(); int i = 0; for (auto const &tag : tags) { tagBuilder.set(i,tag.key()); i++; tagBuilder.set(i,tag.value()); i++; } } ================================================ FILE: python/.gitignore ================================================ build dist *.egg-info ================================================ FILE: python/README.md ================================================ A Python package to read OSM Express (.osmx) database files. ## Installation ```bash pip install osmx ``` ## Usage [examples/read_way.py](examples/read_way.py) : Simple program: given a way ID, print the coordinates of its member nodes, its metadata and all the relations it directly belongs to. [examples/web_server.py](examples/web_server.py) Uses only the Python standard library; starts an HTTP server that takes a url like /way/WAY_ID and returns a GeoJSON feature for that OSM object. Shows example of how to descend into relation members. 
[examples/augmented_diff.py](examples/augmented_diff.py) Creates an [augmented diff](https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs) similar to those implemented by Overpass API, but limited to a single OsmChange (.osc) replication sequence file. Requires that the OSMX database represents the replication sequence state directly before that of the .OSC file. ================================================ FILE: python/examples/augmented_diff.py ================================================ from collections import namedtuple from datetime import datetime import copy import sys import xml.etree.ElementTree as ET import xml.dom.minidom import osmx # generates an augmented diff for an OSC (OsmChange) file. # see https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs # this is intended to be run before the OSC file is applied to the osmx file. if len(sys.argv) < 4: print("Usage: augmented_diff.py OSMX_FILE OSC_FILE OUTPUT") exit(1) # 1st pass: # populate the collection of actions # create dictionary from osm_type/osm_id to action # e.g. 
node/12345 > Node() Action = namedtuple('Action',['type','element']) actions = {} osc = ET.parse(sys.argv[2]).getroot() for block in osc: for e in block: action_key = e.tag + "/" + e.get("id") # Always ensure we're updating to the latest version of an object for the diff if action_key in actions: newest_version = int(actions[action_key].element.get("version")) e_version = int(e.get("version")) if e_version < newest_version: print("Found element {}, version {} is less than previously visited version {}" .format(action_key, e_version, newest_version)) continue actions[action_key] = Action(block.tag,e) action_list = [v for k,v in actions.items()] env = osmx.Environment(sys.argv[1]) with osmx.Transaction(env) as txn: locations = osmx.Locations(txn) nodes = osmx.Nodes(txn) ways = osmx.Ways(txn) relations = osmx.Relations(txn) def not_in_db(elem): elem_id = int(elem.get('id')) if elem.tag == 'node': return not locations.get(elem_id) elif elem.tag == 'way': return not ways.get(elem_id) else: return not relations.get(elem_id) def get_lat_lon(ref, use_new): if use_new and ('node/' + ref in actions): node = actions['node/' + ref] return (node.element.get('lon'),node.element.get('lat')) else: ll = locations.get(ref) return (str(ll[1]),str(ll[0])) def set_old_metadata(elem): elem_id = int(elem.get('id')) if elem.tag == 'node': o = nodes.get(elem_id) elif elem.tag == 'way': o = ways.get(elem_id) else: o = relations.get(elem_id) if o: elem.set('version',str(o.metadata.version)) elem.set('user',str(o.metadata.user)) elem.set('uid',str(o.metadata.uid)) # convert to ISO8601 timestamp timestamp = o.metadata.timestamp formatted = datetime.utcfromtimestamp(timestamp).isoformat() elem.set('timestamp',formatted + 'Z') elem.set('changeset',str(o.metadata.changeset)) else: # tagless nodes try: version = locations.get(elem_id)[2] except TypeError: # If loc is None here, it typically means that a node was created and # then deleted within the diff interval. 
In the future we should # remove these operations from the diff entirely. print("No old loc found for tagless node {}".format(elem_id)) version = "?" elem.set('version',str(version)) elem.set('user','?') elem.set('uid','?') elem.set('timestamp','?') elem.set('changeset','?') # 2nd pass # create an XML tree of actions with old and new sub-elements o = ET.Element('osm') o.set("version","0.6") o.set("generator","Overpass API not used, but achavi detects it at the start of string; OSMExpress/python/examples/augmented_diff.py") for action in action_list: a = ET.SubElement(o,'action') a.set('type',action.type) old = ET.SubElement(a,'old') new = ET.SubElement(a,'new') if action.type == 'create': new.append(action.element) elif action.type == 'delete': # get the old metadata modified = copy.deepcopy(action.element) set_old_metadata(action.element) old.append(action.element) modified.set('visible','false') for child in list(modified): modified.remove(child) # TODO the Geofabrik deleted elements seem to have the old metadata and old version numbers # check if this is true of planet replication files new.append(modified) else: obj_id = action.element.get('id') if not_in_db(action.element): # Typically occurs when: # 1. TODO: An element is deleted but then restored later, # which should remain a modify operation. This will be difficult # because objects are not retained in OSMX when deleted in OSM. # 2. 
OK: An element was created and then modified within the diff interval print("Could not find {0} {1} in db, changing to create".format(action.element.tag,action.element.get('id'))) new.append(action.element) a.set('type','create') else: prev_version = ET.SubElement(old,action.element.tag) prev_version.set('id',obj_id) set_old_metadata(prev_version) if action.element.tag == 'node': ll = get_lat_lon(obj_id,False) prev_version.set('lon',ll[0]) prev_version.set('lat',ll[1]) elif action.element.tag == 'way': way = ways.get(obj_id) for n in way.nodes: node = ET.SubElement(prev_version,'nd') node.set('ref',str(n)) it = iter(way.tags) for t in it: tag = ET.SubElement(prev_version,'tag') tag.set('k',t) tag.set('v',next(it)) else: relation = relations.get(obj_id) for m in relation.members: member = ET.SubElement(prev_version,'member') member.set('ref',str(m.ref)) member.set('role',m.role) member.set('type',str(m.type)) it = iter(relation.tags) for t in it: tag = ET.SubElement(prev_version,'tag') tag.set('k',t) tag.set('v',next(it)) new.append(action.element) # 3rd pass # Augment the created "old" and "new" elements def augment_nd(nd,use_new): ll = get_lat_lon(nd.get('ref'),use_new) nd.set('lon',ll[0]) nd.set('lat',ll[1]) def augment_member(mem,use_new): if mem.get('type') == 'way': ref = mem.get('ref') if use_new and ('way/' + ref in actions): way = actions['way/' + ref] for child in way.element: if child.tag == 'nd': ll = get_lat_lon(child.get('ref'),use_new) nd = ET.SubElement(mem,'nd') nd.set('lon',ll[0]) nd.set('lat',ll[1]) else: for node_id in ways.get(ref).nodes: ll = get_lat_lon(str(node_id),use_new) nd = ET.SubElement(mem,'nd') nd.set('lon',ll[0]) nd.set('lat',ll[1]) elif mem.get('type') == 'node': ll = get_lat_lon(mem.get('ref'),use_new) mem.set('lon',ll[0]) mem.set('lat',ll[1]) def augment(elem,use_new): if len(elem) == 0: return if elem[0].tag == 'way': for child in elem[0]: if child.tag == 'nd': augment_nd(child,use_new) elif elem[0].tag == 'relation': for child 
in elem[0]: if child.tag == 'member': augment_member(child,use_new) for elem in o: try: augment(elem[0],False) augment(elem[1],True) except (TypeError, AttributeError): print("Changed {0} {1} is incomplete in db".format(elem[1][0].tag, elem[1][0].get('id'))) # 4th pass: # find changes that propagate to referencing elements: # when a node's location changes, that propagates to any ways it belongs to, relations it belongs to # and also any relations that the way belongs to # when a way's member list changes, it propagates to any relations it belongs to node_way = osmx.NodeWay(txn) node_relation = osmx.NodeRelation(txn) way_relation = osmx.WayRelation(txn) affected_ways = set() affected_relations = set() for elem in o: if elem.get('type') == 'modify': if elem[0][0].tag == 'node': old_loc = (elem[0][0].get('lat'),elem[0][0].get('lon')) new_loc = (elem[1][0].get('lat'),elem[1][0].get('lon')) if old_loc != new_loc: node_id = elem[0][0].get('id') for rel in node_relation.get(node_id): if 'relation/' + str(rel) not in actions: affected_relations.add(rel) for way in node_way.get(node_id): if 'way/' + str(way) not in actions: affected_ways.add(way) for rel in way_relation.get(way): if 'relation/' + str(rel) not in actions: affected_relations.add(rel) elif elem[0][0].tag == 'way': old_way = [nd.get('ref') for nd in elem[0][0] if nd.tag == 'nd'] new_way = [nd.get('ref') for nd in elem[1][0] if nd.tag == 'nd'] if old_way != new_way: way_id = elem[0][0].get('id') for rel in way_relation.get(way_id): if 'relation/' + str(rel) not in actions: affected_relations.add(rel) for w in affected_ways: a = ET.SubElement(o,'action') a.set('type','modify') old = ET.SubElement(a,'old') way_element = ET.SubElement(old,'way') way_element.set('id',str(w)) set_old_metadata(way_element) way = ways.get(w) for n in way.nodes: node = ET.SubElement(way_element,'nd') node.set('ref',str(n)) it = iter(way.tags) for t in it: tag = ET.SubElement(way_element,'tag') tag.set('k',t) tag.set('v',next(it)) new = 
ET.SubElement(a,'new') new_elem = copy.deepcopy(way_element) new.append(new_elem) augment(old,False) augment(new,True) for r in affected_relations: old = ET.Element('old') relation_element = ET.SubElement(old,'relation') relation_element.set('id',str(r)) set_old_metadata(relation_element) relation = relations.get(r) for m in relation.members: member = ET.SubElement(relation_element,'member') member.set('ref',str(m.ref)) member.set('role',m.role) member.set('type',str(m.type)) it = iter(relation.tags) for t in it: tag = ET.SubElement(relation_element,'tag') tag.set('k',t) tag.set('v',next(it)) new_elem = copy.deepcopy(relation_element) new = ET.Element('new') new.append(new_elem) try: augment(old,False) augment(new,True) a = ET.SubElement(o,'action') a.set('type','modify') a.append(old) a.append(new) except (TypeError, AttributeError): print("Affected relation {0} is incomplete in db".format(r)) # 5th pass: add bounding boxes class Bounds: def __init__(self): self.minx = 180 self.maxx = -180 self.miny = 90 self.maxy = -90 def add(self,x,y): if x < self.minx: self.minx = x if x > self.maxx: self.maxx = x if y < self.miny: self.miny = y if y > self.maxy: self.maxy = y def elem(self): e = ET.Element('bounds') e.set('minlat',str(self.miny)) e.set('minlon',str(self.minx)) e.set('maxlat',str(self.maxy)) e.set('maxlon',str(self.maxx)) return e for child in o: if len(child[0]) > 0: osm_obj = child[0][0] nds = osm_obj.findall('.//nd') if nds: bounds = Bounds() for nd in nds: bounds.add(float(nd.get('lon')),float(nd.get('lat'))) osm_obj.insert(0,bounds.elem()) # 6th pass # sort by node, way, relation # within each, sorted by increasing ID def sort_by_type(x): if x[1][0].tag == 'node': return 1 elif x[1][0].tag == 'way': return 2 return 3 o[:] = sorted(o, key=lambda x:int(x[1][0].get('id'))) o[:] = sorted(o, key=sort_by_type) note = ET.Element('note') note.text = "The data included in this document is from www.openstreetmap.org. The data is made available under ODbL." 
o.insert(0,note) # pretty print helper # http://effbot.org/zone/element-lib.htm#prettyprint def indent(elem, level=0): i = "\n" + level*" " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for elem in elem: indent(elem, level+1) if not elem.tail or not elem.tail.strip(): elem.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i indent(o) ET.ElementTree(o).write(sys.argv[3]) ================================================ FILE: python/examples/read_way.py ================================================ import sys import osmx if len(sys.argv) <= 1: print("Usage: read_way.py OSMX_FILE WAY_ID") exit(1) env = osmx.Environment(sys.argv[1]) with osmx.Transaction(env) as txn: locations = osmx.Locations(txn) nodes = osmx.Nodes(txn) ways = osmx.Ways(txn) way_relation = osmx.WayRelation(txn) way_id = sys.argv[2] way = ways.get(way_id) for node_id in way.nodes: print(locations.get(node_id)) print(osmx.tag_dict(way.tags)) print(way.metadata) print(way_relation.get(way_id)) ================================================ FILE: python/examples/web_server.py ================================================ import json import sys from http.server import BaseHTTPRequestHandler, HTTPServer import osmx if len(sys.argv) <= 1: print("Usage: web_server.py OSMX_FILE") env = osmx.Environment(sys.argv[1]) # simple implementation of OSM GeoJSON API using osmx + Python standard library. # not production ready! 
class Handler(BaseHTTPRequestHandler): def do_GET(self): parts = self.path.split("/") if len(parts) < 3: self.send_response(400) self.wfile.write("bad request".encode('utf-8')) return self.send_response(200) self.send_header('Content-type','application/json') self.end_headers() osm_id = parts[2] resp = {'type':'Feature','properties':{}} with osmx.Transaction(env) as txn: locations = osmx.Locations(txn) def coord(node_id): loc = locations.get(node_id) return (loc[1],loc[0]) nodes = osmx.Nodes(txn) if parts[1] == "node": node = nodes.get(osm_id) if node: for k,v in osmx.tag_dict(node.tags).items(): resp['properties'][k] = v resp['geometry'] = {'type':'Point','coordinates':coord(osm_id)} elif parts[1] == "way": ways = osmx.Ways(txn) way = ways.get(osm_id) for k,v in osmx.tag_dict(way.tags).items(): resp['properties'][k] = v coords = [coord(node_id) for node_id in way.nodes] resp['geometry'] = {'type':'LineString','coordinates':coords} elif parts[1] == "relation": ways = osmx.Ways(txn) relations = osmx.Relations(txn) relation = relations.get(osm_id) for k,v in osmx.tag_dict(relation.tags).items(): resp['properties'][k] = v geometries = [] def add_relation_geoms(r): for member in r.members: if member.type == 'node': geometries.append({'type':'Point','coordinates':locations.get(member.ref)}) if member.type == 'way': way = ways.get(member.ref) coords = [coord(node_id) for node_id in way.nodes] geometries.append({'type':'LineString','coordinates':coords}) if member.type == 'relation': add_relation_geoms(relations.get(member.ref)) add_relation_geoms(relation) resp['geometry'] = {'type':'GeometryCollection','geometries':geometries} self.wfile.write(json.dumps(resp).encode('utf-8')) print('Server listening on port 8000...') httpd = HTTPServer(('', 8000), Handler) httpd.serve_forever() ================================================ FILE: python/osmx/__init__.py ================================================ from .osmx import * 
================================================ FILE: python/osmx/messages.capnp ================================================ @0xd3a7e843a9c03421; struct Metadata { version @0 :UInt32; timestamp @1 :UInt64; changeset @2 :UInt32; uid @3 :UInt32; user @4 :Text; } struct Node { tags @0 :List(Text); metadata @1 :Metadata; } struct Way { nodes @0 :List(UInt64); tags @1 :List(Text); metadata @2 :Metadata; } struct RelationMember { ref @0 :UInt64; type @1 :Type; role @2 :Text; enum Type { node @0; way @1; relation @2; } } struct Relation { tags @0 :List(Text); members @1 :List(RelationMember); metadata @2 :Metadata; } ================================================ FILE: python/osmx/osmx.py ================================================ import sys import os import lmdb import capnp capnp.remove_import_hook() messages_capnp = capnp.load(os.path.join(os.path.dirname(__file__), 'messages.capnp')) def tag_dict(tag_list): it = enumerate(tag_list) d = {} for x in it: d[x[1]] = next(it)[1] return d class Environment: def __init__(self,fname): self._handle = lmdb.Environment(fname,max_dbs=10,readonly=True,readahead=False,subdir=False) class Transaction: def __init__(self,env): self.env = env self._handle = lmdb.Transaction(self.env._handle, buffers=True) def __enter__(self,*args,**kwargs): self._handle.__enter__(*args,**kwargs) return self def __exit__(self,*args,**kwargs): self._handle.__exit__(*args,**kwargs) class Index: def __init__(self): pass class Index: def __init__(self,txn,name): self.txn = txn self._handle = txn.env._handle.open_db(name,txn=txn._handle,integerkey=True,create=False,dupsort=True,integerdup=True,dupfixed=True) def get(self,obj_id): cursor = self.txn._handle.cursor(self._handle) cursor.set_key(int(obj_id).to_bytes(8,byteorder=sys.byteorder)) retval = [int.from_bytes(data,byteorder=sys.byteorder,signed=False) for data in cursor.iternext_dup()] cursor.close() return retval class Table: def __init__(self,txn,name): self.txn = txn self._handle = 
txn.env._handle.open_db(name,txn=txn._handle,integerkey=True,create=False) def _get_bytes(self,elem_id): return self.txn._handle.get(int(elem_id).to_bytes(8,byteorder=sys.byteorder),db=self._handle) class Locations(Table): def __init__(self,txn): super().__init__(txn,b'locations') def get(self,node_id): msg = self._get_bytes(node_id) if not msg: return None return ( int.from_bytes(msg[4:8],byteorder=sys.byteorder,signed=True) / 10000000, int.from_bytes(msg[0:4],byteorder=sys.byteorder,signed=True) / 10000000, int.from_bytes(msg[8:12],byteorder=sys.byteorder,signed=False) ) class Nodes(Table): def __init__(self,txn): super().__init__(txn,b'nodes') def get(self,node_id): msg = self._get_bytes(node_id) if not msg: return None return messages_capnp.Node.from_bytes(msg) class Ways(Table): def __init__(self,txn): super().__init__(txn,b'ways') def get(self,way_id): msg = self._get_bytes(way_id) if not msg: return None return messages_capnp.Way.from_bytes(msg) class Relations(Table): def __init__(self,txn): super().__init__(txn,b'relations') def get(self,relation_id): msg = self._get_bytes(relation_id) if not msg: return None return messages_capnp.Relation.from_bytes(msg) class NodeWay(Index): def __init__(self,txn): super().__init__(txn,b'node_way') class NodeRelation(Index): def __init__(self,txn): super().__init__(txn,b'node_relation') class WayRelation(Index): def __init__(self,txn): super().__init__(txn,b'way_relation') class RelationRelation(Index): def __init__(self,txn): super().__init__(txn,b'relation_relation') ================================================ FILE: python/setup.py ================================================ import setuptools with open("README.md", "r") as fh: long_description = fh.read() requirements = [ 'lmdb~=1.4.1', 'pycapnp~=2.0.0', ] setuptools.setup( name='osmx', version='0.0.5', author="Brandon Liu", author_email='brandon@protomaps.com', description='Read OSM Express (.osmx) database files.', license="BSD-2-Clause", 
long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/bdon/OSMExpress", packages=setuptools.find_packages(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", ], install_requires = requirements, requires_python='>=3.0', package_data={'osmx':['messages.capnp']} ) ================================================ FILE: src/cmd.cpp ================================================ #include #include "osmx/storage.h" #include "osmx/cmd.h" #include "osmx/util.h" using namespace std; using namespace osmx; void printHelp() { cout << "Usage: osmx COMMAND [ARG...]" << endl << endl; cout << "COMMANDS:" << endl; cout << " expand Convert an OSM PBF or XML to an osmx database." << endl; cout << " extract Create a regional extract PBF from an osmx database." << endl; cout << " update Apply an OSM changeset to an osmx database." << endl; cout << " query Look up objects by ID in an osmx database." << endl; exit(1); } void printQueryHelp() { cout << "USAGE:" << endl; cout << " osmx query OSMX_FILE [OPTIONS]" << endl << endl; cout << "EXAMPLES:" << endl; cout << " osmx query planet.osmx" << endl; cout << " osmx query planet.osmx way 123456" << endl << endl; cout << "OPTIONS:" << endl; cout << " none specified: print table statistics." 
<< endl;
  cout << " [node,way,relation] ID: print OSM object" << endl;
  cout << " timestamp: print data timestamp" << endl;
  cout << " seqnum: print replication sequence number" << endl;
  exit(1);
}

// Entry point: dispatches on the first argument to expand / extract / update,
// or handles "query" inline. Unknown or missing commands print the help text,
// which exits with status 1.
//
// NOTE(review): template argument lists (the element type of `args`, the
// argument of getRoot()) appear to have been stripped from this copy of the
// file; restore them from the upstream sources before building.
int main(int argc, char* argv[]) {
  vector args(argv, argv+argc);

  if (argc < 2) {
    printHelp();
  }

  if (args[1] == "expand") {
    cmdExpand(argc,argv);
  } else if (args[1] == "extract") {
    cmdExtract(argc,argv);
  } else if (args[1] == "update") {
    cmdUpdate(argc,argv);
  } else if (args[1] == "query") {
    if (args.size() == 2) {
      printQueryHelp();
    }

    MDB_env* env = db::createEnv(args[2]);
    MDB_txn* txn;
    CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));

    if (args.size() >= 4) {
      // node / way / relation lookups read args[4]; show the help text
      // instead of indexing past the end when the ID is missing.
      bool needsId = args[3] == "node" || args[3] == "way" || args[3] == "relation";
      if (needsId && args.size() < 5) {
        printQueryHelp();
      }

      if (args[3] == "node") {
        // prints the coordinates, then one key=value tag per line
        auto id = stol(args[4]);
        auto location = db::Locations(txn).get(id);
        cout << location.coords << endl;
        auto tags = db::Elements(txn,"nodes").getReader(id).getRoot().getTags();
        for (int i = 0; i < tags.size() / 2; i++) {
          cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << "\n";
        }
      } else if (args[3] == "way") {
        // prints the node refs on one line, then the tags on the next
        db::Elements ways(txn,"ways");
        auto message = ways.getReader(stol(args[4]));
        auto way = message.getRoot();
        for (auto node_id : way.getNodes()) {
          cout << node_id << " ";
        }
        cout << endl;
        auto tags = way.getTags();
        for (int i = 0; i < tags.size() / 2; i++) {
          cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << " ";
        }
        cout << endl;
      } else if (args[3] == "relation") {
        // prints the tags, then one member ref per line
        db::Elements relations(txn,"relations");
        uint64_t relation_id = stol(args[4]);
        auto message = relations.getReader(relation_id);
        auto relation = message.getRoot();
        auto tags = relation.getTags();
        for (int i = 0; i < tags.size() / 2; i++) {
          cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << " ";
        }
        auto members = relation.getMembers();
        for (auto const &member : members) {
          cout << member.getRef() << endl;
        }
      } else if (args[3] == "timestamp") {
        db::Metadata metadata(txn);
        cout << metadata.get("osmosis_replication_timestamp") << endl;
      } else if (args[3] == "seqnum") {
        db::Metadata metadata(txn);
        cout << metadata.get("osmosis_replication_sequence_number") << endl;
      } else {
        printQueryHelp();
      }
    } else {
      // no sub-command: print the entry count of every table plus the
      // replication metadata
      auto tables = {"locations","nodes","ways","relations","cell_node","node_way","node_relation","way_relation","relation_relation"};

      for (auto const &table : tables) {
        MDB_dbi dbi;
        CHECK_LMDB(mdb_dbi_open(txn, table, MDB_INTEGERKEY, &dbi));
        MDB_stat stat;
        CHECK_LMDB(mdb_stat(txn,dbi,&stat));
        cout << table << ": " << stat.ms_entries << endl;
      }

      db::Metadata metadata(txn);
      cout << "Timestamp: " << metadata.get("osmosis_replication_timestamp") << endl;
      cout << "Sequence #: " << metadata.get("osmosis_replication_sequence_number") << endl;
    }

    mdb_env_sync(env,true);
    mdb_env_close(env);
  } else {
    printHelp();
  }
}
================================================ FILE: src/expand.cpp ================================================ #include #include #include "osmium/handler.hpp" #include "osmium/visitor.hpp" #include "osmium/io/any_input.hpp" #include "osmium/util/progress_bar.hpp" #include "osmium/io/reader_with_progress_bar.hpp" #include "cxxopts.hpp" #include "kj/io.h" #include "capnp/message.h" #include "capnp/serialize.h" #include "s2/s2latlng.h" #include "s2/s2cell_id.h" #include "osmx/storage.h" #include "osmx/util.h" #include "osmx/messages.capnp.h" using namespace std; using namespace osmx; typedef std::pair Pair; typedef std::pair pqelem; class SortReader { public: SortReader(std::string filename) : mStream(filename, std::ios::in | std::ios::binary) { } bool getNext() { mStream.read((char *)&entry,sizeof(uint64_t) *2); if (mStream.eof()) return false; return true; } Pair entry; private: std::ifstream mStream; }; class Sorter { int MAX_RUN_SIZE = 64000000; // about 1 GB public: Sorter(std::string tempDir,std::string name) : mTempDir(tempDir), mName(name) { mStorage.reserve(MAX_RUN_SIZE); } void put(uint64_t from, uint64_t to) { mStorage.push_back(std::make_pair(from,to)); if (mStorage.size() > MAX_RUN_SIZE) persist(); } void put(S2CellId
from, uint64_t to) { put(from.id(),to); } void persist() { if (mStorage.size() == 0) return; sort(mStorage.begin(),mStorage.end()); int runNumber = mSavedRuns.size(); std::ofstream stream; std::stringstream fname; fname << mTempDir << "/" << std::setw(2) << std::setfill('0') << mName << "_" << std::setw(3) << std::setfill('0') << runNumber << ".run"; stream.open(fname.str(),std::ios::binary); for (auto const &entry: mStorage) { stream.write((char *)&entry.first,sizeof(uint64_t)); stream.write((char *)&entry.second,sizeof(uint64_t)); } stream.close(); mStorage.clear(); mStorage.reserve(MAX_RUN_SIZE); mSavedRuns.push_back(fname.str()); } void writeDb(MDB_env *env) { persist(); Timer timer("External sort " + mName); osmium::ProgressBar progress{MAX_RUN_SIZE * mSavedRuns.size(), osmium::isatty(2)}; int read = 0; std::priority_queue, std::greater> q; std::vector readers; db::IndexWriter index(env,mName); for (int i = 0; i < mSavedRuns.size(); i++) { readers.emplace_back(mSavedRuns[i]); if (readers[i].getNext()) q.push(make_pair(readers[i].entry, i)); } Pair last; while (q.size() > 0) { pqelem pair = q.top(); auto idx = pair.second; if (pair.first != last) { if (pair.first.first != last.first) index.put(pair.first.first,pair.first.second,MDB_APPEND); else index.put(pair.first.first,pair.first.second,MDB_APPENDDUP); } q.pop(); if (readers[idx].getNext()) q.push(make_pair(readers[idx].entry, idx)); progress.update(read++); last = pair.first; } index.commit(); progress.done(); for (auto const &run : mSavedRuns) { remove(run.c_str()); } } private: Sorter( const Sorter& ) = delete; Sorter& operator=( const Sorter& ) = delete; std::vector> mStorage; int mRunNumber = 0; std::vector mSavedRuns; std::string mTempDir; std::string mName; }; class Handler: public osmium::handler::Handler { public: Handler(MDB_env *env, MDB_txn *txn,string tempDir) : mEnv(env), mTxn(txn), mCellNode(tempDir,"cell_node"), mLocations(txn), mNodes(txn,"nodes"), mWays(txn,"ways"), 
mRelations(txn,"relations"), mNodeWay(tempDir,"node_way"), mNodeRelation(tempDir,"node_relation"), mWayRelation(tempDir,"way_relation"), mRelationRelation(tempDir,"relation_relation") { } ~Handler() { CHECK_LMDB(mdb_txn_commit(mTxn)); mCellNode.writeDb(mEnv); mNodeWay.writeDb(mEnv); mNodeRelation.writeDb(mEnv); mWayRelation.writeDb(mEnv); mRelationRelation.writeDb(mEnv); } void node(const osmium::Node& node) { mLocations.put(node.id(), db::Location{node.location(),(int32_t)node.version()},MDB_APPEND); auto loc = node.location(); auto ll = S2LatLng::FromDegrees(loc.lat(),loc.lon()); auto cell = S2CellId(ll).parent(CELL_INDEX_LEVEL); mCellNode.put(cell,node.id()); if (node.tags().size() > 0) { ::capnp::MallocMessageBuilder message; Node::Builder nodeMsg = message.initRoot(); setTags(node.tags(),nodeMsg); auto metadata = nodeMsg.initMetadata(); metadata.setVersion(node.version()); metadata.setTimestamp(node.timestamp().seconds_since_epoch()); metadata.setChangeset(node.changeset()); metadata.setUid(node.uid()); metadata.setUser(node.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mNodes.put(node.id(),output,MDB_APPEND); } } void way(const osmium::Way& way) { auto const &nodes = way.nodes(); ::capnp::MallocMessageBuilder message; Way::Builder wayMsg = message.initRoot(); wayMsg.initNodes(nodes.size()); int i = 0; for (int i = 0; i < nodes.size(); i++) { wayMsg.getNodes().set(i,nodes[i].ref()); mNodeWay.put(nodes[i].ref(),way.id()); } setTags(way.tags(),wayMsg); auto metadata = wayMsg.initMetadata(); metadata.setVersion(way.version()); metadata.setTimestamp(way.timestamp().seconds_since_epoch()); metadata.setChangeset(way.changeset()); metadata.setUid(way.uid()); metadata.setUser(way.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mWays.put(way.id(),output,MDB_APPEND); } void relation(const osmium::Relation& relation) { ::capnp::MallocMessageBuilder message; Relation::Builder relationMsg = message.initRoot(); 
setTags(relation.tags(),relationMsg); auto members = relationMsg.initMembers(relation.members().size()); int i = 0; for (auto const &member : relation.members()) { members[i].setRef(member.ref()); members[i].setRole(member.role()); if (member.type() == osmium::item_type::node) { members[i].setType(RelationMember::Type::NODE); mNodeRelation.put(member.ref(),relation.id()); } else if (member.type() == osmium::item_type::way) { members[i].setType(RelationMember::Type::WAY); mWayRelation.put(member.ref(),relation.id()); } else if (member.type() == osmium::item_type::relation) { members[i].setType(RelationMember::Type::RELATION); mRelationRelation.put(member.ref(),relation.id()); } i++; } auto metadata = relationMsg.initMetadata(); metadata.setVersion(relation.version()); metadata.setTimestamp(relation.timestamp().seconds_since_epoch()); metadata.setChangeset(relation.changeset()); metadata.setUid(relation.uid()); metadata.setUser(relation.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mRelations.put(relation.id(),output,MDB_APPEND); } private: MDB_env* mEnv; MDB_txn* mTxn; Sorter mCellNode; db::Locations mLocations; db::Elements mNodes; db::Elements mWays; db::Elements mRelations; Sorter mNodeWay; Sorter mNodeRelation; Sorter mWayRelation; Sorter mRelationRelation; }; void cmdExpand(int argc, char* argv[]) { cxxopts::Options options("Expand", "Expand a a .osm.pbf into an .osmx file"); options.add_options() ("v,verbose", "Verbose output") ("cmd", "Command to run", cxxopts::value()) ("input", "Input .pbf", cxxopts::value()) ("output", "Output .osmx", cxxopts::value()) ; options.parse_positional({"cmd","input", "output"}); auto result = options.parse(argc, argv); if (result.count("input") == 0 || result.count("output") == 0) { cout << "Usage: osmx expand OSM_FILE OSMX_FILE [OPTIONS]" << endl << endl; cout << "OSM_FILE must be an OSM XML or PBF." 
<< endl << endl; cout << "EXAMPLE:" << endl; cout << " osmx expand planet_latest.osm.pbf planet.osmx" << endl << endl; cout << "OPTIONS:" << endl; cout << " --v,--verbose: verbose output." << endl; exit(1); } string input =result["input"].as(); string output = result["output"].as(); Timer timer("convert"); MDB_env* env = db::createEnv(output,true); MDB_txn* txn; CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn)); const osmium::io::File input_file{input}; osmium::io::ReaderWithProgressBar reader{true, input_file, osmium::osm_entity_bits::object}; db::Metadata metadata(txn); auto header = reader.header(); for (auto option : header) { cout << option.first << " " << option.second << endl; } cout << "Box: " << header.box() << endl; cout << "Timestamp: " << header.get("osmosis_replication_timestamp") << endl; cout << "Sequence#: " << header.get("osmosis_replication_sequence_number") << endl; metadata.put("osmosis_replication_timestamp",header.get("osmosis_replication_timestamp")); metadata.put("osmosis_replication_sequence_number",header.get("osmosis_replication_sequence_number")); metadata.put("import_filename",input); string tempDir = output + "-temp"; assert(mkdir(tempDir.c_str(),S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == 0); { Timer insert("insert"); Handler handler(env,txn,tempDir); osmium::apply(reader, handler); } assert(rmdir(tempDir.c_str()) == 0); } ================================================ FILE: src/extract.cpp ================================================ #include #include #include "s2/s2latlng.h" #include "s2/s2region_coverer.h" #include "s2/s2latlng_rect.h" #include "osmium/io/any_output.hpp" #include "osmium/util/progress_bar.hpp" #include "osmium/memory/callback_buffer.hpp" #include "osmium/builder/attr.hpp" #include "osmium/builder/osm_object_builder.hpp" #include "cxxopts.hpp" #include "nlohmann/json.hpp" #include "osmx/storage.h" #include "osmx/region.h" #include "osmx/util.h" using namespace std; using namespace osmx; struct ExportProgress { 
string timestamp = ""; uint64_t cells_total = 0; uint64_t cells_prog = 0; uint64_t nodes_total = 0; uint64_t nodes_prog = 0; uint64_t elems_total = 0; uint64_t elems_prog = 0; void print() { cout << "{\"Timestamp\":\"" << timestamp << "\",\"CellsTotal\":" << cells_total << ",\"CellsProg\":" << cells_prog << ",\"NodesTotal\":" << nodes_total << ",\"NodesProg\":" << nodes_prog << ",\"ElemsTotal\":" << elems_total << ",\"ElemsProg\":" << elems_prog << "}" << endl; } }; class ProgressSection { public: ProgressSection(ExportProgress &expprog, uint64_t &total, uint64_t &prog, uint64_t total_to_set, bool jsonOutput) : expprog(expprog), total(total), prog(prog), progressbar(total_to_set, osmium::isatty(2) && !jsonOutput), jsonOutput(jsonOutput) { total = total_to_set; } ~ProgressSection() { progressbar.done(); } void tick() { prog++; if (prog - last_prog > (total / 100)) { if (jsonOutput) expprog.print(); else progressbar.update(prog); last_prog = prog; } } private: osmium::ProgressBar progressbar; uint64_t& prog; uint64_t& total; bool jsonOutput; ExportProgress &expprog; uint64_t last_prog = 0; }; static bool endsWith(const std::string& str, const std::string& suffix) { return str.size() >= suffix.size() && 0 == str.compare(str.size()-suffix.size(), suffix.size(), suffix); } // must be --bbox, --disc, --poly or --json // or --region void cmdExtract(int argc, char * argv[]) { cxxopts::Options cmd_options("Extract", "Create an .osm.pbf from an .osmx file."); cmd_options.add_options() ("v,verbose", "Verbose output") ("noUserData", "Don't include changeset,uid,user fields (GDPR compliance)") ("jsonOutput", "JSON progress output") ("cmd", "Command to run", cxxopts::value()) ("osmx", "Input .osmx", cxxopts::value()) ("output", "Output file, pbf or xml", cxxopts::value()) ("bbox", "rectangle in minLat,minLon,maxLat,maxLon", cxxopts::value()) ("disc", "disc in centerLat,centerLon,radiusDegrees", cxxopts::value()) ("geojson","geoJson of region", cxxopts::value()) ("poly","osmosis 
.poly of region", cxxopts::value()) ("region","file for region with extension .bbox, .disc, .json or .poly", cxxopts::value()) ("expand","buffer at this cell level",cxxopts::value()) ; cmd_options.parse_positional({"cmd","osmx","output"}); auto result = cmd_options.parse(argc, argv); if (result.count("osmx") == 0 || result.count("output") == 0) { cout << "Usage: osmx extract OSMX_FILE OUTPUT_FILE [OPTIONS]" << endl << endl; cout << "EXAMPLE:" << endl; cout << " osmx extract planet.osmx extract.osm.pbf --region region.json" << endl << endl; cout << "OPTIONS:" << endl; cout << " --v,--verbose: verbose output." << endl; cout << " --jsonOutput: log progress as JSON messages." << endl; cout << " --bbox MIN_LAT,MIN_LON,MAX_LAT,MAX_LON: region is lat/lon bbox" << endl; cout << " --disc CENTER_LAT,CENTER_LON,R_DEGREES: region is disc" << endl; cout << " --geojson GEOJSON: region is an areal GeoJSON feature or geometry" << endl; cout << " --poly POLY: region is an Osmosis polygon" << endl; cout << " --region FILE: text file with .bbox, .disc, .json or .poly extension" << endl; cout << " --expand CELL_LEVEL: buffer region with cells at this level, <= 16" << endl; exit(1); } auto startTime = std::chrono::high_resolution_clock::now(); ExportProgress prog; string err; bool jsonOutput = result.count("jsonOutput") > 0; if (jsonOutput) prog.print(); bool includeUserData = result.count("noUserData") == 0; std::unique_ptr region; if (result.count("bbox")) region = std::make_unique(result["bbox"].as(),"bbox"); else if (result.count("disc")) region = std::make_unique(result["disc"].as(),"disc"); else if (result.count("geojson")) region = std::make_unique(result["geojson"].as(),"geojson"); else if (result.count("poly")) region = std::make_unique(result["poly"].as(),"poly"); else if (result.count("region")) { auto fname = result["region"].as(); std::ifstream t(fname); std::stringstream buffer; buffer << t.rdbuf(); if (endsWith(fname,"bbox")) region = 
std::make_unique(buffer.str(),"bbox"); if (endsWith(fname,"disc")) region = std::make_unique(buffer.str(),"disc"); if (endsWith(fname,"json")) region = std::make_unique(buffer.str(),"geojson"); if (endsWith(fname,"poly")) region = std::make_unique(buffer.str(),"poly"); } else { cout << "No region specified." << endl; exit(0); } S2RegionCoverer::Options options; options.set_max_cells(1024); options.set_max_level(CELL_INDEX_LEVEL); S2RegionCoverer coverer(options); S2CellUnion covering = region->GetCovering(coverer); if (result.count("expand")) { int expand = result["expand"].as(); if (expand >= 0 && expand <= 16) { covering.Expand(expand); } } if (!jsonOutput) { cout << "Query cells: " << covering.cell_ids().size() << endl; } roaring::Roaring64Map node_ids; roaring::Roaring64Map way_ids; roaring::Roaring64Map relation_ids; MDB_env* env = db::createEnv(result["osmx"].as(),false); MDB_txn* txn; CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn)); db::Metadata metadata(txn); auto timestamp = metadata.get("osmosis_replication_timestamp"); prog.timestamp = timestamp; if (!jsonOutput) { cout << "Snapshot timestamp is " << prog.timestamp << endl; } { ProgressSection section(prog,prog.cells_total,prog.cells_prog,covering.size(),jsonOutput); MDB_dbi dbi; MDB_cursor *cursor; CHECK_LMDB(mdb_dbi_open(txn, "cell_node", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi)); CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor)); for (auto cell_id : covering.cell_ids()) { db::traverseCell(cursor,cell_id,node_ids); section.tick(); } mdb_cursor_close(cursor); } { ProgressSection section(prog,prog.nodes_total,prog.nodes_prog,node_ids.cardinality(),jsonOutput); MDB_dbi dbi; MDB_cursor *cursor; CHECK_LMDB(mdb_dbi_open(txn, "node_way", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi)); CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor)); for (auto const &node_id : node_ids) { db::traverseReverse(cursor,node_id,way_ids); section.tick(); } } // find all Relations that 
these nodes or Ways are a member of. { MDB_dbi dbi; MDB_cursor *cursor; CHECK_LMDB(mdb_dbi_open(txn, "node_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi)); CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor)); for (auto const &node_id : node_ids) { db::traverseReverse(cursor,node_id,relation_ids); } } { MDB_dbi dbi; MDB_cursor *cursor; CHECK_LMDB(mdb_dbi_open(txn, "way_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi)); CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor)); for (auto const &way_id : way_ids) { db::traverseReverse(cursor,way_id,relation_ids); } } { MDB_dbi dbi; MDB_cursor *cursor; CHECK_LMDB(mdb_dbi_open(txn, "relation_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi)); CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor)); roaring::Roaring64Map discovered_relations; roaring::Roaring64Map discovered_relations_2; for (auto const &relation_id : relation_ids) { db::traverseReverse(cursor,relation_id,discovered_relations); } relation_ids |= discovered_relations; while(true) { for (auto const &relation_id : discovered_relations) { db::traverseReverse(cursor,relation_id,discovered_relations_2); } int num_discovered = 0; for (auto discovered_relation_id : discovered_relations_2) { if (relation_ids.addChecked(discovered_relation_id)) num_discovered++; } if (num_discovered == 0) break; discovered_relations = discovered_relations_2; discovered_relations_2.clear(); } } if (!jsonOutput) cout << "Relations: " << relation_ids.cardinality() << endl; db::Elements ways(txn,"ways"); db::Elements relations(txn,"relations"); // make it Multipolygon-complete: go through all Relations, finding any that have tag type=multipolygon, and add to Ways for (auto relation_id : relation_ids) { auto reader = relations.getReader(relation_id); Relation::Reader relation = reader.getRoot(); auto tags = relation.getTags(); for (int i = 0; i < tags.size() / 2; i++) { if (tags[i*2] == "type" && tags[i*2+1] == "multipolygon") 
{ for (auto const &member : relation.getMembers()) { if (member.getType() == RelationMember::Type::WAY) { auto ref = member.getRef(); // check if the way exists, because this may be an extract if (ways.exists(ref)) way_ids.add(member.getRef()); } } } } } if (!jsonOutput) cout << "Ways: " << way_ids.cardinality() << endl; // make it Way-complete: go through all Ways and add in any missing Nodes. { for (auto way_id : way_ids) { auto reader = ways.getReader(way_id); Way::Reader way = reader.getRoot(); for (auto node_id : way.getNodes()) { node_ids.add(node_id); } } } if (!jsonOutput) cout << "Nodes: " << node_ids.cardinality() << endl; // start Write osmium::io::Header header; header.set("generator", "osmx"); header.set("timestamp", timestamp); header.set("osmosis_replication_timestamp", timestamp); auto bounds = region->GetBounds(); // the box header is used by some applications, // for example: zooming to an overview in QGIS. // however, osmium only supports writing one PBF box header and it must be in the -180 to 180 lng, -90 to 90 lat range. // valid input regions can cross the antimeridian, but the output header box is omitted as it can't represent the input. 
if (bounds.lng_lo().degrees() < bounds.lng_hi().degrees()) { header.add_box(osmium::Box(bounds.lng_lo().degrees(),bounds.lat_lo().degrees(),bounds.lng_hi().degrees(),bounds.lat_hi().degrees())); } osmium::io::Writer writer{result["output"].as(), header, osmium::io::overwrite::allow}; osmium::memory::CallbackBuffer cb; cb.set_callback([&](osmium::memory::Buffer&& buffer) { writer(std::move(buffer)); }); { ProgressSection section(prog,prog.elems_total,prog.elems_prog,node_ids.cardinality() + way_ids.cardinality() + relation_ids.cardinality(),jsonOutput); { db::Locations location_index(txn); db::Elements nodes_table(txn,"nodes"); for (auto node_id : node_ids) { section.tick(); auto loc = location_index.get(node_id); if (loc.is_undefined()) continue; { using namespace osmium::builder::attr; osmium::builder::NodeBuilder node_builder{cb.buffer()}; node_builder.set_id(node_id); node_builder.set_location(loc.coords); node_builder.set_version(loc.version); if (!nodes_table.exists(node_id)) continue; auto reader = nodes_table.getReader(node_id); Node::Reader node = reader.getRoot(); auto metadata = node.getMetadata(); node_builder.set_timestamp(metadata.getTimestamp()); if (includeUserData) { node_builder.set_changeset(metadata.getChangeset()); node_builder.set_user(metadata.getUser()); node_builder.set_uid(metadata.getUid()); } auto tags = node.getTags(); osmium::builder::TagListBuilder tag_builder{node_builder}; for (int i = 0; i < tags.size() / 2; i++) { tag_builder.add_tag(tags[i*2],tags[i*2+1]); } } cb.buffer().commit(); cb.possibly_flush(); } } // Writing ways pass { for (auto way_id : way_ids) { section.tick(); auto reader = ways.getReader(way_id); Way::Reader way = reader.getRoot(); { using namespace osmium::builder::attr; osmium::builder::WayBuilder way_builder{cb.buffer()}; way_builder.set_id(way_id); auto metadata = way.getMetadata(); way_builder.set_version(metadata.getVersion()); way_builder.set_timestamp(metadata.getTimestamp()); if (includeUserData) { 
way_builder.set_changeset(metadata.getChangeset()); way_builder.set_user(metadata.getUser()); way_builder.set_uid(metadata.getUid()); } { osmium::builder::WayNodeListBuilder way_node_list_builder{way_builder}; for (auto node_id : way.getNodes()) { way_node_list_builder.add_node_ref(node_id); } } auto tags = way.getTags(); osmium::builder::TagListBuilder tag_builder{way_builder}; for (int i = 0; i < tags.size() / 2; i++) { tag_builder.add_tag(tags[i*2],tags[i*2+1]); } } cb.buffer().commit(); cb.possibly_flush(); } } { for (auto relation_id : relation_ids) { section.tick(); auto reader = relations.getReader(relation_id); Relation::Reader relation = reader.getRoot(); { using namespace osmium::builder::attr; osmium::builder::RelationBuilder relation_builder{cb.buffer()}; relation_builder.set_id(relation_id); auto metadata = relation.getMetadata(); relation_builder.set_version(metadata.getVersion()); relation_builder.set_timestamp(metadata.getTimestamp()); if (includeUserData) { relation_builder.set_changeset(metadata.getChangeset()); relation_builder.set_user(metadata.getUser()); relation_builder.set_uid(metadata.getUid()); } { osmium::builder::RelationMemberListBuilder relation_member_list_builder{relation_builder}; for (auto const &member : relation.getMembers()) { if (member.getType() == RelationMember::Type::NODE) { relation_member_list_builder.add_member(osmium::item_type::node,member.getRef(),member.getRole()); } else if (member.getType() == RelationMember::Type::WAY) { relation_member_list_builder.add_member(osmium::item_type::way,member.getRef(),member.getRole()); } else { relation_member_list_builder.add_member(osmium::item_type::relation,member.getRef(),member.getRole()); } } } auto tags = relation.getTags(); osmium::builder::TagListBuilder tag_builder{relation_builder}; for (int i = 0; i < tags.size() / 2; i++) { tag_builder.add_tag(tags[i*2],tags[i*2+1]); } } cb.buffer().commit(); cb.possibly_flush(); } } } cb.flush(); writer.close(); mdb_env_close(env); 
auto duration = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - startTime ).count(); if (!jsonOutput) cout << "Finished export in " << duration/1000.0 << " seconds." << endl; } ================================================ FILE: src/region.cpp ================================================ #include #include #include "s2/s2latlng.h" #include "s2/s2latlng_rect.h" #include "s2/s2cap.h" #include "s2/s2polygon.h" #include "s2/s2loop.h" #include "osmx/region.h" static inline void rtrim(std::string &s) { s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } std::unique_ptr S2PolyFromCoordinates(nlohmann::json &coordinates) { std::vector> loopRegions; for (auto loop : coordinates) { std::vector points; // ignore the last repeated point for (int i = 0; i < loop.size() - 1; i++) { double lon = loop[i][0].get(); double lat = loop[i][1].get(); points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint()); } auto loopRegion = std::make_unique(points); loopRegion->Normalize(); loopRegions.push_back(std::move(loopRegion)); }; return std::make_unique(std::move(loopRegions)); } void Region::AddS2RegionFromGeometry(nlohmann::json &geometry) { if (geometry["type"] == "Polygon") { auto p = S2PolyFromCoordinates(geometry["coordinates"]); mRegions.push_back(std::move(p)); } else if (geometry["type"] == "MultiPolygon") { for (auto polygon : geometry["coordinates"]) { auto p = S2PolyFromCoordinates(polygon); mRegions.push_back(std::move(p)); } } } void Region::AddS2RegionFromPolyFile(std::istringstream &file) { std::vector points; std::string line; while (std::getline(file, line)) { rtrim(line); double lat, lon; // END of polygon if (line == "END") { break; } else { std::istringstream iss(line); iss >> lon; iss >> lat; points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint()); } } if (points[0] == points[points.size() - 1]) points.pop_back(); auto loop = std::make_unique(points); 
// Build a region from its textual description.
//
// text: the region definition; ext selects how it is parsed:
//   "bbox"    - "minLat,minLon,maxLat,maxLon" in degrees
//   "disc"    - "centerLat,centerLon,radiusDegrees"
//   "poly"    - Osmosis polygon file contents
//   "geojson" - Polygon, MultiPolygon, GeometryCollection, Feature or
//               FeatureCollection; only Polygon/MultiPolygon geometries
//               contribute to the region
// Any other ext prints "Unknown ext" and asserts.
Region::Region(const std::string &text, const std::string &ext) {
  if (ext == "bbox") {
    double minLat,minLon,maxLat,maxLon;
    std::sscanf(text.c_str(), "%lf,%lf,%lf,%lf",&minLat,&minLon,&maxLat,&maxLon);
    auto lo = S2LatLng::FromDegrees(minLat,minLon).Normalized();
    auto hi = S2LatLng::FromDegrees(maxLat,maxLon).Normalized();
    mRegions.push_back(std::make_unique<S2LatLngRect>(lo,hi));
  } else if (ext == "disc") {
    double lat,lon,radius;
    std::sscanf(text.c_str(), "%lf,%lf,%lf",&lat,&lon,&radius);
    auto center = S2LatLng::FromDegrees(lat,lon).Normalized();
    auto angle = S1Angle::Degrees(radius);
    mRegions.push_back(std::make_unique<S2Cap>(center.ToPoint(),angle));
  } else if (ext == "poly") {
    std::istringstream f(text);
    std::string line;
    // discard the first line
    std::getline(f,line);

    // this will either parse name of next polygon
    // or END at end of file
    while (std::getline(f, line)) {
      // END of file
      if (line == "END") {
        break;
      }
      AddS2RegionFromPolyFile(f);
    }
  } else if (ext == "geojson") {
    auto json = nlohmann::json::parse(text);
    if (json["type"] == "Polygon" || json["type"] == "MultiPolygon") {
      AddS2RegionFromGeometry(json);
    } else if (json["type"] == "GeometryCollection") {
      // The member geometries live in the "geometries" array (RFC 7946).
      // Each one is passed on individually; handing over the collection
      // itself would match neither Polygon nor MultiPolygon and add nothing.
      for (auto &geometry : json["geometries"]) {
        AddS2RegionFromGeometry(geometry);
      }
    } else if (json["type"] == "Feature") {
      AddS2RegionFromGeometry(json["geometry"]);
    } else if (json["type"] == "FeatureCollection") {
      for (auto &feature : json["features"]) {
        AddS2RegionFromGeometry(feature["geometry"]);
      }
    }
  } else {
    std::cerr << "Unknown ext" << std::endl;
    assert(false);
  }
}
firstRegion->GetRectBound().lat_lo(); auto lat_max = firstRegion->GetRectBound().lat_hi(); auto lng_min = firstRegion->GetRectBound().lng_lo(); auto lng_max = firstRegion->GetRectBound().lng_hi(); for (size_t i = 1; i < mRegions.size(); i++) { auto const &r = mRegions[i]; auto lat_lo = r->GetRectBound().lat_lo(); auto lat_hi = r->GetRectBound().lat_hi(); auto lng_lo = r->GetRectBound().lng_lo(); auto lng_hi = r->GetRectBound().lng_hi(); if (lat_lo < lat_min) lat_min = lat_lo; if (lat_hi > lat_max) lat_max = lat_hi; if (lng_lo < lng_min) lng_min = lng_lo; if (lng_hi > lng_max) lng_max = lng_hi; } return S2LatLngRect(S2LatLng(lat_min,lng_min),S2LatLng(lat_max,lng_max)); } ================================================ FILE: src/storage.cpp ================================================ #include "osmx/storage.h" #include "osmx/util.h" namespace osmx { namespace db { MDB_env *createEnv(std::string path, bool writable) { MDB_env* env; CHECK_LMDB(mdb_env_create(&env)); // the maximum size of any LMDB dataset. // 2TB is a safe number for just OSM data as of 02/2023 // only affects the size of virtual memory, not real memory. 
mdb_env_set_mapsize(env,2UL * 1024UL * 1024UL * 1024UL * 1024UL); mdb_env_set_maxdbs(env,10); int flags = 0; if (!writable) flags |= MDB_RDONLY; CHECK_LMDB(mdb_env_open(env, path.c_str(),MDB_NOSUBDIR | MDB_NORDAHEAD | MDB_NOSYNC | flags, 0664)); return env; } Metadata::Metadata(MDB_txn *txn) : mTxn(txn) { CHECK_LMDB(mdb_dbi_open(mTxn, "metadata", MDB_CREATE, &mDbi)); } void Metadata::put(const std::string &key_str, const std::string &value_str) { MDB_val key, data; key.mv_size = key_str.size(); key.mv_data = (void *)key_str.data(); data.mv_size = value_str.size(); data.mv_data = (void *)value_str.data(); CHECK_LMDB(mdb_put(mTxn,mDbi, &key, &data, 0)); } std::string Metadata::get(const std::string &key_str) { MDB_val key, data; key.mv_size = key_str.size(); key.mv_data = (void *)key_str.data(); auto retval = mdb_get(mTxn,mDbi, &key, &data); if (retval == 0) return std::string((const char *)data.mv_data,data.mv_size); else return ""; } Elements::Elements(MDB_txn *txn, const std::string &name) : mTxn(txn) { CHECK_LMDB(mdb_dbi_open(txn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE, &mDbi)); } void Elements::put(uint64_t id, kj::VectorOutputStream &vos, int flags) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; data.mv_size = vos.getArray().size(); data.mv_data = (void *)vos.getArray().begin(); CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags)); } void Elements::del(uint64_t id) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; mdb_del(mTxn, mDbi, &key, &data); } bool Elements::exists(uint64_t id) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; return mdb_get(mTxn,mDbi,&key,&data) == 0; } capnp::FlatArrayMessageReader Elements::getReader(uint64_t id) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; CHECK_LMDB(mdb_get(mTxn,mDbi,&key,&data)); auto arr = kj::ArrayPtr((const capnp::word *)data.mv_data,data.mv_size); return 
capnp::FlatArrayMessageReader(arr); } Locations::Locations(MDB_txn *txn) : mTxn(txn) { CHECK_LMDB(mdb_dbi_open(mTxn, "locations", MDB_INTEGERKEY | MDB_CREATE, &mDbi)); } void Locations::put(uint64_t id, const Location value, int flags) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; int32_t buf[3]; buf[0] = value.coords.x(); buf[1] = value.coords.y(); buf[2] = value.version; data.mv_size = sizeof(uint32_t) * 3; data.mv_data = (void *)&buf; CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags)); } void Locations::del(uint64_t id) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; mdb_del(mTxn,mDbi,&key,&data); } Location Locations::get(uint64_t id) const { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; int retval = mdb_get(mTxn, mDbi, &key, &data); if (retval == MDB_NOTFOUND) return Location{}; CHECK_LMDB(retval); int32_t *buf = (int32_t *)data.mv_data; return Location{osmium::Location(buf[0],buf[1]),buf[2]}; } bool Locations::exists(uint64_t id) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&id; int retval = mdb_get(mTxn, mDbi, &key, &data); return retval != MDB_NOTFOUND; } Index::Index(MDB_txn *txn, const std::string &name) : mTxn(txn) { CHECK_LMDB(mdb_dbi_open(txn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi)); } void Index::put(uint64_t from, uint64_t osm_id, int flags) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&from; data.mv_size = sizeof(uint64_t); data.mv_data = (void *)&osm_id; CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags)); } void Index::del(uint64_t from, uint64_t osm_id ) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&from; data.mv_size = sizeof(uint64_t); data.mv_data = (void *)&osm_id; mdb_del(mTxn,mDbi,&key,&data); } IndexWriter::IndexWriter(MDB_env *env, const std::string &name) : mEnv(env), mName(name) { 
CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &mTxn)); CHECK_LMDB(mdb_dbi_open(mTxn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi)); } void IndexWriter::put(uint64_t from, uint64_t osm_id, int flags) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&from; data.mv_size = sizeof(uint64_t); data.mv_data = (void *)&osm_id; CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags)); if (mWrites++ == 8000000) { CHECK_LMDB(mdb_txn_commit(mTxn)); CHECK_LMDB(mdb_txn_begin(mEnv, NULL, 0, &mTxn)); CHECK_LMDB(mdb_dbi_open(mTxn, mName.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi)); mWrites = 0; } } void IndexWriter::commit() { CHECK_LMDB(mdb_txn_commit(mTxn)); } void traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set) { S2CellId start = cell_id.child_begin(CELL_INDEX_LEVEL); S2CellId end = cell_id.child_end(CELL_INDEX_LEVEL); MDB_val key, data; key.mv_size = sizeof(S2CellId); key.mv_data = (void *)&start; // reading past end of db if (mdb_cursor_get(cursor,&key,&data,MDB_SET_RANGE) != 0) return; while (*((S2CellId *)key.mv_data) < end) { int retval_values = mdb_cursor_get(cursor,&key,&data,MDB_GET_MULTIPLE); while (0 == retval_values) { for (int i = 0; i < data.mv_size/sizeof(uint64_t); i++) { uint64_t *d = (uint64_t*)data.mv_data; set.add(*(d+i)); } retval_values = mdb_cursor_get(cursor,&key,&data,MDB_NEXT_MULTIPLE); } // reached end of db if (mdb_cursor_get(cursor,&key,&data,MDB_NEXT_NODUP) != 0) return; } } void traverseReverse(MDB_cursor *cursor,uint64_t from, roaring::Roaring64Map &set) { MDB_val key, data; key.mv_size = sizeof(uint64_t); key.mv_data = (void *)&from; if (mdb_cursor_get(cursor,&key,&data,MDB_SET) != 0) return; int retval_values = mdb_cursor_get(cursor,&key,&data,MDB_GET_MULTIPLE); while (0 == retval_values) { for (int i = 0; i < data.mv_size/sizeof(uint64_t); i++) { uint64_t *d = (uint64_t*)data.mv_data; uint64_t to_id = *(d+i); 
set.add(to_id); } retval_values = mdb_cursor_get(cursor,&key,&data,MDB_NEXT_MULTIPLE); } } }} ================================================ FILE: src/update.cpp ================================================ #include #include #include #include "cxxopts.hpp" #include "osmium/handler.hpp" #include "osmium/io/any_input.hpp" #include "osmium/visitor.hpp" #include "osmium/util/progress_bar.hpp" #include "roaring/roaring.hh" // Historically, OSMExpress had vendored its dependencies, but we move away // from this. At the moment, the S2 geometry library is the last remaining // dependency still being vendored. Our (rather ancient) bundled version // of S2 tests for a number of implementations of the Standard C library. // If S2 happens to recognize the library, it includes <byteswap.h> // (unless it is being compiled by a Microsoft or Apple compiler); // otherwise, S2 falls back to its own byteswap implementation. // What S2 does (or rather did, in the old version we happen // to bundle) isn't so great; it would have been better for S2 to use // a standards-conforming way of byteswapping, test for its presence, // and only use the fallback if that test fails. But that's how it is. // // Anyhow, the (equally ancient) version of CRoaring, another library // that we previously vendored into OSMExpress, was polluting the // C macro namespace in a way that made our bundled version of S2 // believe to be on a C library it knew about. Therefore, at the time // when OSMExpress still vendored that old version of CRoaring, the // bundled version of S2 would always include <byteswap.h> instead // of (re-)defining it, even if S2 did not recognize the C library. // // As we upgraded CRoaring to a newer version, which does not pollute // the C macro namespace anymore, the C preprocessor would now execute // the fallback path in the S2 headers when compiling with a Standard C // library that our old version of S2 does not recognize.
This caused // compilation errors on Alpine Linux, which uses musl, a very lightweight // but fully standards-conforming implementation of the Standard C library. // // The following hack prevents our vendored old version of S2 from // supplying its own byteswap functions. On Bionic (and also other // modern libc implementations, including musl), it is sufficient to // include . Note we cannot explicitly test for musl here, // because musl does not define a __MUSL__ macro. (They don't want to, // since such a macro would not be standards-conforming; whether it's // really helpful to be so puristic has been the subject of much debate). // // TODO: Remove this hack once we stop vendoring the S2 geometry library. // https://github.com/bdon/OSMExpress/issues/20 #if !defined(_MSC_VER) && !defined(__APPLE__) && !defined(__GLIBC__) \ && !defined(__BIONIC__) && !defined(__ASYLO__) #define __BIONIC__ 1 #endif #include "s2/s2latlng.h" #include "s2/s2cell_union.h" #include "osmx/storage.h" #include "osmx/util.h" using namespace std; using namespace osmx; class DataUpdate : public osmium::handler::Handler { public: DataUpdate(MDB_txn *txn) : mTxn(txn), mLocations(txn), mNodes(txn,"nodes"), mWays(txn,"ways"), mRelations(txn,"relations"), mCellNode(txn,"cell_node"), mNodeWay(txn,"node_way"), mNodeRelation(txn,"node_relation"), mWayRelation(txn,"way_relation"), mRelationRelation(txn, "relation_relation") { } // update location, node, cell_location tables void node(const osmium::Node& node) { uint64_t id = node.id(); db::Location prev_location = mLocations.get(id); db::Location new_location = db::Location{node.location(),(int32_t)node.version()}; uint64_t prev_cell; if (prev_location.is_defined()) prev_cell = S2CellId(S2LatLng::FromDegrees(prev_location.coords.lat(),prev_location.coords.lon())).parent(CELL_INDEX_LEVEL).id(); if (!node.visible()) { mLocations.del(id); mNodes.del(id); mCellNode.del(prev_cell,id); return; } else { mLocations.put(id,new_location); if 
(node.tags().size() > 0) { ::capnp::MallocMessageBuilder message; Node::Builder nodeMsg = message.initRoot(); setTags(node.tags(),nodeMsg); auto metadata = nodeMsg.initMetadata(); metadata.setVersion(node.version()); metadata.setTimestamp(node.timestamp().seconds_since_epoch()); metadata.setChangeset(node.changeset()); metadata.setUid(node.uid()); metadata.setUser(node.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mNodes.put(id,output); } else { mNodes.del(id); } } uint64_t new_cell = S2CellId(S2LatLng::FromDegrees(new_location.coords.lat(),new_location.coords.lon())).parent(CELL_INDEX_LEVEL).id(); if (!prev_location.is_defined()) { mCellNode.put(new_cell,id); return; } if (prev_cell != new_cell) { mCellNode.del(prev_cell,id); mCellNode.put(new_cell,id); } } // update way, node_way tables void way(const osmium::Way &way) { uint64_t id = way.id(); set prev_nodes; set new_nodes; if (mWays.exists(id)) { auto reader = mWays.getReader(id); Way::Reader way = reader.getRoot(); for (auto const &node_id : way.getNodes()) { prev_nodes.insert(node_id); } } if (!way.visible()) { mWays.del(id); } else { auto const &nodes = way.nodes(); ::capnp::MallocMessageBuilder message; Way::Builder wayMsg = message.initRoot(); wayMsg.initNodes(nodes.size()); int i = 0; for (int i = 0; i < nodes.size(); i++) { wayMsg.getNodes().set(i,nodes[i].ref()); new_nodes.insert(nodes[i].ref()); } setTags(way.tags(),wayMsg); auto metadata = wayMsg.initMetadata(); metadata.setVersion(way.version()); metadata.setTimestamp(way.timestamp().seconds_since_epoch()); metadata.setChangeset(way.changeset()); metadata.setUid(way.uid()); metadata.setUser(way.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mWays.put(id,output); } if (!way.visible()) { for (uint64_t node_id : prev_nodes) mNodeWay.del(node_id,id); } else { for (uint64_t node_id : prev_nodes) { if (new_nodes.count(node_id) == 0) mNodeWay.del(node_id,id); } for (uint64_t node_id : new_nodes) { 
if (prev_nodes.count(node_id) == 0) mNodeWay.put(node_id,id); } } } // update relation, node_relation, way_relation and relation_relation tables void relation(const osmium::Relation &relation) { uint64_t id = relation.id(); set prev_nodes; set prev_ways; set prev_relations; set new_nodes; set new_ways; set new_relations; if (mRelations.exists(id)) { auto reader = mRelations.getReader(id); Relation::Reader relation = reader.getRoot(); for (auto const &member : relation.getMembers()) { if (member.getType() == RelationMember::Type::NODE) { prev_nodes.insert(member.getRef()); } else if (member.getType() == RelationMember::Type::WAY) { prev_ways.insert(member.getRef()); } else { prev_relations.insert(member.getRef()); } } } if (!relation.visible()) { mRelations.del(relation.id()); } else { ::capnp::MallocMessageBuilder message; Relation::Builder relationMsg = message.initRoot(); setTags(relation.tags(),relationMsg); auto members = relationMsg.initMembers(relation.members().size()); int i = 0; for (auto const &member : relation.members()) { members[i].setRef(member.ref()); members[i].setRole(member.role()); if (member.type() == osmium::item_type::node) { new_nodes.insert(member.ref()); members[i].setType(RelationMember::Type::NODE); } else if (member.type() == osmium::item_type::way) { new_ways.insert(member.ref()); members[i].setType(RelationMember::Type::WAY); } else if (member.type() == osmium::item_type::relation) { new_relations.insert(member.ref()); members[i].setType(RelationMember::Type::RELATION); } i++; } auto metadata = relationMsg.initMetadata(); metadata.setVersion(relation.version()); metadata.setTimestamp(relation.timestamp().seconds_since_epoch()); metadata.setChangeset(relation.changeset()); metadata.setUid(relation.uid()); metadata.setUser(relation.user()); kj::VectorOutputStream output; capnp::writeMessage(output,message); mRelations.put(relation.id(),output); } if (!relation.visible()) { for (uint64_t node_id : prev_nodes) 
mNodeRelation.del(node_id,id); for (uint64_t way_id : prev_ways) mWayRelation.del(way_id,id); for (uint64_t relation_id : prev_relations) mRelationRelation.del(relation_id,id); } else { for (uint64_t node_id : prev_nodes) { if (new_nodes.count(node_id) == 0) mNodeRelation.del(node_id,id); } for (uint64_t node_id : new_nodes) { if (prev_nodes.count(node_id) == 0) mNodeRelation.put(node_id,id); } for (uint64_t way_id : prev_ways) { if (new_ways.count(way_id) == 0) mWayRelation.del(way_id,id); } for (uint64_t way_id : new_ways) { if (prev_ways.count(way_id) == 0) mWayRelation.put(way_id,id); } for (uint64_t relation_id : prev_relations) { if (new_relations.count(relation_id) == 0) mRelationRelation.del(relation_id,id); } for (uint64_t relation_id : new_relations) { if (prev_relations.count(relation_id) == 0) mRelationRelation.put(relation_id,id); } } } private: MDB_txn *mTxn; db::Locations mLocations; db::Elements mNodes; db::Elements mWays; db::Elements mRelations; db::Index mNodeWay; db::Index mNodeRelation; db::Index mWayRelation; db::Index mRelationRelation; db::Index mCellNode; }; void cmdUpdate(int argc, char* argv[]) { cxxopts::Options cmdoptions("Update", "Update an .osmx file with a .osc diff."); cmdoptions.add_options() ("v,verbose", "Verbose output") ("commit", "Commit the update") ("cmd", "Command to run", cxxopts::value()) ("osmx", ".osmx to update", cxxopts::value()) ("osc", ".osc to apply", cxxopts::value()) ("seqnum", "The sequence number of the .osc", cxxopts::value()) ("timestamp", "The timestamp of the .osc", cxxopts::value()) ; cmdoptions.parse_positional({"cmd","osmx","osc","seqnum","timestamp"}); auto result = cmdoptions.parse(argc, argv); if (result.count("osmx") == 0 || result.count("osc") == 0 || \ result.count("seqnum") == 0 || result.count("timestamp") == 0) { cout << "Usage: osmx update OSMX_FILE OSC_FILE SEQNUM TIMESTAMP [OPTIONS]" << endl; cout << "Applies OSC_FILE and saves SEQNUM and TIMESTAMP into the metadata table." 
<< endl << endl; cout << "EXAMPLE:" << endl; cout << " osmx update planet.osmx 123456.osc 123456 2019-09-05T00:00:00Z --commit" << endl << endl; cout << "OPTIONS:" << endl; cout << " --v,--verbose: verbose output." << endl; cout << " --commit: Actually commit the transaction; otherwise runs the update and rolls back." << endl; exit(1); } string osmx = result["osmx"].as(); string osc = result["osc"].as(); bool verbose = result.count("verbose") > 0; auto startTime = std::chrono::high_resolution_clock::now(); MDB_env* env = db::createEnv(osmx,true); MDB_txn* txn; CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn)); string old_seqnum = "UNKNOWN"; auto new_seqnum = result["seqnum"].as(); auto new_timestamp = result["timestamp"].as(); db::Metadata metadata(txn); if (verbose) cout << "Timestamp: " << metadata.get("osmosis_replication_timestamp") << endl; old_seqnum = metadata.get("osmosis_replication_sequence_number"); if (verbose) cout << "Starting update from " << old_seqnum << " to " << new_seqnum << endl; const osmium::io::File input_file{osc}; osmium::io::Reader reader{input_file, osmium::osm_entity_bits::object}; DataUpdate data_update(txn); osmium::apply(reader, data_update); auto duration = (std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - startTime ).count()) / 1000.0; if (result.count("commit") > 0) { { metadata.put("osmosis_replication_sequence_number",new_seqnum); metadata.put("osmosis_replication_timestamp",new_timestamp); } CHECK_LMDB(mdb_txn_commit(txn)); cout << "Committed: "; } else { mdb_txn_abort(txn); cout << "Aborted: "; } cout << old_seqnum << " -> " << new_seqnum << " in " << duration << " seconds." 
<< endl; mdb_env_sync(env,true); mdb_env_close(env); } ================================================ FILE: test/test_region.cpp ================================================ #include "catch2/catch_test_macros.hpp" #include "s2/s2latlng.h" #include "osmx/region.h" using namespace std; // osmium header format is like this: Box: (-79.82402,40.439216,-71.660801,45.07133) // small: {\"bbox\":[40.7411\,-73.9937\,40.7486\,-73.9821]} // big: {\"bbox\":[40.6762\,-74.0543\,40.8093\,-73.8603]} // radius: {"center":[40.7411,-73.9937],"radius":25.5} // indo: {\"bbox\":[-12.039321\,94.394531\,8.407168\,142.418292]} // bbox should be minLat,minLon,maxLat,maxLon (opposite of GeoJSON) TEST_CASE("rectangular bbox") { SECTION("basic bbox") { string bbox = "-1.0,-1.0,1.0,1.0"; Region s{bbox,"bbox"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(s.Contains(S2LatLng::FromDegrees(0.9,0.9).ToPoint())); } } TEST_CASE("disc") { SECTION("basic disc") { string disc = "0.0,0.0,1.0"; Region s{disc,"disc"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(0.9,0.9).ToPoint())); } } TEST_CASE("geojson polygon") { SECTION("polygon geometry") { string json = R"json({ "type": "Polygon", "coordinates": [ [ [-1.0,-1.0], [-1.0,1.0], [1.0,1.0], [1.0,-1.0], [-1.0,-1.0] ] ] })json"; Region s{json,"geojson"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint())); } SECTION("polygon with a hole") { string json = R"json({ "type": "Polygon", "coordinates": [ [ [-2.0,-2.0], [-2.0,2.0], [2.0,2.0], [2.0,-2.0], [-2.0,-2.0] ], [ [-1.0,-1.0], [-1.0,1.0], [1.0,1.0], [1.0,-1.0], [-1.0,-1.0] ] ] })json"; Region s{json,"geojson"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(1.5,1.5).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(0.0,0.0).ToPoint())); } SECTION("multipolygon geometry") { string json = R"json({ "type": "MultiPolygon", "coordinates": [ [[ [0.0,0.0], 
[1.0,0.0], [1.0,1.0], [0.0,1.0], [0.0,0.0] ]], [[ [2.0,2.0], [3.0,2.0], [3.0,3.0], [2.0,3.0], [2.0,2.0] ]] ] })json"; Region s{json,"geojson"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,0.5).ToPoint())); REQUIRE(s.Contains(S2LatLng::FromDegrees(2.5,2.5).ToPoint())); auto bounds = s.GetBounds(); REQUIRE(bounds.lat_lo().degrees() <= 0.0); REQUIRE(bounds.lat_hi().degrees() >= 3.0); REQUIRE(bounds.lng_lo().degrees() <= 0.0); REQUIRE(bounds.lng_hi().degrees() >= 3.0); } SECTION("bounds beyond antimeridian") { string json = R"json({ "type": "Polygon", "coordinates": [ [ [180.0,-1.0], [180.0,1.0], [181.0,1.0], [181.0,-1.0], [180.0,-1.0] ] ] })json"; Region s{json,"geojson"}; auto bounds = s.GetBounds(); REQUIRE(bounds.lng_lo().degrees() == 180.0); REQUIRE(bounds.lng_hi().degrees() <= -178.9); // hacky precision REQUIRE(bounds.lng_hi().degrees() >= -179.1); } } // .poly in Lon, Lat order TEST_CASE("osmosis .poly") { SECTION("simple polygon") { string poly = R"poly(basic first_area 0.2e+01 0.1e+01 0.2e+01 -0.1e+01 -0.2e+01 -0.1e+01 -0.2e+01 0.1e+01 END END )poly"; Region s{poly,"poly"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,3.0).ToPoint())); REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,1.5).ToPoint())); } SECTION("different whitespace, opposite orientation") { string poly = R"poly(basic first_area 0.1E+01 0.1E+01 -0.1E+01 0.1E+01 -0.1E+01 -0.1E+01 0.1E+01 -0.1E+01 END END )poly"; Region s{poly,"poly"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint())); } SECTION("repeated last point") { string poly = R"poly(basic first_area 0.1e+01 0.1e+01 0.1e+01 -0.1e+01 -0.1e+01 -0.1e+01 -0.1e+01 0.1e+01 0.1e+01 0.1e+01 END END )poly"; Region s{poly,"poly"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint())); } SECTION("multiple outer loops") { string poly = R"poly(basic 
first_area 0.1E+01 0.1E+01 -0.1E+01 0.1E+01 -0.1E+01 -0.1E+01 0.1E+01 -0.1E+01 END second_area 0.4E+01 0.4E+01 0.3E+01 0.4E+01 0.3E+01 0.3E+01 0.4E+01 0.3E+01 END END )poly"; Region s{poly,"poly"}; REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint())); REQUIRE(s.Contains(S2LatLng::FromDegrees(3.5,3.5).ToPoint())); REQUIRE(!s.Contains(S2LatLng::FromDegrees(1.0,1.0).ToPoint())); } SECTION("loop with hole") { } } ================================================ FILE: utils/osmx-update ================================================ #!/usr/bin/env python from datetime import datetime, timezone import subprocess import tempfile import os import sys import fcntl from server import ReplicationServer # expects osmx to be on the PATH. osmx = 'osmx' try: file = open('/tmp/osmx.lock','w') fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB) s = ReplicationServer(sys.argv[2]) # OSMX always uses minutely timestamps internally - try integrating daily seqnum = subprocess.check_output([osmx,'query',sys.argv[1],'seqnum']) if not seqnum.strip(): timestamp = subprocess.check_output([osmx,'query',sys.argv[1],'timestamp']) timestamp = timestamp.decode('utf-8').strip() timestamp = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ") timestamp = timestamp.replace(tzinfo=timezone.utc) print('Timestamp is {0}'.format(timestamp)) seqnum = s.timestamp_to_sequence(timestamp) seqnum = int(seqnum) print('Sequence number is {0}'.format(seqnum)) latest = s.get_state_info().sequence print("Latest is {0}".format(latest)) current_id = seqnum + 1 while current_id <= latest: fd, path = tempfile.mkstemp(suffix='.osc.gz') with open(fd,'wb') as f: f.write(s.get_diff_block(current_id)) info = s.get_state_info(current_id) timestamp = info.timestamp.strftime('%Y-%m-%dT%H:%M:%SZ') subprocess.check_call([osmx,'update',sys.argv[1],path,str(current_id),timestamp,'--commit']) os.unlink(path) current_id = current_id + 1 except BlockingIOError: print("Process is running - exiting.") finally: fcntl.lockf(file, 
class ReplicationServer(object):
    """Read-only HTTP client for an OSM replication directory.

    `url` is the base URL of the replication directory (no trailing slash)
    and `diff_type` is the file extension of the diff files published there.
    """

    def __init__(self, url, diff_type='osc.gz'):
        self.baseurl = url
        self.diff_type = diff_type

    def open_url(self, url):
        """ Open `url` with a 10 second timeout and return the response.
        """
        return urlrequest.urlopen(url, timeout=10)

    def timestamp_to_sequence(self, timestamp, balanced_search=False):
        """ Get the sequence number of the replication file that contains the
            given timestamp. The search algorithm is optimised for replication
            servers that publish updates in regular intervals. For servers
            with irregular change file publication dates `balanced_search`
            should be set to true so that a standard binary search for the
            sequence will be used. The default is good for all known
            OSM replication services.

            Returns `None` when the current state of the server cannot be
            retrieved.
        """
        # get the current timestamp from the server
        upper = self.get_state_info()

        if upper is None:
            return None
        if timestamp >= upper.timestamp or upper.sequence <= 0:
            return upper.sequence

        # find a state file that is before the required timestamp
        lower = None
        lowerid = 0
        while lower is None:
            log.info("Trying with Id %s", lowerid)
            lower = self.get_state_info(lowerid)

            if lower is not None and lower.timestamp >= timestamp:
                if lower.sequence == 0 or lower.sequence + 1 >= upper.sequence:
                    return lower.sequence
                # the candidate is too new: it becomes the new upper bound
                # and the search for a lower bound restarts from 0
                upper = lower
                lower = None
                lowerid = 0

            if lower is None:
                # no lower yet, so try a higher id (binary search wise)
                newid = (lowerid + upper.sequence) // 2
                if newid <= lowerid:
                    # nothing suitable found, so upper is probably the best
                    # we can do
                    return upper.sequence
                lowerid = newid

        # Now do a binary search between upper and lower.
        # We could be clever here and compute the most likely state file
        # by interpolating over the timestamps but that creates a whole ton of
        # special cases that need to be handled correctly.
        while True:
            if balanced_search:
                base_splitid = (lower.sequence + upper.sequence) // 2
            else:
                # assume evenly spaced state files and jump to the expected one
                ts_int = (upper.timestamp - lower.timestamp).total_seconds()
                seq_int = upper.sequence - lower.sequence
                goal = (timestamp - lower.timestamp).total_seconds()
                base_splitid = lower.sequence + ceil(goal * seq_int / ts_int)
                if base_splitid >= upper.sequence:
                    base_splitid = upper.sequence - 1
            split = self.get_state_info(base_splitid)

            if split is None:
                # file missing, search the next towards lower
                splitid = base_splitid - 1
                while split is None and splitid > lower.sequence:
                    split = self.get_state_info(splitid)
                    splitid -= 1
            if split is None:
                # still nothing? search towards upper
                splitid = base_splitid + 1
                while split is None and splitid < upper.sequence:
                    split = self.get_state_info(splitid)
                    splitid += 1
            if split is None:
                # still nothing? Then lower has to do
                return lower.sequence

            # set new boundary
            if split.timestamp < timestamp:
                lower = split
            else:
                upper = split

            if lower.sequence + 1 >= upper.sequence:
                return lower.sequence

    def get_state_info(self, seq=None):
        """ Downloads and returns the state information for the given
            sequence. If the download is successful, a namedtuple with
            `sequence` and `timestamp` is returned, otherwise the function
            returns `None`.

            `None` is also returned when a non-empty line has no `=` or when
            the file lacks a sequence number or a timestamp. A sequence number
            or timestamp that cannot be parsed raises `ValueError`.
        """
        try:
            response = self.open_url(self.get_state_url(seq))
        except Exception as err:
            logging.error(err)
            return None

        sequence = None
        timestamp = None

        # The with block closes the response on every exit path, including
        # the early return for malformed lines.
        with response:
            for raw_line in iter(response.readline, b''):
                # Drop a trailing comment first, then surrounding whitespace,
                # so indented comments and "key=value # note" both work.
                line = raw_line.decode('utf-8').partition('#')[0].strip()
                if not line:
                    continue

                # Split on the first '=' only; a value may contain '='.
                key, separator, value = line.partition('=')
                if not separator:
                    return None
                key = key.strip()
                value = value.strip()

                if key == 'sequenceNumber':
                    sequence = int(value)
                elif key == 'timestamp':
                    # colons are backslash-escaped in the state file
                    parsed = dt.datetime.strptime(value,
                                                  "%Y-%m-%dT%H\\:%M\\:%SZ")
                    timestamp = parsed.replace(tzinfo=dt.timezone.utc)

        # An incomplete state file is reported as "no state" so that callers
        # never receive a tuple with None fields.
        if sequence is None or timestamp is None:
            return None

        return OsmosisState(sequence=sequence, timestamp=timestamp)

    def get_diff_block(self, seq):
        """ Downloads the diff with the given sequence number and returns
            it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
            if the file cannot be downloaded.
        """
        with self.open_url(self.get_diff_url(seq)) as response:
            return response.read()

    def get_state_url(self, seq):
        """ Returns the URL of the state.txt files for a given sequence id.

            If seq is `None` the URL for the latest state info is returned,
            i.e. the state file in the root directory of the replication
            service.
        """
        if seq is None:
            return self.baseurl + '/state.txt'

        return '%s/%s.state.txt' % (self.baseurl, self._sequence_path(seq))

    def get_diff_url(self, seq):
        """ Returns the URL to the diff file for the given sequence id.
        """
        return '%s/%s.%s' % (self.baseurl, self._sequence_path(seq),
                             self.diff_type)

    @staticmethod
    def _sequence_path(seq):
        """ Returns the `AAA/BBB/CCC` path of a sequence id: millions,
            thousands and units, each zero-padded to three digits.
        """
        return '%03i/%03i/%03i' % (seq // 1000000,
                                   (seq % 1000000) // 1000,
                                   seq % 1000)