Repository: protomaps/OSMExpress
Branch: main
Commit: 045a515132e9
Files: 39
Total size: 128.2 KB
Directory structure:
gitextract_2jzln99k/
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── build-container.yml
│ └── codeql.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE.md
├── README.md
├── dist/
│ └── archive.sh
├── docs/
│ ├── MANUAL.md
│ └── PROGRAMMING_GUIDE.md
├── examples/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── bbox_wkt.cpp
│ └── way_wkt.cpp
├── include/
│ └── osmx/
│ ├── cmd.h
│ ├── messages.capnp
│ ├── region.h
│ ├── storage.h
│ └── util.h
├── python/
│ ├── .gitignore
│ ├── README.md
│ ├── examples/
│ │ ├── augmented_diff.py
│ │ ├── read_way.py
│ │ └── web_server.py
│ ├── osmx/
│ │ ├── __init__.py
│ │ ├── messages.capnp
│ │ └── osmx.py
│ └── setup.py
├── src/
│ ├── cmd.cpp
│ ├── expand.cpp
│ ├── extract.cpp
│ ├── region.cpp
│ ├── storage.cpp
│ └── update.cpp
├── test/
│ └── test_region.cpp
└── utils/
├── osmx-update
└── server.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/dependabot.yml
================================================
# Dependabot configuration: which dependency ecosystems are checked for
# updates, where their manifests live, and how often the check runs.
version: 2
updates:
# Container base images; manifests are looked up from the repository root.
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "daily"
# Actions referenced by the workflow files (for this ecosystem "/" is the
# conventional directory value).
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
================================================
FILE: .github/workflows/build-container.yml
================================================
# Builds the OSM Express container image with Podman on every push, pull
# request and published release. Images are pushed to ghcr.io for pushes and
# releases only; pull requests just verify that the image builds.
name: Build and push container image
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  release:
    types: [published]
jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      attestations: write
      id-token: write
    # Event-derived values (notably the release tag name, which is chosen by
    # whoever creates the release) are handed to the shell as environment
    # variables instead of being spliced into the script text with `${{ }}`.
    # This keeps a crafted tag name from being executed as shell code.
    env:
      EVENT_NAME: ${{ github.event_name }}
      SHA_TAG: ${{ github.sha }}
      LATEST_TAG: latest
      RELEASE_TAG: ${{ github.event.release.tag_name }}
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
        with:
          # vendor/s2geometry is a Git submodule and is needed by the build.
          submodules: recursive
      - name: Set up Podman
        run: |
          sudo apt-get update
          sudo apt-get install -y podman
      - name: Log into the container registry
        if: github.event_name != 'pull_request'
        env:
          REGISTRY_USER: ${{ github.actor }}
          REGISTRY_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: echo "${REGISTRY_TOKEN}" | podman login ghcr.io -u "${REGISTRY_USER}" --password-stdin
      - name: Build the container image
        run: |
          # Container image identifiers must be all-lowercase.
          # The two commas transform "User/OSMExpress" to "user/osmexpress".
          IMAGE_ID="ghcr.io/${GITHUB_REPOSITORY,,}"
          # Build the container image with SHA and latest tags.
          podman build -t "${IMAGE_ID}:${SHA_TAG}" -t "${IMAGE_ID}:${LATEST_TAG}" .
          # If this is a release event, tag the image with the release tag.
          if [ "${EVENT_NAME}" = "release" ]; then
            podman tag "${IMAGE_ID}:${SHA_TAG}" "${IMAGE_ID}:${RELEASE_TAG}"
          fi
      - name: Push the container image to the registry
        if: github.event_name != 'pull_request'
        run: |
          IMAGE_ID="ghcr.io/${GITHUB_REPOSITORY,,}"
          # Push the container image with SHA and latest tags.
          podman push "${IMAGE_ID}:${SHA_TAG}"
          podman push "${IMAGE_ID}:${LATEST_TAG}"
          # If this is a release event, push the image with the release tag.
          if [ "${EVENT_NAME}" = "release" ]; then
            podman push "${IMAGE_ID}:${RELEASE_TAG}"
          fi
================================================
FILE: .github/workflows/codeql.yml
================================================
# Runs CodeQL static analysis on pushes and pull requests targeting main, and
# on a weekly schedule. One job is started per entry in the language matrix.
name: Scan for security problems with CodeQL
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
schedule:
# Minute 17, hour 4, every Sunday (day-of-week 0).
- cron: '17 4 * * 0'
jobs:
analyze:
name: Analyze (${{ matrix.language }})
runs-on: 'ubuntu-latest'
permissions:
# Needed so the analysis results can be uploaded.
security-events: write
packages: read # required to fetch internal or private CodeQL packs
strategy:
# Keep analysing the remaining languages when one matrix entry fails.
fail-fast: false
matrix:
include:
# Every language uses build-mode "none", so this workflow contains no
# build step; the checkout below also does not fetch submodules.
- language: actions
build-mode: none
- language: c-cpp
build-mode: none
- language: python
build-mode: none
steps:
- name: Check out repository
uses: actions/checkout@v6
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
- name: Perform CodeQL analysis
uses: github/codeql-action/analyze@v4
with:
# Distinguishes the results of the individual matrix entries.
category: "/language:${{matrix.language}}"
================================================
FILE: .gitignore
================================================
CMakeCache.txt
CMakeFiles
*.swp
*.osmx
*.osmx-lock
Makefile
*.pbf
*.cmake
osmxTest
venv
depends
a.out
*.osc
*.osc.gz
__pycache__
/osmx
*.dylib
Testing/
compile_commands.json
install_manifest.txt
dist/*.tgz
================================================
FILE: .gitmodules
================================================
[submodule "vendor/s2geometry"]
path = vendor/s2geometry
url = https://github.com/google/s2geometry.git
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required (VERSION 3.5)

# The dependency declarations in this file rely on the FIND_PACKAGE_ARGS
# option of FetchContent_Declare(), which older CMake releases do not
# understand. The policy version above is left untouched; this guard only
# turns an obscure configure failure into a clear message.
if(CMAKE_VERSION VERSION_LESS "3.24")
  message(FATAL_ERROR
    "OSMExpress requires CMake 3.24 or newer (found ${CMAKE_VERSION}).")
endif()

# Default to Clang, but let an explicit -DCMAKE_C_COMPILER=... /
# -DCMAKE_CXX_COMPILER=... or a toolchain file take precedence. These must be
# set before project(), which is where the compilers get detected.
if(NOT DEFINED CMAKE_C_COMPILER)
  set(CMAKE_C_COMPILER "/usr/bin/clang")
endif()
if(NOT DEFINED CMAKE_CXX_COMPILER)
  set(CMAKE_CXX_COMPILER "/usr/bin/clang++")
endif()

project(OSMExpress)

set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -g")
# Append rather than assign: consecutive set(CMAKE_CXX_FLAGS ...) calls
# replace each other, which left only the last flag in effect and also
# discarded any flags supplied by the user.
string(APPEND CMAKE_CXX_FLAGS
  " -Wno-deprecated -Wno-deprecated-declarations -pthread")

# Version string handed to dist/archive.sh by the "archive" target.
set(OSMX_VERSION "0.2.0")

# Build every (sub)project as static libraries.
set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")

# Settings read by the CRoaring build when it is compiled from source.
set(ROARING_USE_CPM OFF)
set(ENABLE_ROARING_TESTS OFF)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
# Third-party dependencies.
#
# Every dependency is declared with FIND_PACKAGE_ARGS, so FetchContent first
# tries find_package() and only downloads the Git repository when no installed
# package is found. EXCLUDE_FROM_ALL keeps targets of downloaded projects out
# of the default build unless something in this project depends on them.
# FIND_PACKAGE_ARGS has to stay the last keyword of each declaration because
# everything after it is forwarded to find_package().
include(FetchContent)
# TODO: Switch to a released version after next CapnProto release (post 1.2.0).
# Reason: https://github.com/capnproto/capnproto/issues/2353
# Change for v1: https://github.com/capnproto/capnproto/pull/2355
# Change for v2: https://github.com/capnproto/capnproto/pull/2354
# Until then the fallback download follows the moving "master" branch, so
# from-source builds of Cap'n Proto are not reproducible.
FetchContent_Declare(
CapnProto
GIT_REPOSITORY https://github.com/capnproto/capnproto.git
GIT_TAG master
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
# Test framework used by osmxTest; an installed copy must be major version 3.
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v3.8.1
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS 3)
FetchContent_Declare(
cxxopts
GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git
GIT_TAG v3.3.1
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
# LMDB lives inside the OpenLDAP repository (libraries/liblmdb); a library
# target for the downloaded sources is defined by hand further down.
FetchContent_Declare(
LMDB
GIT_REPOSITORY https://git.openldap.org/openldap/openldap.git
GIT_TAG OPENLDAP_REL_ENG_2_6_10
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
FetchContent_Declare(
nlohmann_json
GIT_REPOSITORY https://github.com/nlohmann/json.git
GIT_TAG v3.12.0
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
# NOTE(review): SOURCE_SUBDIR presumably points at a directory without a
# CMakeLists.txt so that libosmium's own build is not pulled in and only its
# headers are used (see the include path added further down) - confirm.
FetchContent_Declare(
Osmium
GIT_REPOSITORY https://github.com/osmcode/libosmium.git
GIT_TAG v2.22.0
SOURCE_SUBDIR test/catch
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
FetchContent_Declare(
Protozero
GIT_REPOSITORY https://github.com/mapbox/protozero.git
GIT_TAG v1.8.0
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
FetchContent_Declare(
roaring
GIT_REPOSITORY https://github.com/RoaringBitmap/CRoaring.git
GIT_TAG v4.3.6
EXCLUDE_FROM_ALL
FIND_PACKAGE_ARGS)
# Resolve all declared dependencies (installed package or download).
FetchContent_MakeAvailable(
CapnProto Catch2 cxxopts LMDB nlohmann_json Osmium Protozero roaring)
# --- Fallbacks for dependencies that were not found as installed packages ---

# Cap'n Proto: add the downloaded source tree to this build.
if(NOT CapnProto_FOUND)
  add_subdirectory(${capnproto_SOURCE_DIR} EXCLUDE_FROM_ALL)
endif()

# LMDB: compile the two C sources from the downloaded tree into a static
# library and expose it under the LMDB::LMDB name the targets link against.
if(NOT TARGET LMDB::LMDB)
  set(LMDB_INCLUDE_DIR ${lmdb_SOURCE_DIR}/libraries/liblmdb)
  add_library(LMDB_LMDB STATIC
    ${lmdb_SOURCE_DIR}/libraries/liblmdb/mdb.c
    ${lmdb_SOURCE_DIR}/libraries/liblmdb/midl.c)
  target_include_directories(LMDB_LMDB PUBLIC ${LMDB_INCLUDE_DIR})

  add_library(LMDB::LMDB INTERFACE IMPORTED)
  set_property(
    TARGET LMDB::LMDB
    PROPERTY INTERFACE_LINK_LIBRARIES LMDB_LMDB)
  set_property(
    TARGET LMDB::LMDB
    PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LMDB_INCLUDE_DIR})
endif()

# libosmium: only the downloaded headers are needed, so they are put on the
# directory-wide system include path.
if(NOT OSMIUM_FOUND)
  add_library(Osmium INTERFACE)
  include_directories(SYSTEM ${osmium_SOURCE_DIR}/include)
endif()

# protozero: handled the same way as libosmium.
if(NOT Protozero_FOUND)
  add_library(Protozero INTERFACE)
  include_directories(SYSTEM ${protozero_SOURCE_DIR}/include)
endif()
# S2 Geometry is vendored as a Git submodule and built as part of this project.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/vendor/s2geometry EXCLUDE_FROM_ALL)
include_directories(vendor/s2geometry/src)
include_directories(include)

# needed for Expat install dir
if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
  include_directories(/usr/local/include)
  # link_directories() accepts directories only; the target name "osmx" that
  # used to be listed here was interpreted as a (relative) link directory.
  link_directories(/usr/local/lib)
endif()

# Generate the C++ bindings for the Cap'n Proto schema. The generated files
# are written below CAPNPC_OUTPUT_DIR; their paths are returned in CAPNP_SRCS
# and CAPNP_HDRS.
set(CAPNPC_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/capnpc_generated)
file(MAKE_DIRECTORY ${CAPNPC_OUTPUT_DIR})
capnp_generate_cpp(CAPNP_SRCS CAPNP_HDRS include/osmx/messages.capnp)
# The osmx command line tool, including the generated Cap'n Proto sources.
add_executable(
  osmx
  src/cmd.cpp
  src/storage.cpp
  src/expand.cpp
  src/extract.cpp
  src/update.cpp
  src/region.cpp
  ${CAPNP_SRCS})
add_dependencies(osmx s2)
target_include_directories(
  osmx
  PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)
target_link_libraries(
  osmx
  PRIVATE
  bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB
  nlohmann_json::nlohmann_json roaring s2 z)
set_property(TARGET osmx PROPERTY CXX_STANDARD 14)

# Unit tests for the region code.
add_executable(osmxTest test/test_region.cpp src/region.cpp)
set_property(TARGET osmxTest PROPERTY CXX_STANDARD 14)
target_include_directories(
  osmxTest
  PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)
target_link_libraries(
  osmxTest
  PRIVATE
  bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB
  nlohmann_json::nlohmann_json roaring s2 z
  Catch2::Catch2WithMain)

enable_testing()
# The NAME/COMMAND signature lets CTest resolve the osmxTest target to the
# location of the built executable; the legacy two-argument form treats the
# command as a plain program name.
add_test(NAME osmxTest COMMAND osmxTest)

install(TARGETS osmx DESTINATION bin)

# "archive" packages the built binary together with the license texts.
# dist/archive.sh is referenced by a relative path and picks up "osmx" from
# its working directory, so this target expects an in-source build.
add_custom_target(archive COMMAND dist/archive.sh ${OSMX_VERSION} ${CMAKE_SYSTEM_NAME})
add_dependencies(archive osmx)
# Static library containing the osmx sources without the command line front
# end (src/cmd.cpp).
add_library(
  osmx-static
  STATIC
  src/storage.cpp
  src/expand.cpp
  src/extract.cpp
  src/update.cpp
  src/region.cpp)
set_property(TARGET osmx-static PROPERTY CXX_STANDARD 14)
target_include_directories(
  osmx-static
  PUBLIC include ${CAPNPC_OUTPUT_DIR}/include)
target_link_libraries(
  osmx-static
  PUBLIC
  bz2 CapnProto::capnp cxxopts::cxxopts expat LMDB::LMDB
  nlohmann_json::nlohmann_json roaring s2 z)

# osmx-static and osmxTest put the generated Cap'n Proto header directory on
# their include path but do not list the generated sources, so nothing forced
# the code generator to run before their sources were compiled (a race in
# parallel builds). Drive the generation from one dedicated target and make
# every consumer depend on it; this also keeps the generator rule from being
# run concurrently by several targets.
add_custom_target(osmx_capnp_codegen DEPENDS ${CAPNP_SRCS} ${CAPNP_HDRS})
add_dependencies(osmx osmx_capnp_codegen)
add_dependencies(osmxTest osmx_capnp_codegen)
add_dependencies(osmx-static osmx_capnp_codegen)
================================================
FILE: Dockerfile
================================================
# ---- Build stage: compiles, tests and installs osmx ----
FROM alpine:3.22 AS builder

# Number of parallel make jobs. The default keeps the previous behaviour;
# override it on smaller machines with `--build-arg BUILD_JOBS=<n>`.
ARG BUILD_JOBS=16

# TODO: Add croaring-dev once available in Alpine Linux.
# https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/87769
RUN apk add --no-cache \
    clang \
    cmake \
    git \
    linux-headers \
    make \
    python3-dev \
    \
    bzip2-dev \
    catch2-3 \
    capnproto-dev \
    cxxopts-dev \
    expat-dev \
    libosmium-dev \
    lmdb-dev \
    nlohmann-json \
    openssl-dev \
    protozero-dev \
    zlib-dev

WORKDIR /usr/src/osmexpress
COPY . /usr/src/osmexpress

# In-source release build; the unit tests must pass before anything is
# installed.
RUN cmake -DCMAKE_BUILD_TYPE=Release .
RUN make -j"${BUILD_JOBS}" && ./osmxTest && make install

# ---- Runtime stage: only the osmx binary and its shared libraries ----
FROM alpine:3.22

# cxxopts, libosmium, nlohmann-json and protozero are header-only
# C++ libraries; catch2 is only used for testing. We do not need
# them in the production container.
RUN apk add --no-cache \
    libbz2 \
    libcrypto3 \
    capnproto \
    libexpat \
    libssl3 \
    lmdb \
    zlib

COPY --from=builder /usr/local/bin/osmx /usr/local/bin/osmx
ENTRYPOINT [ "/usr/local/bin/osmx" ]
================================================
FILE: LICENSE.md
================================================
Copyright 2019 Protomaps. Some source code from https://github.com/osmcode/pyosmium Copyright (c) 2014-2018, Sarah Hoffmann, All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: README.md
================================================
# OSM Express

[Manual](docs/MANUAL.md), [Programming Guide](docs/PROGRAMMING_GUIDE.md)
OSM Express is a fast storage format for OpenStreetMap that powers [SliceOSM](https://github.com/SliceOSM). It's designed as a low level building block specific to the OSM data model; common access patterns such as random lookups by ID, in-place minutely updates, and spatial queries are efficient and simple to manage in production applications.
## Features
* **Random access:** Look up nodes, ways and relations and their metadata by ID; fetch member elements of ways and relations to construct geometries.
* **Spatial indexing:** Nodes are bucketed into [S2 Geometry](http://s2geometry.io) cells. Access a region by providing a cell covering; works for nonrectangular regions.
* **Scalable:** OSM Express works the same way for OSM data of any size, from a small city to the entire planet. The entire planet can be worked with efficiently on typical hardware such as a laptop computer.
* **In-place updates:** Included are scripts to download minutely changesets from [planet.openstreetmap.org](https://planet.openstreetmap.org) and apply them to an .osmx database.
* **Concurrent access:** Multiple processes can open the database file for reading simultaneously. No running server process is required. Writing minutely updates doesn't block reader access. Reads and writes are transactional.
* **Portable:** An .osmx file can be read and written to from either C++ or Python.
## Details
OSM Express is a compact 1,500 LOC, and really a cobbling together of a few low-level libraries:
* [Libosmium](https://osmcode.org/libosmium/index.html) for the reading and writing of .osm.pbf files.
* [LMDB](https://symas.com/lmdb) for a memory-mapped ACID key-value store with fast cursor iteration.
* [Cap'n Proto](https://capnproto.org) for in-memory and on-disk representation of OSM elements.
* [CRoaring](https://roaringbitmap.org) for in-memory representation of ID sets as compressed bitmaps.
* [S2 Geometry](http://s2geometry.io) for indexing of geographic coordinates.
## Installation
[See the Programming Guide for instructions on building from source](/docs/PROGRAMMING_GUIDE.md).
## Usage
OSM Express is being used in production for [SliceOSM](https://slice.openstreetmap.us) and the file format is stable.
* Use the `osmx` command line tool to expand a .osm.pbf to an .osmx database and perform basic tasks such as extracting regions or querying by ID. No programming required.
* Use the [Python library](python/) via `pip install osmx` to access an .osmx database programmatically. See the [Python Examples](python/examples) for how to create command line tools, webservers or detailed diffs based on minutely data.
* Use the C++ library to access an .osmx database programmatically.
### Command line
```bash
osmx expand planet.osm.pbf planet.osmx # converts a pbf or xml to osmx. Takes 5-10 hours for the planet, resulting in a ~600GB file.
osmx extract planet.osmx extract.osm.pbf --bbox 40.7411\,-73.9937\,40.7486\,-73.9821 # extract a new pbf for the given bounding box.
osmx update planet.osmx 3648548.osc 3648548 2019-08-29T17:50:02Z --commit # applies an OsmChange diff.
osmx query planet.osmx # Print statistics, seqnum and timestamp.
osmx query planet.osmx way 34633854 # look up an element by ID.
```
`osmx extract` has a flag `--noUserData` intended for public facing instances which will remove the user, uid and changeset fields to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR).
Detailed command line usage can be found in the [Manual](docs/MANUAL.md).
### Headers
The C++ API is very rough with minimal abstraction. [examples/way_wkt.cpp](examples/way_wkt.cpp) is a short, commented C++ program that uses the headers to read a way from a .osmx file and outputs its [Well-Known Text](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) LineString geometry.
```bash
./way_wkt ../ny.osmx 34633854
Empire State Building LINESTRING (-73.9864855 40.7484833,-73.9851554 40.7479226,-73.9848259 40.7483735,-73.9861526 40.7489422,-73.9863111 40.7487242,-73.9863282 40.7487007,-73.9864684 40.7485078,-73.9864855 40.7484833)
```
[examples/bbox_wkt.cpp](examples/bbox_wkt.cpp) is a more complex example that takes a bounding box as input, and returns WKT LineStrings for ways that overlap the bbox. This overlap is an approximation based on cells and may include ways outside the bounding box.
Detailed C++ usage can be found in the [Programming Guide](docs/PROGRAMMING_GUIDE.md).
### Docker (experimental)
A `Dockerfile` is provided but users will need to build their own container. To do so, run:
```
docker build -t osmx .
```
## License and Development
2-Clause BSD, see [LICENSE.md](LICENSE.md).
================================================
FILE: dist/archive.sh
================================================
#!/bin/bash
# Bundles the osmx binary and the license texts of OSM Express and its
# dependencies into dist/osmexpress-VERSION-SYSTEM.tgz.
#
# Usage: dist/archive.sh VERSION SYSTEM
# All paths are relative, so the script has to be run from the directory that
# contains LICENSE.md, vendor/, dist/ and the built osmx binary.
set -e

# Quoted so that a version or system name containing spaces or glob
# characters cannot be split into several words.
FILENAME="dist/osmexpress-$1-$2.tgz"

# add_license TITLE FILE
# Appends a section to LICENSES: the title, an underline and the contents of
# FILE. Sections after the first one are preceded by an empty line.
add_license() {
  if [ -e LICENSES ]; then
    printf '\n' >> LICENSES
  fi
  printf '%s\n===========\n' "$1" >> LICENSES
  cat "$2" >> LICENSES
}

# Start from scratch so that a leftover file from an aborted run is not
# appended to.
rm -f LICENSES

add_license osmexpress LICENSE.md
add_license capnproto vendor/capnproto/LICENSE
add_license croaring vendor/CRoaring/LICENSE
add_license cxxopts vendor/cxxopts/LICENSE
add_license json vendor/json/LICENSE.MIT
add_license libosmium vendor/libosmium/LICENSE
add_license lmdb vendor/lmdb/libraries/liblmdb/LICENSE
add_license protozero vendor/protozero/LICENSE.md
add_license s2 vendor/s2geometry/LICENSE

tar -cvzf "$FILENAME" osmx LICENSES
rm LICENSES
echo "created $FILENAME"
================================================
FILE: docs/MANUAL.md
================================================
**OSM Express** is a database file format for OpenStreetMap data (.osmx), as well as a command line tool and C++ library for reading and writing .osmx files. Find it on GitHub at [github.com/bdon/OSMExpress](https://github.com/bdon/OSMExpress)

*Illustration of the cell covering for a rectangular input region and its overlap with indexed OpenStreetMap geometries.*
## Motivation
Here are some use cases that OSM Express fits well.
* You want an offline copy of OpenStreetMap, which can be updated every day, hour or minute from the main openstreetmap.org database, instead of redownloading the entire planet.
* You want to quickly access all OSM objects in a geographical region, such as a neighborhood, city or small country.
* You want to quickly look up OSM objects by ID, such as getting the `height` and `name` tags for a given way that represents a building, and construct geometries for ways and relations.
* You want to embed a database that does any of the above, such as in a web application that returns OSM objects as GeoJSON.
## Quick Start
### Command Line
For information on how to compile the `osmx` program from source, see the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md)
Once you have the `osmx` command line program, you'll need to start with an .osm.pbf or OSM XML file. The Planet file is available at [planet.openstreetmap.org](https://planet.openstreetmap.org), but it's preferable to begin with something smaller to learn with.
There are numerous sites for downloading .osm.pbf extracts, including [SliceOSM](https://slice.openstreetmap.us), a service itself powered by OSM Express.
Example: create an .osmx file by using the `expand` command on the .osm.pbf file:
osmx expand new_york_county.osm.pbf new_york_county.osmx
This will result in a 91 MB .osmx file.
We can access objects inside this .osmx file by ID, displaying the node IDs of its member nodes and all tags:
osmx query new_york_county.osmx way 34633854
> 402743563 402743567 402743571 402743573 2709307502 2709307499 2709307464 402743563
addr:city=New York City addr:housenumber=350 addr:postcode=10018 ...
We can also extract regions of the .osmx file into a new .osm.pbf file, which is useful for interoperability with other OSM tools.
osmx extract new_york_county.osmx downtown.osm.pbf --bbox 40.7411\,-73.9937\,40.7486\,-73.9821
### Updating
`utils/osmx-update` is provided to update `.osmx` to the most recent file on a replication server using `osmx update`. For example to update a planet.osmx file with minutely updates:
python utils/osmx-update planet.osmx https://planet.openstreetmap.org/replication/minute/
## Library
The OSM Express library is intentionally minimal and non-opinionated - for example, no attempt is made to transform OSM tags to a fixed schema, distinguish between polygon and linear ways, or assemble multipolygon relations into polygons. For these typical tasks it's recommended to use OSM Express as a library in your own program. Documentation and example code are available at the [Programming Guide.](/docs/PROGRAMMING_GUIDE.md)
## Other Languages
An .osmx file can be opened and queried directly in a Python program using the `osmx` Python package. See [Python](/docs/PROGRAMMING_GUIDE.md#python) for details.
Languages other than Python may be supported in the future by either language-specific libraries or a new C API. See [Development](#Development) if you're interested or discuss on GitHub.
## Technical Details
### Storage Requirements
A full planet.osmx created from planet.osm.pbf (47 GB) is around 580 GB.
OSM Express is optimized for fast lookups, extracts and updates, goals opposed to making the database size as compact as possible. A typical .osmx file can be 10 times the size of the corresponding .osm.pbf, because:
* Relationships between parent elements and member elements are encoded in both directions, to enable lookups from node to way, way to relation, etc.
* The storage engine (LMDB) has no built-in compression, unlike some LSM-tree storage engines such as LevelDB.
* The `mmap`-based design of LMDB and Cap'n Proto requires that fields are word-aligned on disk, causing storage overhead.
* Keys and values are stored in full as strings. Keys could be hardcoded in a lookup table, saving about 10% space, but this would make the database less portable.
As of 2019, fast local storage is cheap; 1 terabyte solid state drives are less than 150 USD. On managed hosting providers like AWS and Google Cloud, extra storage is affordable compared to more memory or CPU cores.
If it's necessary to optimize for storage space, an .osmx file can be stored on a filesystem with transparent compression such as ZFS or Btrfs, at the cost of CPU overhead. This can reduce planet.osmx to around 200GB.
### Privacy
OSM Express stores all metadata - version, timestamp, changeset, username and user ID - for all OSM objects, except for untagged nodes. The `osmx extract` `--noUserData` flag ignores changeset, username and user ID information for extracts, to comply with [GDPR guidelines](https://wiki.openstreetmap.org/wiki/GDPR).
### Performance
OSM Express should work with reasonable amounts of memory, less than 8 gigabytes, even for `expand` and `extract` on planet.osmx. The strongest predictor of performance is I/O latency. If benchmarking different storage environments, I/O latency can be best measured via IOPS at queue depth 1.
*WIP: benchmarks*
## Alternatives
* [osmium-tool](https://osmcode.org/osmium-tool/index.html) for creating extracts from osm.pbf files. This is more efficient for large country or continent sized extracts, or any task where the entire dataset needs to be read.
* [Overpass API](http://overpass-api.de) is a powerful server application for interactive querying and tag-based lookup of OSM data.
* [conveyal/osm-lib](https://github.com/conveyal/osm-lib) is a similar design, written in Java.
* [imposm3](https://github.com/omniscale/imposm3), [osm2pgsql](https://github.com/openstreetmap/osm2pgsql) if you want OSM data in PostgreSQL and/or want to render maps.
## Concepts
### File Layout
The `osmx query` command with no arguments reveals the layout of an .osmx database:
osmx query planet.osmx
locations: 5313351219
nodes: 144307630
ways: 590470034
relations: 6895065
cell_node: 5313351219
node_way: 5906888644
node_relation: 10242142
way_relation: 63350432
relation_relation: 497137
An .osmx file is an LMDB database with 10 sub-databases. All keys are 64 bit integers in [host byte order](https://en.wikipedia.org/wiki/Endianness) (little-endian on most modern CPUs).
* `locations`: maps OSM node IDs to Locations, which store the coordinates and version number of the node (documented below).
* `nodes`, `ways`, `relations` map OSM object IDs to a Cap'n Proto message defined in [`include/osmx/messages.capnp`](https://github.com/bdon/OSMExpress/blob/main/include/osmx/messages.capnp).
- `nodes` only contains *tagged* nodes; the value for each key describes the node's tags and other metadata. Untagged nodes are included only in `locations` to save space on disk.
- `ways` contains all ways; the value for each key describes the way's tags, metadata, and the list of node IDs that are part of the way.
- `relations` contains all relations; the value for each key contains the relation's tags, metadata, and the IDs and roles of its members.
* `cell_node` maps a level 16 [S2 cell ID](http://s2geometry.io/devguide/s2cell_hierarchy.html) to a node ID, using LMDB's `DUPSORT` to store multiple values for each key (since each S2 cell will intersect many OSM objects).
* `node_way`, `node_relation`, `way_relation` and `relation_relation` map OSM object IDs to their parent object IDs, also using `DUPSORT` (since nodes can belong to multiple ways, ways to multiple relations, etc).
Finally, the `metadata` sub-database holds arbitrary string:string values. This is used to store the replication sequence number and timestamp.
It is important to note that LMDB transactions span all sub-databases. This means that a read operation will retrieve the correct `timestamp` for the data it fetches, even if the database is written to while the read is happening.
#### Encoding of Locations
Values in the `locations` sub-database are structs with the following layout:
```c
struct Location {
int32_t longitude_i;
int32_t latitude_i;
int32_t version;
};
```
Each field is serialized in host byte order.
Longitude and latitude are stored as integers. To obtain the actual longitude and latitude as decimal numbers, divide the integer value by 10000000 (1e7). This integer-based encoding is precise to within a few centimeters anywhere on Earth. The same encoding is used by [libosmium](https://docs.osmcode.org/libosmium/latest/classosmium_1_1Location.html) and by the openstreetmap.org database internally.
### Spatial Indexing
OSM Express avoids expensive point-in-polygon computations for spatial operations. Instead, a query region is approximated by S2 cells with maximum level 16. The level 16 is chosen as a reasonable tradeoff between covering precision and storage space.
*Author's note: the S2 Covering of a region may differ depending on choice of architecture and compiler, while still being valid. Let me know if you know how to make this consistent.*
## Presentations
[State of the Map US 2019, Minneapolis - Video](https://2019.stateofthemap.us/program/sun/osm-express-a-spatial-file-format-for-the-planet.html)
================================================
FILE: docs/PROGRAMMING_GUIDE.md
================================================
## Building from source
OSM Express uses CMake for its build scripts. It's only been tested with the Clang C++ compiler so far.
Most dependencies are included as Git submodules in the `vendor/` directory, but a few stable, common libraries are expected to exist on your system, including bzip2, zlib, Expat and OpenSSL.
### FreeBSD 12
`sudo pkg install cmake expat`
### macOS
via Homebrew: `brew install cmake bzip2 zlib openssl expat`
*Additional macOS notes: the Clang compiler should be available via XCode Command Line Tools.*
### Ubuntu 22.04
via Apt package manager: `sudo apt install cmake clang libbz2-dev libz-dev libexpat-dev libssl-dev python3-dev`
### Build Instructions
git clone --recursive https://github.com/bdon/OSMExpress.git
cd OSMExpress
cmake -DCMAKE_BUILD_TYPE=Release .
make
*macOS note: If OpenSSL is installed through Homebrew, you may need to add an option to your cmake command: `-DOPENSSL_ROOT_DIR=/usr/local/opt/openssl\@3`
For macOS systems with Apple Silicon, this path is `-DOPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl\@3`*
## Using the C++ Headers
### Example: Way ID to WKT
See [examples/way_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/way_wkt.cpp) for a commented program.
### Example: Bbox to Way WKTs
See [examples/bbox_wkt.cpp](https://github.com/bdon/OSMExpress/blob/main/examples/bbox_wkt.cpp) for a commented program.
## Python
Install the library with `pip install osmx` . This will also download and install the `pycapnp` and `lmdb` Python libraries.
The Python API supports only location, node, way and relation lookups at the moment. Example:
import osmx
env = osmx.Environment('planet.osmx')
txn = osmx.Transaction(env)
locations = osmx.Locations(txn)
nodes = osmx.Nodes(txn)
ways = osmx.Ways(txn)
way = ways.get(123456)
for node_id in way.nodes:
print(locations.get(node_id))
print(osmx.tag_dict(way.tags))
================================================
FILE: examples/.gitignore
================================================
way_wkt
bbox_wkt
================================================
FILE: examples/CMakeLists.txt
================================================
# Stand-alone build of the two example programs (way_wkt and bbox_wkt).
# Headers and libraries are looked up in the parent directory's vendor/ tree
# and in /usr/local.
cmake_minimum_required (VERSION 3.5)
# A top-level CMakeLists.txt needs an explicit project() call; without it
# CMake warns and falls back to an implicit one.
project(OSMExpressExamples)

set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -g")
# Append rather than assign: consecutive set(CMAKE_CXX_FLAGS ...) calls
# replace each other, which left only the last flag in effect.
string(APPEND CMAKE_CXX_FLAGS
  " -Wno-deprecated -Wno-deprecated-declarations -pthread")

include_directories(../vendor/libosmium/include)
include_directories(../vendor/protozero/include)
include_directories(../vendor/s2geometry/src)
include_directories(../vendor/CRoaring/cpp)
include_directories(../vendor/CRoaring/include)
include_directories(../vendor/cxxopts/include)
include_directories(/usr/local/include)
include_directories(../depends)
include_directories(../include)
include_directories(../vendor/lmdb/libraries/liblmdb)
include_directories(../vendor/capnproto/c++/src)

link_directories(../vendor/s2geometry)
link_directories(/usr/local/lib)
link_directories(../vendor/CRoaring)
link_directories(../vendor/capnproto)
link_directories(../vendor/lmdb/libraries/liblmdb/)
link_directories(../vendor/capnproto/c++/src/capnp/)
link_directories(../vendor/capnproto/c++/src/kj/)
link_directories(../vendor/CRoaring/src/)
# OPENSSL_ROOT_DIR is normally given without a trailing slash, so the path
# separator has to be added here; skip the entry when the variable is unset.
if(OPENSSL_ROOT_DIR)
  link_directories("${OPENSSL_ROOT_DIR}/lib")
endif()

# Prints the WKT LineString of a single way.
add_executable(way_wkt way_wkt.cpp ../src/storage.cpp)
target_link_libraries(way_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto)
set_property(TARGET way_wkt PROPERTY CXX_STANDARD 14)

# Prints WKT LineStrings for the ways overlapping a bounding box.
add_executable(bbox_wkt bbox_wkt.cpp ../src/storage.cpp)
target_link_libraries(bbox_wkt lmdb z expat bz2 s2 capnp kj roaring ssl crypto)
set_property(TARGET bbox_wkt PROPERTY CXX_STANDARD 14)
================================================
FILE: examples/bbox_wkt.cpp
================================================
#include <vector>
#include <iomanip>
#include "osmx/storage.h"
#include "osmx/util.h"
#include "s2/s2latlng.h"
#include "s2/s2region_coverer.h"
#include "s2/s2latlng_rect.h"
#include "roaring/roaring64map.hh"
using namespace std;
// Example of a very simple program to get OSM objects in a region
// and print them out as WKT.
// see way_wkt for a simpler example.
// This program does not handle Relations at all,
// so it can't be used to find all Polygons in a region, since they may be Multipolygon relations.
// Usage: ./bbox_wkt OSMX_FILE MIN_LON MIN_LAT MAX_LON MAX_LAT
int main(int argc, char* argv[]) {
vector<string> args(argv, argv+argc);
MDB_env* env = osmx::db::createEnv(args[1]);
MDB_txn* txn;
CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
// Create a S2LatLngRect.
auto lo = S2LatLng::FromDegrees(stof(args[3]),stof(args[2]));
auto hi = S2LatLng::FromDegrees(stof(args[5]),stof(args[4]));
auto bbox = S2LatLngRect{lo,hi};
// Find the cell covering for the LatLngRect,
// with a maximum cell level of 16.
// Although nodes in the database are stored at level=16,
// Cells with levels less than 16 will be correctly handled by the traverseCell function.
// This allows for more compact representations of large regions.
S2RegionCoverer::Options options;
options.set_max_level(16);
S2RegionCoverer coverer(options);
S2CellUnion covering = coverer.GetCovering(bbox);
cerr << "Cell covering size: " << covering.size() << endl;
// Get all node_ids that match the given region.
Roaring64Map node_ids;
MDB_dbi dbi;
MDB_cursor *cursor;
CHECK_LMDB(mdb_dbi_open(txn, "cell_node", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
for (auto cell_id : covering.cell_ids()) {
osmx::db::traverseCell(cursor,cell_id,node_ids);
}
mdb_cursor_close(cursor);
cerr << "Nodes in region: " << node_ids.cardinality() << endl;
// Get all way_ids that are referred to by node_ids.
Roaring64Map way_ids;
CHECK_LMDB(mdb_dbi_open(txn, "node_way", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
for (auto const &node_id : node_ids) {
osmx::db::traverseReverse(cursor,node_id,way_ids);
}
mdb_cursor_close(cursor);
cerr << "Ways in region: " << way_ids.cardinality() << endl;
osmx::db::Locations locations(txn);
osmx::db::Elements ways(txn,"ways");
for (auto way_id : way_ids) {
// Fetch a Way element by ID.
auto message = ways.getReader(way_id);
auto way = message.getRoot<Way>();
// Tags are stored as a vector of key,value.
// Iterate through all tags and print the value if key = name.
auto tags = way.getTags();
for (int i = 0; i < tags.size() / 2; i++) {
if (tags[i*2] == "name") cout << tags[i*2+1].cStr();
}
// Assemble a WKT LineString geometry.
cout << "\tLINESTRING (";
cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places.
auto nodes = way.getNodes();
for (int i = 0; i < nodes.size(); i++) {
auto location = locations.get(nodes[i]);
if (i > 0) cout << ",";
cout << location.coords.lon() << " " << location.coords.lat();
}
cout << ")" << endl;
}
mdb_env_close(env); // close the database.
}
================================================
FILE: examples/way_wkt.cpp
================================================
#include <vector>
#include <iomanip>
#include "osmx/storage.h"
#include "osmx/util.h"
using namespace std;
// Example of a very simple C++ program that uses osmx headers
// to open a database, look up a way by ID, and assemble a WKT geometry from its nodes.
// Usage: ./way_wkt OSMX_FILE WAY_ID
int main(int argc, char* argv[]) {
vector<string> args(argv, argv+argc);
// Opening a database: create an Environment, and then a Transaction within the environment.
MDB_env* env = osmx::db::createEnv(args[1]);
MDB_txn* txn;
CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
// Create a Database handle for each element type within the Transaction.
osmx::db::Locations locations(txn);
osmx::db::Elements ways(txn,"ways");
// Fetch a Way element by ID.
auto message = ways.getReader(stol(args[2]));
auto way = message.getRoot<Way>();
// Tags are stored as a vector of key,value.
// Iterate through all tags and print the value if key = name.
auto tags = way.getTags();
for (int i = 0; i < tags.size() / 2; i++) {
if (tags[i*2] == "name") cout << tags[i*2+1].cStr();
}
// Assemble a WKT LineString geometry.
cout << "\tLINESTRING (";
cout << std::fixed << std::setprecision(7); // the output should have 7 decimal places.
auto nodes = way.getNodes();
for (int i = 0; i < nodes.size(); i++) {
auto location = locations.get(nodes[i]);
if (i > 0) cout << ",";
cout << location.coords.lon() << " " << location.coords.lat();
}
cout << ")" << endl;
mdb_env_close(env); // close the database.
}
================================================
FILE: include/osmx/cmd.h
================================================
// Entry points for the osmx subcommands. The dispatcher in cmd.cpp forwards
// the process's argc/argv unchanged, so argv[1] is the subcommand name.

// "expand": convert an OSM PBF or XML to an osmx database.
void cmdExpand(int argc, char* argv[]);
// "extract": create a regional extract PBF from an osmx database.
void cmdExtract(int argc, char* argv[]);
// "update": apply an OSM changeset to an osmx database.
void cmdUpdate(int argc, char* argv[]);
================================================
FILE: include/osmx/messages.capnp
================================================
# Cap'n Proto schema for the OSM element messages stored in an osmx database.
@0xd3a7e843a9c03421;
# Authorship/version information attached to every element.
struct Metadata {
version @0 :UInt32;
# The Python augmented_diff example formats this with utcfromtimestamp,
# i.e. it is treated as seconds since the Unix epoch.
timestamp @1 :UInt64;
changeset @2 :UInt32;
uid @3 :UInt32;
user @4 :Text;
}
# A node's tags and metadata. Coordinates are not part of this message;
# readers fetch them from the separate "locations" table.
struct Node {
# Flat list alternating key, value, key, value, ...
tags @0 :List(Text);
metadata @1 :Metadata;
}
struct Way {
# Node IDs, in order; readers resolve each one through the locations table.
nodes @0 :List(UInt64);
# Flat list alternating key, value, key, value, ...
tags @1 :List(Text);
metadata @2 :Metadata;
}
# One member entry of a relation.
struct RelationMember {
# ID of the referenced element; which table it lives in is given by `type`.
ref @0 :UInt64;
type @1 :Type;
role @2 :Text;
enum Type {
node @0;
way @1;
relation @2;
}
}
struct Relation {
# Flat list alternating key, value, key, value, ...
tags @0 :List(Text);
members @1 :List(RelationMember);
metadata @2 :Metadata;
}
================================================
FILE: include/osmx/region.h
================================================
#pragma once
// Standard headers for every std:: type named below, so this header compiles
// on its own regardless of what the other includes happen to pull in.
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "s2/s2region.h"
#include "s2/s2cell_union.h"
#include "s2/s2region_coverer.h"
#include "s2/s2latlng_rect.h"

// A geographic region made up of one or more S2 regions parsed from text.
class Region {
  public:
  // Builds the region from `text`. `ext` presumably selects the input format
  // (the private helpers below parse a JSON geometry or a poly file);
  // NOTE(review): confirm the accepted values against region.cpp.
  Region(const std::string &text, const std::string &ext);
  bool Contains(S2Point p);
  S2CellUnion GetCovering(S2RegionCoverer &coverer);
  S2LatLngRect GetBounds();

  private:
  void AddS2RegionFromGeometry(nlohmann::json &geometry);
  void AddS2RegionFromPolyFile(std::istringstream &file);
  // The parsed sub-regions, owned by this object.
  std::vector<std::unique_ptr<S2Region>> mRegions;
};
================================================
FILE: include/osmx/storage.h
================================================
#pragma once
#include "lmdb.h"
#include "osmium/osm/location.hpp"
#include "kj/io.h"
#include "capnp/message.h"
#include "capnp/serialize.h"
#include "osmx/messages.capnp.h"
#include "osmx/util.h"
#include "s2/s2cell_id.h"
#include "roaring/roaring64map.hh"
namespace osmx { namespace db {
// Conversion between an osmium::Location and a single 64-bit value.
// NOTE(review): the exact packing is defined in storage.cpp, not shown here.
uint64_t to64(osmium::Location loc);
osmium::Location toLoc(uint64_t val);
// Opens the database file at `path` and returns its LMDB environment.
// Callers in this repository release it with mdb_env_close. Readers pass only
// `path`; `writable` defaults to false.
MDB_env *createEnv(std::string path, bool writable = false);
// Base class that forbids copying: anything deriving from it can be
// default-constructed but neither copy-constructed nor copy-assigned.
class Noncopyable {
  public:
  Noncopyable() = default;

  Noncopyable(const Noncopyable &) = delete;
  Noncopyable &operator=(const Noncopyable &) = delete;
};
// String key/value store inside the database, bound to one transaction.
// cmd.cpp reads the keys "osmosis_replication_timestamp" and
// "osmosis_replication_sequence_number" through it.
class Metadata : public Noncopyable {
public:
Metadata(MDB_txn *txn);
void put(const std::string &key_str, const std::string &value_str);
std::string get(const std::string &key_str);
private:
MDB_txn* mTxn;
MDB_dbi mDbi;
};
// Table of serialized OSM elements keyed by ID, bound to one transaction.
// `name` selects the table; callers in this repository use "nodes", "ways"
// and "relations".
class Elements : public Noncopyable {
public:
Elements(MDB_txn *txn, const std::string &name);
void put(uint64_t id, kj::VectorOutputStream &vos, int flags = 0);
void del(uint64_t id);
bool exists(uint64_t id);
// Returns a Cap'n Proto reader for the stored message; callers obtain the
// typed element with getRoot<Node>() / getRoot<Way>() / getRoot<Relation>().
// NOTE(review): behavior for an ID that is not present is defined in
// storage.cpp, not shown here.
capnp::FlatArrayMessageReader getReader(uint64_t id);
private:
MDB_txn *mTxn;
MDB_dbi mDbi;
};
// A node's coordinates together with the node's version number.
// Member order (coords, then version) is kept as-is: the Python reader in this
// repository decodes a stored location as two 4-byte coordinates followed by
// a 4-byte version.
class Location {
  public:
  // Default-constructed locations have undefined coordinates (osmium's own
  // default) and a zero version, rather than an indeterminate one.
  Location() = default;
  Location(osmium::Location l, int32_t v) : coords(l), version(v) {
  }

  // Pure queries; const so they can be called on const Location values too.
  bool is_undefined() const {
    return coords.is_undefined();
  }

  bool is_defined() const {
    return coords.is_defined();
  }

  osmium::Location coords;
  int32_t version = 0;
};
// Table mapping a node ID to its Location (coordinates + version), bound to
// one transaction.
class Locations : public Noncopyable {
public:
Locations(MDB_txn *txn);
void put(uint64_t id, const Location value, int flags = 0);
void del(uint64_t id);
bool exists(uint64_t id);
// NOTE(review): what is returned for an ID that is not present is defined in
// storage.cpp, not shown here.
Location get(uint64_t id) const;
private:
MDB_txn* mTxn;
MDB_dbi mDbi;
};
// Named table of (from, osm_id) pairs, bound to one transaction.
// NOTE(review): the set of valid names is not visible here; readers elsewhere
// in the repository open tables such as "cell_node" and "node_way".
class Index : public Noncopyable {
public:
Index(MDB_txn *txn, const std::string &name);
void put(uint64_t from, uint64_t osm_id, int flags = 0);
void del(uint64_t from, uint64_t osm_id );
private:
// Declaration order determines initialization order; keep it as-is.
MDB_dbi mDbi;
MDB_txn *mTxn;
};
// Writer for a named table of (from, osm_id) pairs that takes the environment
// rather than a transaction and exposes an explicit commit().
// NOTE(review): presumably manages its own write transaction and uses mWrites
// to batch commits; confirm against storage.cpp.
class IndexWriter : public Noncopyable {
public:
IndexWriter(MDB_env *env, const std::string &name);
void put(uint64_t from, uint64_t osm_id, int flags = 0);
void commit();
private:
MDB_env *mEnv;
MDB_dbi mDbi;
MDB_txn *mTxn;
std::string mName;
int mWrites = 0;
};
// Adds to `set` the IDs found for `cell_id` using `cursor`. The bbox_wkt
// example opens the cursor on the "cell_node" table and passes cells from an
// S2 covering; per that example, cells coarser than the stored level are
// handled here.
void traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set);
// Adds to `set` the IDs recorded for `from` using `cursor`. The bbox_wkt
// example opens the cursor on "node_way" to collect the ways that refer to a
// node.
void traverseReverse(MDB_cursor *cursor, uint64_t from, roaring::Roaring64Map &set);
} }
================================================
FILE: include/osmx/util.h
================================================
#pragma once
#include <chrono>
#include <iostream>
#include "lmdb.h"
#include "osmium/tags/taglist.hpp"
// Runs the LMDB call `x` exactly once; if it returns a non-zero status, prints
// the LMDB error text with the call site and aborts the process.
// The status is captured in a local so that a failing call is not executed a
// second time just to format the message, and `x` is parenthesized so any
// expression can be passed.
#define CHECK_LMDB(x) { const int check_lmdb_rc_ = (x); if (0 != check_lmdb_rc_) { printf("%s, file %s, line %d.\n", mdb_strerror(check_lmdb_rc_), __FILE__, __LINE__); abort(); } }
// a higher cell level results in more precise extracts, as the size of 1 cell is the minimum index resolution.
#define CELL_INDEX_LEVEL 16
// Scoped stopwatch: prints "Start <name>" on construction and
// "Finished <name> in <seconds> seconds." on destruction, both to std::cout.
class Timer {
  public:
  Timer(std::string name) : mName(name), mStartTime(std::chrono::steady_clock::now()) {
    std::cout << "Start " << mName << std::endl;
  }

  ~Timer() {
    // Elapsed time is truncated to whole milliseconds, then shown in seconds.
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::steady_clock::now() - mStartTime ).count();
    std::cout << "Finished " << mName << " in " << duration/1000.0 << " seconds." << std::endl;
  }

  private:
  // Declared in the same order as the constructor initializes them.
  std::string mName;
  // steady_clock is monotonic, so the measured interval cannot go negative or
  // jump when the system clock is adjusted.
  std::chrono::steady_clock::time_point mStartTime;
};
// Copies an osmium tag list into `builder`'s tags field as a flat list that
// alternates key, value, key, value, ... (two entries per tag).
template <typename T>
void setTags(const osmium::TagList &tags, T &builder) {
  builder.initTags(tags.size() * 2);
  auto flat = builder.getTags();
  int slot = 0;
  for (auto const &tag : tags) {
    flat.set(slot++, tag.key());
    flat.set(slot++, tag.value());
  }
}
================================================
FILE: python/.gitignore
================================================
build
dist
*.egg-info
================================================
FILE: python/README.md
================================================
A Python package to read OSM Express (.osmx) database files.
## Installation
```bash
pip install osmx
```
## Usage
[examples/read_way.py](examples/read_way.py) : Simple program: given a way ID, print the coordinates of its member nodes, its metadata and all the relations it directly belongs to.
[examples/web_server.py](examples/web_server.py) : Uses only the Python standard library; starts an HTTP server that takes a URL like /way/WAY_ID and returns a GeoJSON feature for that OSM object. Shows an example of how to descend into relation members.
[examples/augmented_diff.py](examples/augmented_diff.py) Creates an [augmented diff](https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs) similar to those implemented by Overpass API, but limited to a single OsmChange (.osc) replication sequence file. Requires that the OSMX database represents the replication sequence state directly before that of the .OSC file.
================================================
FILE: python/examples/augmented_diff.py
================================================
from collections import namedtuple
from datetime import datetime
import copy
import sys
import xml.etree.ElementTree as ET
import xml.dom.minidom
import osmx
# generates an augmented diff for an OSC (OsmChange) file.
# see https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs
# this is intended to be run before the OSC file is applied to the osmx file.
# Three positional arguments are required: the osmx database, the OsmChange
# file to describe, and the path the augmented diff is written to.
if len(sys.argv) < 4:
print("Usage: augmented_diff.py OSMX_FILE OSC_FILE OUTPUT")
exit(1)
# 1st pass:
# populate the collection of actions
# create dictionary from osm_type/osm_id to action
# e.g. 'node/12345' -> Action(type=<tag of the enclosing block>, element=<the XML element>)
Action = namedtuple('Action',['type','element'])
actions = {}
osc = ET.parse(sys.argv[2]).getroot()
# Each child of the root is a block whose tag becomes the action type; each
# child of a block is one changed element.
for block in osc:
for e in block:
action_key = e.tag + "/" + e.get("id")
# Always ensure we're updating to the latest version of an object for the diff
if action_key in actions:
newest_version = int(actions[action_key].element.get("version"))
e_version = int(e.get("version"))
if e_version < newest_version:
print("Found element {}, version {} is less than previously visited version {}"
.format(action_key, e_version, newest_version))
continue
actions[action_key] = Action(block.tag,e)
action_list = [v for k,v in actions.items()]
# Open the database and one handle per table used by the passes below.
env = osmx.Environment(sys.argv[1])
with osmx.Transaction(env) as txn:
locations = osmx.Locations(txn)
nodes = osmx.Nodes(txn)
ways = osmx.Ways(txn)
relations = osmx.Relations(txn)
def not_in_db(elem):
    """Return True when the database has no record for this XML element.

    Nodes are looked up in the locations table, ways in the ways table and
    everything else in the relations table.
    """
    elem_id = int(elem.get('id'))
    if elem.tag == 'node':
        table = locations
    elif elem.tag == 'way':
        table = ways
    else:
        table = relations
    return not table.get(elem_id)
def get_lat_lon(ref, use_new):
    """Return the (lon, lat) strings for node `ref`.

    When `use_new` is set and the change file touches the node, the values
    come from the change file; otherwise they are read from the database.
    """
    if use_new:
        key = 'node/' + ref
        if key in actions:
            changed = actions[key].element
            return (changed.get('lon'), changed.get('lat'))
    stored = locations.get(ref)
    # The stored tuple is indexed as [0] = lat, [1] = lon.
    return (str(stored[1]), str(stored[0]))
def set_old_metadata(elem):
    """Overwrite elem's metadata attributes with the values stored in the db.

    Sets version, user, uid, timestamp (ISO 8601, 'Z' suffix) and changeset
    from the stored element. When the element has no stored record the
    attributes are set to '?', except that a node's version is still taken
    from the locations table when the node has a location.
    """
    elem_id = int(elem.get('id'))
    if elem.tag == 'node':
        o = nodes.get(elem_id)
    elif elem.tag == 'way':
        o = ways.get(elem_id)
    else:
        o = relations.get(elem_id)

    if o:
        elem.set('version', str(o.metadata.version))
        elem.set('user', str(o.metadata.user))
        elem.set('uid', str(o.metadata.uid))
        # convert to ISO8601 timestamp
        timestamp = o.metadata.timestamp
        formatted = datetime.utcfromtimestamp(timestamp).isoformat()
        elem.set('timestamp', formatted + 'Z')
        elem.set('changeset', str(o.metadata.changeset))
        return

    version = "?"
    if elem.tag == 'node':
        # tagless nodes: only the locations table knows about them.
        loc = locations.get(elem_id)
        if loc is None:
            # If loc is None here, it typically means that a node was created and
            # then deleted within the diff interval. In the future we should
            # remove these operations from the diff entirely.
            print("No old loc found for tagless node {}".format(elem_id))
        else:
            version = loc[2]
    else:
        # Only nodes have locations. Looking a way or relation ID up there
        # would report the version of an unrelated node with the same number.
        print("No old metadata found for {} {}".format(elem.tag, elem_id))
    elem.set('version', str(version))
    elem.set('user', '?')
    elem.set('uid', '?')
    elem.set('timestamp', '?')
    elem.set('changeset', '?')
# 2nd pass
# create an XML tree of actions with old and new sub-elements
# Every <action> gets exactly two children, <old> then <new>; later passes
# rely on that order when they index elem[0] and elem[1].
o = ET.Element('osm')
o.set("version","0.6")
o.set("generator","Overpass API not used, but achavi detects it at the start of string; OSMExpress/python/examples/augmented_diff.py")
for action in action_list:
a = ET.SubElement(o,'action')
a.set('type',action.type)
old = ET.SubElement(a,'old')
new = ET.SubElement(a,'new')
if action.type == 'create':
# A created element has no previous state: <old> stays empty.
new.append(action.element)
elif action.type == 'delete':
# get the old metadata
# <old> receives the change-file element with metadata replaced from the db;
# <new> receives a copy taken beforehand, marked invisible and emptied.
modified = copy.deepcopy(action.element)
set_old_metadata(action.element)
old.append(action.element)
modified.set('visible','false')
for child in list(modified):
modified.remove(child)
# TODO the Geofabrik deleted elements seem to have the old metadata and old version numbers
# check if this is true of planet replication files
new.append(modified)
else:
obj_id = action.element.get('id')
if not_in_db(action.element):
# Typically occurs when:
# 1. TODO: An element is deleted but then restored later,
# which should remain a modify operation. This will be difficult
# because objects are not retained in OSMX when deleted in OSM.
# 2. OK: An element was created and then modified within the diff interval
print("Could not find {0} {1} in db, changing to create".format(action.element.tag,action.element.get('id')))
new.append(action.element)
a.set('type','create')
else:
# Rebuild the previous version of the element from the database.
prev_version = ET.SubElement(old,action.element.tag)
prev_version.set('id',obj_id)
set_old_metadata(prev_version)
if action.element.tag == 'node':
ll = get_lat_lon(obj_id,False)
prev_version.set('lon',ll[0])
prev_version.set('lat',ll[1])
elif action.element.tag == 'way':
way = ways.get(obj_id)
for n in way.nodes:
node = ET.SubElement(prev_version,'nd')
node.set('ref',str(n))
# Tags are a flat key, value, key, value list: consume two items per tag.
it = iter(way.tags)
for t in it:
tag = ET.SubElement(prev_version,'tag')
tag.set('k',t)
tag.set('v',next(it))
else:
relation = relations.get(obj_id)
for m in relation.members:
member = ET.SubElement(prev_version,'member')
member.set('ref',str(m.ref))
member.set('role',m.role)
member.set('type',str(m.type))
# Tags are a flat key, value, key, value list: consume two items per tag.
it = iter(relation.tags)
for t in it:
tag = ET.SubElement(prev_version,'tag')
tag.set('k',t)
tag.set('v',next(it))
new.append(action.element)
# 3rd pass
# Augment the created "old" and "new" elements
def augment_nd(nd, use_new):
    """Attach 'lon' and 'lat' attributes to an <nd> element, based on its 'ref'."""
    position = get_lat_lon(nd.get('ref'), use_new)
    nd.set('lon', position[0])
    nd.set('lat', position[1])
def augment_member(mem, use_new):
    """Add coordinates to a relation <member> element.

    A way member gets one <nd lon= lat=> child per node of the way; the node
    list comes from the change file when `use_new` is set and the way is part
    of it, otherwise from the database. A node member gets 'lon'/'lat'
    attributes directly. Relation members are left untouched.
    """
    member_type = mem.get('type')
    if member_type == 'way':
        ref = mem.get('ref')
        if use_new and ('way/' + ref in actions):
            changed_way = actions['way/' + ref]
            node_refs = (child.get('ref') for child in changed_way.element if child.tag == 'nd')
        else:
            node_refs = (str(node_id) for node_id in ways.get(ref).nodes)
        # Resolve and append one node at a time, in order.
        for node_ref in node_refs:
            position = get_lat_lon(node_ref, use_new)
            nd = ET.SubElement(mem, 'nd')
            nd.set('lon', position[0])
            nd.set('lat', position[1])
    elif member_type == 'node':
        position = get_lat_lon(mem.get('ref'), use_new)
        mem.set('lon', position[0])
        mem.set('lat', position[1])
def augment(elem, use_new):
    """Add coordinates to the OSM object held by an <old> or <new> element.

    Does nothing when `elem` is empty. For a way, every <nd> child is
    augmented; for a relation, every <member> child; nodes are left alone.
    """
    if len(elem) == 0:
        return
    osm_obj = elem[0]
    if osm_obj.tag == 'way':
        child_tag, handler = 'nd', augment_nd
    elif osm_obj.tag == 'relation':
        child_tag, handler = 'member', augment_member
    else:
        return
    for child in osm_obj:
        if child.tag == child_tag:
            handler(child, use_new)
# Run the augmentation over every action: <old> uses database coordinates,
# <new> prefers coordinates from the change file. A lookup that fails with
# TypeError/AttributeError is reported as an incomplete object and skipped.
for elem in o:
try:
augment(elem[0],False)
augment(elem[1],True)
except (TypeError, AttributeError):
print("Changed {0} {1} is incomplete in db".format(elem[1][0].tag, elem[1][0].get('id')))
# 4th pass:
# find changes that propagate to referencing elements:
# when a node's location changes, that propagates to any ways it belongs to, relations it belongs to
# and also any relations that the way belongs to
# when a way's member list changes, it propagates to any relations it belongs to
node_way = osmx.NodeWay(txn)
node_relation = osmx.NodeRelation(txn)
way_relation = osmx.WayRelation(txn)
affected_ways = set()
affected_relations = set()
# Only elements that are not already part of the change file are collected;
# those already have their own action.
for elem in o:
if elem.get('type') == 'modify':
if elem[0][0].tag == 'node':
old_loc = (elem[0][0].get('lat'),elem[0][0].get('lon'))
new_loc = (elem[1][0].get('lat'),elem[1][0].get('lon'))
if old_loc != new_loc:
node_id = elem[0][0].get('id')
for rel in node_relation.get(node_id):
if 'relation/' + str(rel) not in actions:
affected_relations.add(rel)
for way in node_way.get(node_id):
if 'way/' + str(way) not in actions:
affected_ways.add(way)
for rel in way_relation.get(way):
if 'relation/' + str(rel) not in actions:
affected_relations.add(rel)
elif elem[0][0].tag == 'way':
old_way = [nd.get('ref') for nd in elem[0][0] if nd.tag == 'nd']
new_way = [nd.get('ref') for nd in elem[1][0] if nd.tag == 'nd']
if old_way != new_way:
way_id = elem[0][0].get('id')
for rel in way_relation.get(way_id):
if 'relation/' + str(rel) not in actions:
affected_relations.add(rel)
# Emit a synthetic 'modify' action for each affected way: <old> and <new>
# start as identical copies of the stored way and differ only through the
# coordinates added by augment().
for w in affected_ways:
a = ET.SubElement(o,'action')
a.set('type','modify')
old = ET.SubElement(a,'old')
way_element = ET.SubElement(old,'way')
way_element.set('id',str(w))
set_old_metadata(way_element)
way = ways.get(w)
for n in way.nodes:
node = ET.SubElement(way_element,'nd')
node.set('ref',str(n))
it = iter(way.tags)
for t in it:
tag = ET.SubElement(way_element,'tag')
tag.set('k',t)
tag.set('v',next(it))
new = ET.SubElement(a,'new')
new_elem = copy.deepcopy(way_element)
new.append(new_elem)
augment(old,False)
augment(new,True)
# Same for affected relations, except that the action is attached to the
# document only after augmentation succeeded, so an incomplete relation leaves
# no partial action behind.
for r in affected_relations:
old = ET.Element('old')
relation_element = ET.SubElement(old,'relation')
relation_element.set('id',str(r))
set_old_metadata(relation_element)
relation = relations.get(r)
for m in relation.members:
member = ET.SubElement(relation_element,'member')
member.set('ref',str(m.ref))
member.set('role',m.role)
member.set('type',str(m.type))
it = iter(relation.tags)
for t in it:
tag = ET.SubElement(relation_element,'tag')
tag.set('k',t)
tag.set('v',next(it))
new_elem = copy.deepcopy(relation_element)
new = ET.Element('new')
new.append(new_elem)
try:
augment(old,False)
augment(new,True)
a = ET.SubElement(o,'action')
a.set('type','modify')
a.append(old)
a.append(new)
except (TypeError, AttributeError):
print("Affected relation {0} is incomplete in db".format(r))
# 5th pass: add bounding boxes
class Bounds:
    """Running bounding box over (x, y) points, where x is lon and y is lat.

    Starts out inverted (min above max) so that the first added point defines
    all four edges.
    """

    def __init__(self):
        self.minx, self.maxx = 180, -180
        self.miny, self.maxy = 90, -90

    def add(self, x, y):
        """Grow the box so that it contains the point (x, y)."""
        self.minx = min(self.minx, x)
        self.maxx = max(self.maxx, x)
        self.miny = min(self.miny, y)
        self.maxy = max(self.maxy, y)

    def elem(self):
        """Return the box as a <bounds minlat minlon maxlat maxlon> element."""
        e = ET.Element('bounds')
        edges = (
            ('minlat', self.miny),
            ('minlon', self.minx),
            ('maxlat', self.maxy),
            ('maxlon', self.maxx),
        )
        for attribute, value in edges:
            e.set(attribute, str(value))
        return e
# For every action whose <old> holds an object, compute the bounding box of
# all <nd> descendants of that object and insert it as the object's first
# child. Objects without any <nd> get no <bounds>.
for child in o:
if len(child[0]) > 0:
osm_obj = child[0][0]
nds = osm_obj.findall('.//nd')
if nds:
bounds = Bounds()
for nd in nds:
bounds.add(float(nd.get('lon')),float(nd.get('lat')))
osm_obj.insert(0,bounds.elem())
# 6th pass
# sort by node, way, relation
# within each, sorted by increasing ID
def sort_by_type(x):
    """Sort key for an <action>: 1 for nodes, 2 for ways, 3 for anything else.

    The type is read from the object inside the action's second child (<new>).
    """
    rank = {'node': 1, 'way': 2}
    return rank.get(x[1][0].tag, 3)
# Two stable sorts: first by ID, then by type, so the final order is nodes,
# ways, relations, each group in increasing ID order.
o[:] = sorted(o, key=lambda x:int(x[1][0].get('id')))
o[:] = sorted(o, key=sort_by_type)
# The attribution note is inserted after sorting so it stays the first child.
note = ET.Element('note')
note.text = "The data included in this document is from www.openstreetmap.org. The data is made available under ODbL."
o.insert(0,note)
# pretty print helper
# http://effbot.org/zone/element-lib.htm#prettyprint
def indent(elem, level=0):
    """Pretty-print helper: add newline/indent whitespace to an element tree.

    Works in place on `elem` and its descendants. Only text/tail values that
    are empty or whitespace-only are replaced.
    """
    pad = "\n" + level*" "

    def is_blank(value):
        return not value or not value.strip()

    children = list(elem)
    if not children:
        # Leaf: only its tail is touched, and never for the root (level 0).
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad
    for child in children:
        indent(child, level+1)
    # The last child's tail precedes this element's closing tag, so it is
    # reset to this element's own indentation.
    last_child = children[-1]
    if is_blank(last_child.tail):
        last_child.tail = pad
# Pretty-print the tree in place, then write it to the OUTPUT path given as
# the third command-line argument.
indent(o)
ET.ElementTree(o).write(sys.argv[3])
================================================
FILE: python/examples/read_way.py
================================================
import sys
import osmx
# Both OSMX_FILE and WAY_ID are required; sys.argv[2] is read below.
if len(sys.argv) < 3:
    print("Usage: read_way.py OSMX_FILE WAY_ID")
    sys.exit(1)

env = osmx.Environment(sys.argv[1])
with osmx.Transaction(env) as txn:
    locations = osmx.Locations(txn)
    nodes = osmx.Nodes(txn)
    ways = osmx.Ways(txn)
    way_relation = osmx.WayRelation(txn)

    way_id = sys.argv[2]
    way = ways.get(way_id)
    # Ways.get returns None for an unknown ID; report it instead of failing
    # with an AttributeError on the next line.
    if way is None:
        print("Way {} not found".format(way_id))
        sys.exit(1)

    # Coordinates of each member node, in order.
    for node_id in way.nodes:
        print(locations.get(node_id))

    print(osmx.tag_dict(way.tags))
    print(way.metadata)

    # IDs of the relations this way is a direct member of.
    print(way_relation.get(way_id))
================================================
FILE: python/examples/web_server.py
================================================
import json
import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
import osmx
# OSMX_FILE is required; stop after printing the usage line instead of
# falling through to an IndexError on sys.argv[1].
if len(sys.argv) <= 1:
    print("Usage: web_server.py OSMX_FILE")
    sys.exit(1)

env = osmx.Environment(sys.argv[1])
# simple implementation of OSM GeoJSON API using osmx + Python standard library.
# not production ready!
class Handler(BaseHTTPRequestHandler):
    """Serves /node/ID, /way/ID and /relation/ID as GeoJSON features.

    The feature is built completely before any status line is sent, so a
    failed lookup produces a proper error response instead of a truncated
    200 reply.
    """

    def _reply(self, status, content_type, body):
        """Send a complete response: status line, headers, then the body bytes."""
        self.send_response(status)
        self.send_header('Content-type', content_type)
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        parts = self.path.split("/")
        if len(parts) < 3:
            self._reply(400, 'text/plain', "bad request".encode('utf-8'))
            return

        osm_id = parts[2]
        try:
            int(osm_id)
        except ValueError:
            # The tables convert IDs with int(); reject anything else up front.
            self._reply(400, 'text/plain', "bad request".encode('utf-8'))
            return

        resp = {'type':'Feature','properties':{}}
        try:
            with osmx.Transaction(env) as txn:
                locations = osmx.Locations(txn)

                def coord(node_id):
                    # Stored tuples are (lat, lon, version); GeoJSON wants (lon, lat).
                    loc = locations.get(node_id)
                    if loc is None:
                        raise LookupError("node {} has no location".format(node_id))
                    return (loc[1],loc[0])

                def require(obj, kind, obj_id):
                    # The tables return None for an unknown ID.
                    if obj is None:
                        raise LookupError("{} {} not found".format(kind, obj_id))
                    return obj

                if parts[1] == "node":
                    point = coord(osm_id)
                    # Untagged nodes have a location but no entry in the nodes table.
                    node = osmx.Nodes(txn).get(osm_id)
                    if node:
                        for k,v in osmx.tag_dict(node.tags).items():
                            resp['properties'][k] = v
                    resp['geometry'] = {'type':'Point','coordinates':point}
                elif parts[1] == "way":
                    ways = osmx.Ways(txn)
                    way = require(ways.get(osm_id), 'way', osm_id)
                    for k,v in osmx.tag_dict(way.tags).items():
                        resp['properties'][k] = v
                    coords = [coord(node_id) for node_id in way.nodes]
                    resp['geometry'] = {'type':'LineString','coordinates':coords}
                elif parts[1] == "relation":
                    ways = osmx.Ways(txn)
                    relations = osmx.Relations(txn)
                    relation = require(relations.get(osm_id), 'relation', osm_id)
                    for k,v in osmx.tag_dict(relation.tags).items():
                        resp['properties'][k] = v

                    geometries = []
                    # Relations currently being expanded; a member that points
                    # back into this set would recurse forever.
                    in_progress = {int(osm_id)}

                    def add_relation_geoms(r):
                        for member in r.members:
                            if member.type == 'node':
                                geometries.append({'type':'Point','coordinates':coord(member.ref)})
                            if member.type == 'way':
                                way = require(ways.get(member.ref), 'way', member.ref)
                                coords = [coord(node_id) for node_id in way.nodes]
                                geometries.append({'type':'LineString','coordinates':coords})
                            if member.type == 'relation':
                                if member.ref in in_progress:
                                    continue
                                in_progress.add(member.ref)
                                add_relation_geoms(require(relations.get(member.ref), 'relation', member.ref))
                                in_progress.discard(member.ref)

                    add_relation_geoms(relation)
                    resp['geometry'] = {'type':'GeometryCollection','geometries':geometries}

                # Serialize while the transaction is still open.
                body = json.dumps(resp).encode('utf-8')
        except LookupError as err:
            self._reply(404, 'text/plain', str(err).encode('utf-8'))
            return

        self._reply(200, 'application/json', body)
# Bind to port 8000 on all interfaces ('' as the host) and handle requests
# until the process is stopped.
print('Server listening on port 8000...')
httpd = HTTPServer(('', 8000), Handler)
httpd.serve_forever()
================================================
FILE: python/osmx/__init__.py
================================================
from .osmx import *
================================================
FILE: python/osmx/messages.capnp
================================================
# Cap'n Proto schema for the element messages read by the osmx Python package.
@0xd3a7e843a9c03421;
# Authorship/version information attached to every element.
struct Metadata {
version @0 :UInt32;
# The augmented_diff example formats this with utcfromtimestamp, i.e. it is
# treated as seconds since the Unix epoch.
timestamp @1 :UInt64;
changeset @2 :UInt32;
uid @3 :UInt32;
user @4 :Text;
}
# A node's tags and metadata. Coordinates are not part of this message; the
# package reads them from the separate locations table.
struct Node {
# Flat list alternating key, value, key, value, ...
tags @0 :List(Text);
metadata @1 :Metadata;
}
struct Way {
# Node IDs, in order.
nodes @0 :List(UInt64);
# Flat list alternating key, value, key, value, ...
tags @1 :List(Text);
metadata @2 :Metadata;
}
# One member entry of a relation.
struct RelationMember {
# ID of the referenced element; which table it lives in is given by `type`.
ref @0 :UInt64;
type @1 :Type;
role @2 :Text;
enum Type {
node @0;
way @1;
relation @2;
}
}
struct Relation {
# Flat list alternating key, value, key, value, ...
tags @0 :List(Text);
members @1 :List(RelationMember);
metadata @2 :Metadata;
}
================================================
FILE: python/osmx/osmx.py
================================================
import sys
import os
import lmdb
import capnp
# Load the schema explicitly from the file shipped next to this module,
# after removing pycapnp's import hook.
capnp.remove_import_hook()
messages_capnp = capnp.load(os.path.join(os.path.dirname(__file__), 'messages.capnp'))
def tag_dict(tag_list):
    """Convert a flat [key, value, key, value, ...] tag list into a dict.

    If a key occurs more than once, the last value wins.

    Raises:
        ValueError: if the list has an odd number of entries, i.e. a key
            without a value.
    """
    flat = list(tag_list)
    if len(flat) % 2 != 0:
        raise ValueError("tag list must contain key/value pairs, got {} entries".format(len(flat)))
    # Even positions are keys, odd positions are their values.
    return dict(zip(flat[0::2], flat[1::2]))
class Environment:
    """Read-only handle on an .osmx database file."""

    def __init__(self, fname):
        # The database is a single file (subdir=False), opened read-only with
        # read-ahead disabled and room for up to 10 named tables.
        options = dict(max_dbs=10, readonly=True, readahead=False, subdir=False)
        self._handle = lmdb.Environment(fname, **options)
class Transaction:
    """Context manager around an LMDB transaction on an Environment.

    Values are returned as buffers (buffers=True) rather than copied bytes.
    """

    def __init__(self, env):
        self.env = env
        self._handle = lmdb.Transaction(env._handle, buffers=True)

    def __enter__(self, *args, **kwargs):
        # Enter the underlying transaction, but hand back this wrapper so
        # `with Transaction(env) as txn` yields an object the tables accept.
        self._handle.__enter__(*args, **kwargs)
        return self

    def __exit__(self, *args, **kwargs):
        self._handle.__exit__(*args, **kwargs)
class Index:
    """Lookup table from one ID to the list of IDs recorded for it.

    Opens the existing table `name` with integer keys and fixed-size,
    integer, sorted duplicate values.
    """

    def __init__(self, txn, name):
        self.txn = txn
        self._handle = txn.env._handle.open_db(name,txn=txn._handle,integerkey=True,create=False,dupsort=True,integerdup=True,dupfixed=True)

    def get(self, obj_id):
        """Return the list of IDs stored under obj_id; empty if there are none."""
        key = int(obj_id).to_bytes(8,byteorder=sys.byteorder)
        cursor = self.txn._handle.cursor(self._handle)
        try:
            # set_key reports whether the key exists; without a match there is
            # nothing to iterate.
            if not cursor.set_key(key):
                return []
            return [int.from_bytes(data,byteorder=sys.byteorder,signed=False) for data in cursor.iternext_dup()]
        finally:
            # Close the cursor even if decoding a value raises.
            cursor.close()
class Table:
    """Base class for tables holding one value per integer ID."""

    def __init__(self, txn, name):
        self.txn = txn
        self._handle = txn.env._handle.open_db(
            name,
            txn=txn._handle,
            integerkey=True,
            create=False,
        )

    def _get_bytes(self, elem_id):
        """Return the raw value stored for elem_id, as returned by the transaction's get()."""
        # Keys are 8-byte integers in the machine's native byte order.
        key = int(elem_id).to_bytes(8, byteorder=sys.byteorder)
        return self.txn._handle.get(key, db=self._handle)
class Locations(Table):
    """Node locations table."""

    def __init__(self, txn):
        super().__init__(txn, b'locations')

    def get(self, node_id):
        """Return a 3-tuple for node_id, or None when nothing is stored.

        The first two items are the signed integers at bytes 4-8 and 0-4 of
        the record, each divided by 10000000; the examples in this repository
        use them as latitude and longitude. The third is the unsigned integer
        at bytes 8-12, used as the node's version.
        """
        msg = self._get_bytes(node_id)
        if not msg:
            return None

        def field(start, signed):
            # Decode one 4-byte native-endian integer from the record.
            return int.from_bytes(msg[start:start + 4], byteorder=sys.byteorder, signed=signed)

        first = field(4, True) / 10000000
        second = field(0, True) / 10000000
        version = field(8, False)
        return (first, second, version)
class Nodes(Table):
    """Table of Node messages, keyed by node ID."""

    def __init__(self, txn):
        super().__init__(txn, b'nodes')

    def get(self, node_id):
        """Return the decoded Node message, or None when nothing is stored."""
        raw = self._get_bytes(node_id)
        return messages_capnp.Node.from_bytes(raw) if raw else None


class Ways(Table):
    """Table of Way messages, keyed by way ID."""

    def __init__(self, txn):
        super().__init__(txn, b'ways')

    def get(self, way_id):
        """Return the decoded Way message, or None when nothing is stored."""
        raw = self._get_bytes(way_id)
        return messages_capnp.Way.from_bytes(raw) if raw else None


class Relations(Table):
    """Table of Relation messages, keyed by relation ID."""

    def __init__(self, txn):
        super().__init__(txn, b'relations')

    def get(self, relation_id):
        """Return the decoded Relation message, or None when nothing is stored."""
        raw = self._get_bytes(relation_id)
        return messages_capnp.Relation.from_bytes(raw) if raw else None
class NodeWay(Index):
    """Index opened on the 'node_way' table."""

    def __init__(self, txn):
        Index.__init__(self, txn, b'node_way')


class NodeRelation(Index):
    """Index opened on the 'node_relation' table."""

    def __init__(self, txn):
        Index.__init__(self, txn, b'node_relation')


class WayRelation(Index):
    """Index opened on the 'way_relation' table."""

    def __init__(self, txn):
        Index.__init__(self, txn, b'way_relation')


class RelationRelation(Index):
    """Index opened on the 'relation_relation' table."""

    def __init__(self, txn):
        Index.__init__(self, txn, b'relation_relation')
================================================
FILE: python/setup.py
================================================
import setuptools
# The package README doubles as the long description shown on the index page.
with open("README.md", "r") as fh:
    long_description = fh.read()

requirements = [
    'lmdb~=1.4.1',
    'pycapnp~=2.0.0',
]

setuptools.setup(
    name='osmx',
    version='0.0.5',
    author="Brandon Liu",
    author_email='brandon@protomaps.com',
    description='Read OSM Express (.osmx) database files.',
    license="BSD-2-Clause",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/bdon/OSMExpress",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
    ],
    install_requires = requirements,
    # The setuptools keyword is python_requires; "requires_python" is not a
    # recognized option, so the constraint never reached the package metadata.
    python_requires='>=3.0',
    # Ship the schema file that osmx.py loads at import time.
    package_data={'osmx':['messages.capnp']}
)
================================================
FILE: src/cmd.cpp
================================================
#include <vector>
#include "osmx/storage.h"
#include "osmx/cmd.h"
#include "osmx/util.h"
using namespace std;
using namespace osmx;
// Prints the top-level usage text to stdout and terminates with exit status 1.
void printHelp() {
  static const char *const kCommandLines[] = {
    "COMMANDS:",
    " expand Convert an OSM PBF or XML to an osmx database.",
    " extract Create a regional extract PBF from an osmx database.",
    " update Apply an OSM changeset to an osmx database.",
    " query Look up objects by ID in an osmx database.",
  };

  cout << "Usage: osmx COMMAND [ARG...]" << endl << endl;
  for (const char *line : kCommandLines) {
    cout << line << endl;
  }
  exit(1);
}
// Prints the usage text of the "query" subcommand to stdout and terminates
// with exit status 1.
void printQueryHelp() {
  cout << "USAGE:" << endl;
  cout << " osmx query OSMX_FILE [OPTIONS]" << endl << endl;
  cout << "EXAMPLES:" << endl;
  cout << " osmx query planet.osmx" << endl;
  cout << " osmx query planet.osmx way 123456" << endl << endl;
  cout << "OPTIONS:" << endl;
  cout << " none specified: print table statistics." << endl;
  cout << " [node,way,relation] ID: print OSM object" << endl;
  cout << " timestamp: print data timestamp" << endl;
  // Fixed spelling of "sequence" in the user-facing text.
  cout << " seqnum: print replication sequence number" << endl;
  exit(1);
}
int main(int argc, char* argv[]) {
vector<string> args(argv, argv+argc);
auto db_cmds = {"stat","node","way","relation"};
if (argc < 2) {
printHelp();
}
if (args[1] == "expand") {
cmdExpand(argc,argv);
} else if (args[1] == "extract") {
cmdExtract(argc,argv);
} else if (args[1] == "update") {
cmdUpdate(argc,argv);
} else if (args[1] == "query") {
if (args.size() == 2) {
printQueryHelp();
}
MDB_env* env = db::createEnv(args[2]);
MDB_txn* txn;
CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
if (args.size() >= 4) {
if (args[3] == "node") {
auto id = stol(args[4]);
auto location = db::Locations(txn).get(id);
cout << location.coords << endl;
auto tags = db::Elements(txn,"nodes").getReader(id).getRoot<Node>().getTags();
for (int i = 0; i < tags.size() / 2; i++) {
cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << "\n";
}
} else if (args[3] == "way") {
db::Elements ways(txn,"ways");
auto message = ways.getReader(stol(args[4]));
auto way = message.getRoot<Way>();
for (auto node_id : way.getNodes()) {
cout << node_id << " ";
}
cout << endl;
auto tags = way.getTags();
for (int i = 0; i < tags.size() / 2; i++) {
cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << " ";
}
cout << endl;
} else if (args[3] == "relation") {
db::Elements relations(txn,"relations");
uint64_t relation_id = stol(args[4]);
auto message = relations.getReader(relation_id);
auto relation = message.getRoot<Relation>();
auto tags = relation.getTags();
for (int i = 0; i < tags.size() / 2; i++) {
cout << tags[i*2].cStr() << "=" << tags[i*2+1].cStr() << " ";
}
auto members = relation.getMembers();
for (auto const &member : members) {
cout << member.getRef() << endl;
}
} else if (args[3] == "timestamp") {
db::Metadata metadata(txn);
cout << metadata.get("osmosis_replication_timestamp") << endl;
} else if (args[3] == "seqnum") {
db::Metadata metadata(txn);
cout << metadata.get("osmosis_replication_sequence_number") << endl;
} else {
printQueryHelp();
}
} else {
auto tables = {"locations","nodes","ways","relations","cell_node","node_way","node_relation","way_relation","relation_relation"};
for (auto const &table : tables) {
MDB_dbi dbi;
CHECK_LMDB(mdb_dbi_open(txn, table, MDB_INTEGERKEY, &dbi));
MDB_stat stat;
CHECK_LMDB(mdb_stat(txn,dbi,&stat));
cout << table << ": " << stat.ms_entries << endl;
}
db::Metadata metadata(txn);
cout << "Timestamp: " << metadata.get("osmosis_replication_timestamp") << endl;
cout << "Sequence #: " << metadata.get("osmosis_replication_sequence_number") << endl;
}
mdb_env_sync(env,true);
mdb_env_close(env);
} else {
printHelp();
}
}
================================================
FILE: src/expand.cpp
================================================
#include <iomanip>
#include <fstream>
#include "osmium/handler.hpp"
#include "osmium/visitor.hpp"
#include "osmium/io/any_input.hpp"
#include "osmium/util/progress_bar.hpp"
#include "osmium/io/reader_with_progress_bar.hpp"
#include "cxxopts.hpp"
#include "kj/io.h"
#include "capnp/message.h"
#include "capnp/serialize.h"
#include "s2/s2latlng.h"
#include "s2/s2cell_id.h"
#include "osmx/storage.h"
#include "osmx/util.h"
#include "osmx/messages.capnp.h"
using namespace std;
using namespace osmx;
typedef std::pair<uint64_t, uint64_t> Pair;
typedef std::pair<Pair, uint64_t> pqelem;
// Sequentially reads (from, to) pairs of uint64_t back from a binary run file
// such as the ones produced by Sorter::persist().
class SortReader {
public:
  // Opens `filename` for binary reading. A failed open is reported by the
  // first call to getNext() returning false.
  SortReader(std::string filename) : mStream(filename, std::ios::in | std::ios::binary) { }

  // Reads the next pair into `entry`.
  // Returns false when no complete pair could be read: end of file, a
  // truncated trailing record, or a stream that never opened. Checking fail()
  // rather than only eof() prevents callers from looping forever on a stream
  // that is broken but not at EOF.
  bool getNext() {
    mStream.read((char *)&entry,sizeof(uint64_t) *2);
    return !mStream.fail();
  }

  // The most recently read pair; only meaningful after getNext() returned true.
  Pair entry;
private:
  std::ifstream mStream;
};
class Sorter {
int MAX_RUN_SIZE = 64000000; // about 1 GB
public:
Sorter(std::string tempDir,std::string name) : mTempDir(tempDir), mName(name) {
mStorage.reserve(MAX_RUN_SIZE);
}
void put(uint64_t from, uint64_t to) {
mStorage.push_back(std::make_pair(from,to));
if (mStorage.size() > MAX_RUN_SIZE) persist();
}
void put(S2CellId from, uint64_t to) {
put(from.id(),to);
}
void persist() {
if (mStorage.size() == 0) return;
sort(mStorage.begin(),mStorage.end());
int runNumber = mSavedRuns.size();
std::ofstream stream;
std::stringstream fname;
fname << mTempDir << "/" << std::setw(2) << std::setfill('0') << mName << "_" << std::setw(3) << std::setfill('0') << runNumber << ".run";
stream.open(fname.str(),std::ios::binary);
for (auto const &entry: mStorage) {
stream.write((char *)&entry.first,sizeof(uint64_t));
stream.write((char *)&entry.second,sizeof(uint64_t));
}
stream.close();
mStorage.clear();
mStorage.reserve(MAX_RUN_SIZE);
mSavedRuns.push_back(fname.str());
}
void writeDb(MDB_env *env) {
persist();
Timer timer("External sort " + mName);
osmium::ProgressBar progress{MAX_RUN_SIZE * mSavedRuns.size(), osmium::isatty(2)};
int read = 0;
std::priority_queue<pqelem, std::vector<pqelem>, std::greater<pqelem>> q;
std::vector<SortReader> readers;
db::IndexWriter index(env,mName);
for (int i = 0; i < mSavedRuns.size(); i++) {
readers.emplace_back(mSavedRuns[i]);
if (readers[i].getNext()) q.push(make_pair(readers[i].entry, i));
}
Pair last;
while (q.size() > 0) {
pqelem pair = q.top();
auto idx = pair.second;
if (pair.first != last) {
if (pair.first.first != last.first) index.put(pair.first.first,pair.first.second,MDB_APPEND);
else index.put(pair.first.first,pair.first.second,MDB_APPENDDUP);
}
q.pop();
if (readers[idx].getNext()) q.push(make_pair(readers[idx].entry, idx));
progress.update(read++);
last = pair.first;
}
index.commit();
progress.done();
for (auto const &run : mSavedRuns) {
remove(run.c_str());
}
}
private:
Sorter( const Sorter& ) = delete;
Sorter& operator=( const Sorter& ) = delete;
std::vector<std::pair<uint64_t,uint64_t>> mStorage;
int mRunNumber = 0;
std::vector<std::string> mSavedRuns;
std::string mTempDir;
std::string mName;
};
// osmium handler that writes every node, way and relation it is given into the
// locations/nodes/ways/relations tables of the open transaction, and collects
// the reverse-lookup pairs (cell->node, node->way, member->relation) in
// Sorters that are flushed to their index tables on destruction.
class Handler: public osmium::handler::Handler {
public:
  Handler(MDB_env *env, MDB_txn *txn,string tempDir) :
    mEnv(env),
    mTxn(txn),
    mCellNode(tempDir,"cell_node"),
    mLocations(txn),
    mNodes(txn,"nodes"),
    mWays(txn,"ways"),
    mRelations(txn,"relations"),
    mNodeWay(tempDir,"node_way"),
    mNodeRelation(tempDir,"node_relation"),
    mWayRelation(tempDir,"way_relation"),
    mRelationRelation(tempDir,"relation_relation")
  {
  }

  // Commits the element transaction first, then merges each sorter into its
  // index table.
  ~Handler() {
    CHECK_LMDB(mdb_txn_commit(mTxn));
    mCellNode.writeDb(mEnv);
    mNodeWay.writeDb(mEnv);
    mNodeRelation.writeDb(mEnv);
    mWayRelation.writeDb(mEnv);
    mRelationRelation.writeDb(mEnv);
  }

  // Stores the node's location and version, records its S2 cell at
  // CELL_INDEX_LEVEL, and stores tags and metadata only for tagged nodes.
  void node(const osmium::Node& node) {
    mLocations.put(node.id(), db::Location{node.location(),(int32_t)node.version()},MDB_APPEND);
    auto loc = node.location();
    auto ll = S2LatLng::FromDegrees(loc.lat(),loc.lon());
    auto cell = S2CellId(ll).parent(CELL_INDEX_LEVEL);
    mCellNode.put(cell,node.id());

    if (node.tags().size() > 0) {
      ::capnp::MallocMessageBuilder message;
      Node::Builder nodeMsg = message.initRoot<Node>();
      setTags<Node::Builder>(node.tags(),nodeMsg);
      auto metadata = nodeMsg.initMetadata();
      metadata.setVersion(node.version());
      metadata.setTimestamp(node.timestamp().seconds_since_epoch());
      metadata.setChangeset(node.changeset());
      metadata.setUid(node.uid());
      metadata.setUser(node.user());
      kj::VectorOutputStream output;
      capnp::writeMessage(output,message);
      mNodes.put(node.id(),output,MDB_APPEND);
    }
  }

  // Stores the way (node refs, tags, metadata) and records a node->way pair
  // for every node reference.
  void way(const osmium::Way& way) {
    auto const &nodes = way.nodes();
    ::capnp::MallocMessageBuilder message;
    Way::Builder wayMsg = message.initRoot<Way>();
    // Keep the list builder returned by initNodes instead of re-fetching it
    // through getNodes() on every iteration.
    auto nodeList = wayMsg.initNodes(nodes.size());
    for (unsigned int i = 0; i < nodes.size(); i++) {
      nodeList.set(i,nodes[i].ref());
      mNodeWay.put(nodes[i].ref(),way.id());
    }
    setTags<Way::Builder>(way.tags(),wayMsg);
    auto metadata = wayMsg.initMetadata();
    metadata.setVersion(way.version());
    metadata.setTimestamp(way.timestamp().seconds_since_epoch());
    metadata.setChangeset(way.changeset());
    metadata.setUid(way.uid());
    metadata.setUser(way.user());
    kj::VectorOutputStream output;
    capnp::writeMessage(output,message);
    mWays.put(way.id(),output,MDB_APPEND);
  }

  // Stores the relation (tags, members, metadata) and records a
  // member->relation pair in the sorter matching each member's type.
  void relation(const osmium::Relation& relation) {
    ::capnp::MallocMessageBuilder message;
    Relation::Builder relationMsg = message.initRoot<Relation>();
    setTags<Relation::Builder>(relation.tags(),relationMsg);
    auto members = relationMsg.initMembers(relation.members().size());
    int i = 0;
    for (auto const &member : relation.members()) {
      members[i].setRef(member.ref());
      members[i].setRole(member.role());
      if (member.type() == osmium::item_type::node) {
        members[i].setType(RelationMember::Type::NODE);
        mNodeRelation.put(member.ref(),relation.id());
      }
      else if (member.type() == osmium::item_type::way) {
        members[i].setType(RelationMember::Type::WAY);
        mWayRelation.put(member.ref(),relation.id());
      }
      else if (member.type() == osmium::item_type::relation) {
        members[i].setType(RelationMember::Type::RELATION);
        mRelationRelation.put(member.ref(),relation.id());
      }
      i++;
    }
    auto metadata = relationMsg.initMetadata();
    metadata.setVersion(relation.version());
    metadata.setTimestamp(relation.timestamp().seconds_since_epoch());
    metadata.setChangeset(relation.changeset());
    metadata.setUid(relation.uid());
    metadata.setUser(relation.user());
    kj::VectorOutputStream output;
    capnp::writeMessage(output,message);
    mRelations.put(relation.id(),output,MDB_APPEND);
  }

private:
  MDB_env* mEnv;
  MDB_txn* mTxn;
  Sorter mCellNode;
  db::Locations mLocations;
  db::Elements mNodes;
  db::Elements mWays;
  db::Elements mRelations;
  Sorter mNodeWay;
  Sorter mNodeRelation;
  Sorter mWayRelation;
  Sorter mRelationRelation;
};
// Implements `osmx expand OSM_FILE OSMX_FILE`: creates a new .osmx database
// from an OSM input file.
// Prints usage and exits with status 1 when input or output is missing.
void cmdExpand(int argc, char* argv[]) {
  cxxopts::Options options("Expand", "Expand a a .osm.pbf into an .osmx file");
  options.add_options()
    ("v,verbose", "Verbose output")
    ("cmd", "Command to run", cxxopts::value<string>())
    ("input", "Input .pbf", cxxopts::value<string>())
    ("output", "Output .osmx", cxxopts::value<string>())
  ;
  options.parse_positional({"cmd","input", "output"});
  auto result = options.parse(argc, argv);

  if (result.count("input") == 0 || result.count("output") == 0) {
    cout << "Usage: osmx expand OSM_FILE OSMX_FILE [OPTIONS]" << endl << endl;
    cout << "OSM_FILE must be an OSM XML or PBF." << endl << endl;
    cout << "EXAMPLE:" << endl;
    cout << " osmx expand planet_latest.osm.pbf planet.osmx" << endl << endl;
    cout << "OPTIONS:" << endl;
    cout << " --v,--verbose: verbose output." << endl;
    exit(1);
  }

  string input =result["input"].as<string>();
  string output = result["output"].as<string>();

  Timer timer("convert");
  MDB_env* env = db::createEnv(output,true);
  MDB_txn* txn;
  CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn));

  const osmium::io::File input_file{input};
  osmium::io::ReaderWithProgressBar reader{true, input_file, osmium::osm_entity_bits::object};

  // Echo the input header and copy the replication state into the metadata table.
  db::Metadata metadata(txn);
  auto header = reader.header();
  for (auto option : header) {
    cout << option.first << " " << option.second << endl;
  }
  cout << "Box: " << header.box() << endl;
  cout << "Timestamp: " << header.get("osmosis_replication_timestamp") << endl;
  cout << "Sequence#: " << header.get("osmosis_replication_sequence_number") << endl;
  metadata.put("osmosis_replication_timestamp",header.get("osmosis_replication_timestamp"));
  metadata.put("osmosis_replication_sequence_number",header.get("osmosis_replication_sequence_number"));
  metadata.put("import_filename",input);

  // The directory calls must not live inside assert(): with NDEBUG defined the
  // whole expression, including the call itself, would be compiled out.
  string tempDir = output + "-temp";
  if (mkdir(tempDir.c_str(),S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) != 0) {
    cerr << "Unable to create temporary directory " << tempDir << endl;
    exit(1);
  }

  {
    // The handler's destructor commits `txn` and writes the index tables, so
    // it has to go out of scope before the environment is closed.
    Timer insert("insert");
    Handler handler(env,txn,tempDir);
    osmium::apply(reader, handler);
  }

  if (rmdir(tempDir.c_str()) != 0) {
    cerr << "Unable to remove temporary directory " << tempDir << endl;
  }

  // Flush and close the environment explicitly, matching the other commands.
  mdb_env_sync(env,true);
  mdb_env_close(env);
}
================================================
FILE: src/extract.cpp
================================================
#include <string>
#include <fstream>
#include "s2/s2latlng.h"
#include "s2/s2region_coverer.h"
#include "s2/s2latlng_rect.h"
#include "osmium/io/any_output.hpp"
#include "osmium/util/progress_bar.hpp"
#include "osmium/memory/callback_buffer.hpp"
#include "osmium/builder/attr.hpp"
#include "osmium/builder/osm_object_builder.hpp"
#include "cxxopts.hpp"
#include "nlohmann/json.hpp"
#include "osmx/storage.h"
#include "osmx/region.h"
#include "osmx/util.h"
using namespace std;
using namespace osmx;
// Progress counters for an extract run, with a one-line JSON serialisation.
struct ExportProgress {
  string timestamp = "";
  uint64_t cells_total = 0;
  uint64_t cells_prog = 0;
  uint64_t nodes_total = 0;
  uint64_t nodes_prog = 0;
  uint64_t elems_total = 0;
  uint64_t elems_prog = 0;

  // Writes all counters to stdout as a single JSON object followed by a newline.
  void print() {
    cout << "{\"Timestamp\":\"" << timestamp << "\"";
    cout << ",\"CellsTotal\":" << cells_total;
    cout << ",\"CellsProg\":" << cells_prog;
    cout << ",\"NodesTotal\":" << nodes_total;
    cout << ",\"NodesProg\":" << nodes_prog;
    cout << ",\"ElemsTotal\":" << elems_total;
    cout << ",\"ElemsProg\":" << elems_prog;
    cout << "}" << endl;
  }
};
// Scoped progress reporter for one phase of the extract.
// Sets `total` on construction, increments `prog` on every tick(), and reports
// either through the terminal progress bar or as JSON via ExportProgress.
class ProgressSection {
public:
  ProgressSection(ExportProgress &expprog, uint64_t &total, uint64_t &prog, uint64_t total_to_set, bool jsonOutput)
    : progressbar(total_to_set, osmium::isatty(2) && !jsonOutput),
      prog(prog),
      total(total),
      jsonOutput(jsonOutput),
      expprog(expprog) {
    this->total = total_to_set;
  }

  ~ProgressSection() {
    progressbar.done();
  }

  // Advances the counter; emits a report only after the counter has moved by
  // more than 1% of the total since the previous report.
  void tick() {
    prog++;
    if (prog - last_prog <= (total / 100)) return;
    if (jsonOutput) {
      expprog.print();
    } else {
      progressbar.update(prog);
    }
    last_prog = prog;
  }

private:
  osmium::ProgressBar progressbar;
  uint64_t& prog;
  uint64_t& total;
  bool jsonOutput;
  ExportProgress &expprog;
  uint64_t last_prog = 0;
};
// Returns true when `str` ends with `suffix`; an empty suffix always matches.
static bool endsWith(const std::string& str, const std::string& suffix)
{
  if (suffix.size() > str.size()) return false;
  const std::string::size_type offset = str.size() - suffix.size();
  return str.compare(offset, suffix.size(), suffix) == 0;
}
// The region must be given with one of --bbox, --disc, --geojson or --poly,
// or loaded from a file with --region.
// Implements `osmx extract OSMX_FILE OUTPUT_FILE [OPTIONS]`.
//
// Steps:
//   1. build the query region and cover it with S2 cells;
//   2. collect node ids from the covering cells, then the ways and relations
//      that reference them (relations of relations are followed until no new
//      ones appear);
//   3. add the way members of type=multipolygon relations, then every node
//      referenced by a selected way;
//   4. write nodes, ways and relations to the output file.
//
// Prints usage and exits with status 1 when the positional arguments are
// missing or the --region file cannot be used; exits with status 0 after a
// message when no region option is given.
void cmdExtract(int argc, char * argv[]) {
  cxxopts::Options cmd_options("Extract", "Create an .osm.pbf from an .osmx file.");
  cmd_options.add_options()
    ("v,verbose", "Verbose output")
    ("noUserData", "Don't include changeset,uid,user fields (GDPR compliance)")
    ("jsonOutput", "JSON progress output")
    ("cmd", "Command to run", cxxopts::value<string>())
    ("osmx", "Input .osmx", cxxopts::value<string>())
    ("output", "Output file, pbf or xml", cxxopts::value<string>())
    ("bbox", "rectangle in minLat,minLon,maxLat,maxLon", cxxopts::value<string>())
    ("disc", "disc in centerLat,centerLon,radiusDegrees", cxxopts::value<string>())
    ("geojson","geoJson of region", cxxopts::value<string>())
    ("poly","osmosis .poly of region", cxxopts::value<string>())
    ("region","file for region with extension .bbox, .disc, .json or .poly", cxxopts::value<string>())
    ("expand","buffer at this cell level",cxxopts::value<int>())
  ;
  cmd_options.parse_positional({"cmd","osmx","output"});
  auto result = cmd_options.parse(argc, argv);

  if (result.count("osmx") == 0 || result.count("output") == 0) {
    cout << "Usage: osmx extract OSMX_FILE OUTPUT_FILE [OPTIONS]" << endl << endl;
    cout << "EXAMPLE:" << endl;
    cout << " osmx extract planet.osmx extract.osm.pbf --region region.json" << endl << endl;
    cout << "OPTIONS:" << endl;
    cout << " --v,--verbose: verbose output." << endl;
    cout << " --jsonOutput: log progress as JSON messages." << endl;
    cout << " --bbox MIN_LAT,MIN_LON,MAX_LAT,MAX_LON: region is lat/lon bbox" << endl;
    cout << " --disc CENTER_LAT,CENTER_LON,R_DEGREES: region is disc" << endl;
    cout << " --geojson GEOJSON: region is an areal GeoJSON feature or geometry" << endl;
    cout << " --poly POLY: region is an Osmosis polygon" << endl;
    cout << " --region FILE: text file with .bbox, .disc, .json or .poly extension" << endl;
    cout << " --expand CELL_LEVEL: buffer region with cells at this level, <= 16" << endl;
    exit(1);
  }

  auto startTime = std::chrono::high_resolution_clock::now();
  ExportProgress prog;
  bool jsonOutput = result.count("jsonOutput") > 0;
  if (jsonOutput) prog.print();
  bool includeUserData = result.count("noUserData") == 0;

  // Build the query region from whichever region option was supplied.
  std::unique_ptr<Region> region;
  if (result.count("bbox")) region = std::make_unique<Region>(result["bbox"].as<string>(),"bbox");
  else if (result.count("disc")) region = std::make_unique<Region>(result["disc"].as<string>(),"disc");
  else if (result.count("geojson")) region = std::make_unique<Region>(result["geojson"].as<string>(),"geojson");
  else if (result.count("poly")) region = std::make_unique<Region>(result["poly"].as<string>(),"poly");
  else if (result.count("region")) {
    auto fname = result["region"].as<string>();
    std::ifstream t(fname);
    if (!t) {
      cerr << "Unable to read region file " << fname << endl;
      exit(1);
    }
    std::stringstream buffer;
    buffer << t.rdbuf();
    if (endsWith(fname,"bbox")) region = std::make_unique<Region>(buffer.str(),"bbox");
    else if (endsWith(fname,"disc")) region = std::make_unique<Region>(buffer.str(),"disc");
    else if (endsWith(fname,"json")) region = std::make_unique<Region>(buffer.str(),"geojson");
    else if (endsWith(fname,"poly")) region = std::make_unique<Region>(buffer.str(),"poly");
    else {
      // Without this branch `region` would stay null and be dereferenced below.
      cerr << "Region file must have a .bbox, .disc, .json or .poly extension: " << fname << endl;
      exit(1);
    }
  } else {
    cout << "No region specified." << endl;
    exit(0);
  }

  S2RegionCoverer::Options options;
  options.set_max_cells(1024);
  options.set_max_level(CELL_INDEX_LEVEL);
  S2RegionCoverer coverer(options);
  S2CellUnion covering = region->GetCovering(coverer);

  // --expand is applied only for levels in the range 0..16.
  if (result.count("expand")) {
    int expand = result["expand"].as<int>();
    if (expand >= 0 && expand <= 16) {
      covering.Expand(expand);
    }
  }

  if (!jsonOutput) {
    cout << "Query cells: " << covering.cell_ids().size() << endl;
  }

  roaring::Roaring64Map node_ids;
  roaring::Roaring64Map way_ids;
  roaring::Roaring64Map relation_ids;

  MDB_env* env = db::createEnv(result["osmx"].as<string>(),false);
  MDB_txn* txn;
  CHECK_LMDB(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));

  db::Metadata metadata(txn);
  auto timestamp = metadata.get("osmosis_replication_timestamp");
  prog.timestamp = timestamp;
  if (!jsonOutput) {
    cout << "Snapshot timestamp is " << prog.timestamp << endl;
  }

  // Collect the ids of all nodes stored under the covering cells.
  {
    ProgressSection section(prog,prog.cells_total,prog.cells_prog,covering.size(),jsonOutput);
    MDB_dbi dbi;
    MDB_cursor *cursor;
    CHECK_LMDB(mdb_dbi_open(txn, "cell_node", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
    for (auto cell_id : covering.cell_ids()) {
      db::traverseCell(cursor,cell_id,node_ids);
      section.tick();
    }
    mdb_cursor_close(cursor);
  }

  // Find all ways that reference any of those nodes.
  {
    ProgressSection section(prog,prog.nodes_total,prog.nodes_prog,node_ids.cardinality(),jsonOutput);
    MDB_dbi dbi;
    MDB_cursor *cursor;
    CHECK_LMDB(mdb_dbi_open(txn, "node_way", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
    for (auto const &node_id : node_ids) {
      db::traverseReverse(cursor,node_id,way_ids);
      section.tick();
    }
    mdb_cursor_close(cursor);
  }

  // find all Relations that these nodes or Ways are a member of.
  {
    MDB_dbi dbi;
    MDB_cursor *cursor;
    CHECK_LMDB(mdb_dbi_open(txn, "node_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
    for (auto const &node_id : node_ids) {
      db::traverseReverse(cursor,node_id,relation_ids);
    }
    mdb_cursor_close(cursor);
  }

  {
    MDB_dbi dbi;
    MDB_cursor *cursor;
    CHECK_LMDB(mdb_dbi_open(txn, "way_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));
    for (auto const &way_id : way_ids) {
      db::traverseReverse(cursor,way_id,relation_ids);
    }
    mdb_cursor_close(cursor);
  }

  // Follow relation->parent-relation links until a round adds nothing new.
  {
    MDB_dbi dbi;
    MDB_cursor *cursor;
    CHECK_LMDB(mdb_dbi_open(txn, "relation_relation", MDB_INTEGERKEY | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &dbi));
    CHECK_LMDB(mdb_cursor_open(txn,dbi,&cursor));

    roaring::Roaring64Map discovered_relations;
    roaring::Roaring64Map discovered_relations_2;

    for (auto const &relation_id : relation_ids) {
      db::traverseReverse(cursor,relation_id,discovered_relations);
    }
    relation_ids |= discovered_relations;

    while(true) {
      for (auto const &relation_id : discovered_relations) {
        db::traverseReverse(cursor,relation_id,discovered_relations_2);
      }
      int num_discovered = 0;
      for (auto discovered_relation_id : discovered_relations_2) {
        if (relation_ids.addChecked(discovered_relation_id)) num_discovered++;
      }
      if (num_discovered == 0) break;
      discovered_relations = discovered_relations_2;
      discovered_relations_2.clear();
    }
    mdb_cursor_close(cursor);
  }

  if (!jsonOutput) cout << "Relations: " << relation_ids.cardinality() << endl;

  db::Elements ways(txn,"ways");
  db::Elements relations(txn,"relations");

  // make it Multipolygon-complete: go through all Relations, finding any that have tag type=multipolygon, and add to Ways
  for (auto relation_id : relation_ids) {
    auto reader = relations.getReader(relation_id);
    Relation::Reader relation = reader.getRoot<Relation>();
    auto tags = relation.getTags();
    for (unsigned int i = 0; i < tags.size() / 2; i++) {
      if (tags[i*2] == "type" && tags[i*2+1] == "multipolygon") {
        for (auto const &member : relation.getMembers()) {
          if (member.getType() == RelationMember::Type::WAY) {
            auto ref = member.getRef();
            // check if the way exists, because this may be an extract
            if (ways.exists(ref)) way_ids.add(ref);
          }
        }
      }
    }
  }

  if (!jsonOutput) cout << "Ways: " << way_ids.cardinality() << endl;

  // make it Way-complete: go through all Ways and add in any missing Nodes.
  {
    for (auto way_id : way_ids) {
      auto reader = ways.getReader(way_id);
      Way::Reader way = reader.getRoot<Way>();
      for (auto node_id : way.getNodes()) {
        node_ids.add(node_id);
      }
    }
  }

  if (!jsonOutput) cout << "Nodes: " << node_ids.cardinality() << endl;

  // start Write
  osmium::io::Header header;
  header.set("generator", "osmx");
  header.set("timestamp", timestamp);
  header.set("osmosis_replication_timestamp", timestamp);

  auto bounds = region->GetBounds();
  // the box header is used by some applications,
  // for example: zooming to an overview in QGIS.
  // however, osmium only supports writing one PBF box header and it must be in the -180 to 180 lng, -90 to 90 lat range.
  // valid input regions can cross the antimeridian, but the output header box is omitted as it can't represent the input.
  if (bounds.lng_lo().degrees() < bounds.lng_hi().degrees()) {
    header.add_box(osmium::Box(bounds.lng_lo().degrees(),bounds.lat_lo().degrees(),bounds.lng_hi().degrees(),bounds.lat_hi().degrees()));
  }

  osmium::io::Writer writer{result["output"].as<string>(), header, osmium::io::overwrite::allow};
  osmium::memory::CallbackBuffer cb;
  cb.set_callback([&](osmium::memory::Buffer&& buffer) {
    writer(std::move(buffer));
  });

  {
    ProgressSection section(prog,prog.elems_total,prog.elems_prog,node_ids.cardinality() + way_ids.cardinality() + relation_ids.cardinality(),jsonOutput);

    // Writing nodes pass
    {
      db::Locations location_index(txn);
      db::Elements nodes_table(txn,"nodes");
      for (auto node_id : node_ids) {
        section.tick();
        auto loc = location_index.get(node_id);
        if (loc.is_undefined()) continue;
        {
          using namespace osmium::builder::attr;
          osmium::builder::NodeBuilder node_builder{cb.buffer()};
          node_builder.set_id(node_id);
          node_builder.set_location(loc.coords);
          node_builder.set_version(loc.version);

          // Nodes without an entry in the nodes table are written with id,
          // location and version only. This is a branch rather than a
          // `continue` so every node reaches the commit/flush below.
          if (nodes_table.exists(node_id)) {
            auto reader = nodes_table.getReader(node_id);
            Node::Reader node = reader.getRoot<Node>();
            auto metadata = node.getMetadata();
            node_builder.set_timestamp(metadata.getTimestamp());
            if (includeUserData) {
              node_builder.set_changeset(metadata.getChangeset());
              node_builder.set_user(metadata.getUser());
              node_builder.set_uid(metadata.getUid());
            }
            auto tags = node.getTags();
            osmium::builder::TagListBuilder tag_builder{node_builder};
            for (unsigned int i = 0; i < tags.size() / 2; i++) {
              tag_builder.add_tag(tags[i*2],tags[i*2+1]);
            }
          }
        }
        cb.buffer().commit();
        cb.possibly_flush();
      }
    }

    // Writing ways pass
    {
      for (auto way_id : way_ids) {
        section.tick();
        auto reader = ways.getReader(way_id);
        Way::Reader way = reader.getRoot<Way>();
        {
          using namespace osmium::builder::attr;
          osmium::builder::WayBuilder way_builder{cb.buffer()};
          way_builder.set_id(way_id);
          auto metadata = way.getMetadata();
          way_builder.set_version(metadata.getVersion());
          way_builder.set_timestamp(metadata.getTimestamp());
          if (includeUserData) {
            way_builder.set_changeset(metadata.getChangeset());
            way_builder.set_user(metadata.getUser());
            way_builder.set_uid(metadata.getUid());
          }
          {
            osmium::builder::WayNodeListBuilder way_node_list_builder{way_builder};
            for (auto node_id : way.getNodes()) {
              way_node_list_builder.add_node_ref(node_id);
            }
          }
          auto tags = way.getTags();
          osmium::builder::TagListBuilder tag_builder{way_builder};
          for (unsigned int i = 0; i < tags.size() / 2; i++) {
            tag_builder.add_tag(tags[i*2],tags[i*2+1]);
          }
        }
        cb.buffer().commit();
        cb.possibly_flush();
      }
    }

    // Writing relations pass
    {
      for (auto relation_id : relation_ids) {
        section.tick();
        auto reader = relations.getReader(relation_id);
        Relation::Reader relation = reader.getRoot<Relation>();
        {
          using namespace osmium::builder::attr;
          osmium::builder::RelationBuilder relation_builder{cb.buffer()};
          relation_builder.set_id(relation_id);
          auto metadata = relation.getMetadata();
          relation_builder.set_version(metadata.getVersion());
          relation_builder.set_timestamp(metadata.getTimestamp());
          if (includeUserData) {
            relation_builder.set_changeset(metadata.getChangeset());
            relation_builder.set_user(metadata.getUser());
            relation_builder.set_uid(metadata.getUid());
          }
          {
            osmium::builder::RelationMemberListBuilder relation_member_list_builder{relation_builder};
            for (auto const &member : relation.getMembers()) {
              if (member.getType() == RelationMember::Type::NODE) {
                relation_member_list_builder.add_member(osmium::item_type::node,member.getRef(),member.getRole());
              } else if (member.getType() == RelationMember::Type::WAY) {
                relation_member_list_builder.add_member(osmium::item_type::way,member.getRef(),member.getRole());
              } else {
                relation_member_list_builder.add_member(osmium::item_type::relation,member.getRef(),member.getRole());
              }
            }
          }
          auto tags = relation.getTags();
          osmium::builder::TagListBuilder tag_builder{relation_builder};
          for (unsigned int i = 0; i < tags.size() / 2; i++) {
            tag_builder.add_tag(tags[i*2],tags[i*2+1]);
          }
        }
        cb.buffer().commit();
        cb.possibly_flush();
      }
    }
  }

  cb.flush();
  writer.close();

  // End the read transaction before closing the environment.
  mdb_txn_abort(txn);
  mdb_env_close(env);

  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now() - startTime ).count();
  if (!jsonOutput) cout << "Finished export in " << duration/1000.0 << " seconds." << endl;
}
================================================
FILE: src/region.cpp
================================================
#include <sstream>
#include <iostream>
#include "s2/s2latlng.h"
#include "s2/s2latlng_rect.h"
#include "s2/s2cap.h"
#include "s2/s2polygon.h"
#include "s2/s2loop.h"
#include "osmx/region.h"
// Removes trailing whitespace (as classified by std::isspace) from `s` in place.
static inline void rtrim(std::string &s) {
  // The lambda takes unsigned char: passing a negative char value to
  // std::isspace is undefined behaviour.
  s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
    return !std::isspace(ch);
  }).base(), s.end());
}
// Builds an S2Polygon from a GeoJSON Polygon "coordinates" array, i.e. an
// array of rings where each ring is an array of [lon, lat] positions.
// The last position of each ring is dropped, and each loop is normalized
// before being added to the polygon.
std::unique_ptr<S2Polygon> S2PolyFromCoordinates(nlohmann::json &coordinates) {
  std::vector<std::unique_ptr<S2Loop>> loopRegions;
  // Iterate by reference: copying a ring deep-copies all of its positions.
  for (auto const &loop : coordinates) {
    // An empty ring has nothing to add, and size() - 1 would wrap around.
    if (loop.empty()) continue;
    std::vector<S2Point> points;
    points.reserve(loop.size() - 1);
    // ignore the last repeated point
    for (std::size_t i = 0; i + 1 < loop.size(); i++) {
      double lon = loop[i][0].get<double>();
      double lat = loop[i][1].get<double>();
      points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint());
    }
    auto loopRegion = std::make_unique<S2Loop>(points);
    loopRegion->Normalize();
    loopRegions.push_back(std::move(loopRegion));
  }
  return std::make_unique<S2Polygon>(std::move(loopRegions));
}
// Appends the region(s) described by a GeoJSON geometry object to mRegions.
// Polygon adds one S2Polygon and MultiPolygon adds one per member polygon;
// any other geometry type is ignored.
void Region::AddS2RegionFromGeometry(nlohmann::json &geometry) {
  if (geometry["type"] == "Polygon") {
    mRegions.push_back(S2PolyFromCoordinates(geometry["coordinates"]));
  } else if (geometry["type"] == "MultiPolygon") {
    // Iterate by reference so each polygon's coordinates are not deep-copied.
    for (auto &polygon : geometry["coordinates"]) {
      mRegions.push_back(S2PolyFromCoordinates(polygon));
    }
  }
}
void Region::AddS2RegionFromPolyFile(std::istringstream &file) {
std::vector<S2Point> points;
std::string line;
while (std::getline(file, line)) {
rtrim(line);
double lat, lon;
// END of polygon
if (line == "END") {
break;
} else {
std::istringstream iss(line);
iss >> lon;
iss >> lat;
points.push_back(S2LatLng::FromDegrees(lat,lon).Normalized().ToPoint());
}
}
if (points[0] == points[points.size() - 1]) points.pop_back();
auto loop = std::make_unique<S2Loop>(points);
loop->Normalize();
mRegions.push_back(std::move(loop));
}
// Builds a region from its textual description.
//   ext == "bbox":    "minLat,minLon,maxLat,maxLon"
//   ext == "disc":    "centerLat,centerLon,radiusDegrees"
//   ext == "poly":    Osmosis polygon file contents
//   ext == "geojson": a Polygon, MultiPolygon, GeometryCollection, Feature or
//                     FeatureCollection
// Any other ext prints an error and trips an assert.
Region::Region(const std::string &text, const std::string &ext) {
  if (ext == "bbox") {
    // Zero-initialized so a short or malformed string never leaves a
    // coordinate indeterminate.
    double minLat = 0.0, minLon = 0.0, maxLat = 0.0, maxLon = 0.0;
    if (std::sscanf(text.c_str(), "%lf,%lf,%lf,%lf",&minLat,&minLon,&maxLat,&maxLon) != 4) {
      std::cerr << "Malformed bbox, expected minLat,minLon,maxLat,maxLon: " << text << std::endl;
    }
    auto lo = S2LatLng::FromDegrees(minLat,minLon).Normalized();
    auto hi = S2LatLng::FromDegrees(maxLat,maxLon).Normalized();
    mRegions.push_back(std::make_unique<S2LatLngRect>(lo,hi));
  } else if (ext == "disc") {
    double lat = 0.0, lon = 0.0, radius = 0.0;
    if (std::sscanf(text.c_str(), "%lf,%lf,%lf",&lat,&lon,&radius) != 3) {
      std::cerr << "Malformed disc, expected centerLat,centerLon,radiusDegrees: " << text << std::endl;
    }
    auto center = S2LatLng::FromDegrees(lat,lon).Normalized();
    auto angle = S1Angle::Degrees(radius);
    mRegions.push_back(std::make_unique<S2Cap>(center.ToPoint(),angle));
  } else if (ext == "poly") {
    std::istringstream f(text);
    std::string line;
    // discard the first line
    std::getline(f,line);
    // this will either parse name of next polygon
    // or END at end of file
    while (std::getline(f, line)) {
      // Trim first so "END\r" or "END " is still recognised as the terminator.
      rtrim(line);
      if (line.empty()) continue;
      // END of file
      if (line == "END") {
        break;
      }
      AddS2RegionFromPolyFile(f);
    }
  } else if (ext == "geojson") {
    auto json = nlohmann::json::parse(text);
    if (json["type"] == "Polygon" || json["type"] == "MultiPolygon") {
      AddS2RegionFromGeometry(json);
    } else if (json["type"] == "GeometryCollection") {
      // The member geometries live under "geometries"; each one is added
      // individually.
      for (auto &geometry : json["geometries"]) {
        AddS2RegionFromGeometry(geometry);
      }
    } else if (json["type"] == "Feature") {
      AddS2RegionFromGeometry(json["geometry"]);
    } else if (json["type"] == "FeatureCollection") {
      for (auto &feature : json["features"]) {
        AddS2RegionFromGeometry(feature["geometry"]);
      }
    }
  } else {
    std::cerr << "Unknown ext" << std::endl;
    assert(false);
  }
}
// Returns true when at least one of the stored regions contains point `p`.
bool Region::Contains(S2Point p) {
  for (auto const &region : mRegions) {
    if (region->Contains(p)) return true;
  }
  return false;
}
// Returns the union of the coverings that `coverer` produces for each stored
// region; an empty cell union when there are no regions.
S2CellUnion Region::GetCovering(S2RegionCoverer &coverer) {
  S2CellUnion retval;
  for (auto const &region : mRegions) {
    retval = retval.Union(coverer.GetCovering(*region));
  }
  return retval;
}
// Returns a lat/lng rectangle spanning the rectangular bounds of every stored
// region, built from the smallest lo and largest hi latitude and longitude.
// Returns an empty rectangle when there are no regions.
S2LatLngRect Region::GetBounds() {
  if (mRegions.empty()) return S2LatLngRect::Empty();

  // Compute each region's bound once instead of once per accessed corner.
  const S2LatLngRect first = mRegions[0]->GetRectBound();
  auto lat_min = first.lat_lo();
  auto lat_max = first.lat_hi();
  auto lng_min = first.lng_lo();
  auto lng_max = first.lng_hi();

  for (size_t i = 1; i < mRegions.size(); i++) {
    const S2LatLngRect bound = mRegions[i]->GetRectBound();
    if (bound.lat_lo() < lat_min) lat_min = bound.lat_lo();
    if (bound.lat_hi() > lat_max) lat_max = bound.lat_hi();
    if (bound.lng_lo() < lng_min) lng_min = bound.lng_lo();
    if (bound.lng_hi() > lng_max) lng_max = bound.lng_hi();
  }
  return S2LatLngRect(S2LatLng(lat_min,lng_min),S2LatLng(lat_max,lng_max));
}
================================================
FILE: src/storage.cpp
================================================
#include "osmx/storage.h"
#include "osmx/util.h"
namespace osmx { namespace db {
// Creates and opens the LMDB environment stored in the single file `path`.
// The environment is opened read-only unless `writable` is true.
MDB_env *createEnv(std::string path, bool writable) {
  MDB_env* env;
  CHECK_LMDB(mdb_env_create(&env));
  // the maximum size of any LMDB dataset.
  // 2TB is a safe number for just OSM data as of 02/2023
  // only affects the size of virtual memory, not real memory.
  // Computed in unsigned long long so the product does not overflow where
  // `unsigned long` is 32 bits wide.
  CHECK_LMDB(mdb_env_set_mapsize(env,2ULL * 1024ULL * 1024ULL * 1024ULL * 1024ULL));
  CHECK_LMDB(mdb_env_set_maxdbs(env,10));
  int flags = 0;
  if (!writable) flags |= MDB_RDONLY;
  CHECK_LMDB(mdb_env_open(env, path.c_str(),MDB_NOSUBDIR | MDB_NORDAHEAD | MDB_NOSYNC | flags, 0664));
  return env;
}
// Opens the "metadata" table inside `txn`, asking LMDB to create it if missing.
Metadata::Metadata(MDB_txn *txn)
  : mTxn(txn)
{
  CHECK_LMDB(mdb_dbi_open(mTxn, "metadata", MDB_CREATE, &mDbi));
}
void Metadata::put(const std::string &key_str, const std::string &value_str) {
MDB_val key, data;
key.mv_size = key_str.size();
key.mv_data = (void *)key_str.data();
data.mv_size = value_str.size();
data.mv_data = (void *)value_str.data();
CHECK_LMDB(mdb_put(mTxn,mDbi, &key, &data, 0));
}
std::string Metadata::get(const std::string &key_str) {
MDB_val key, data;
key.mv_size = key_str.size();
key.mv_data = (void *)key_str.data();
auto retval = mdb_get(mTxn,mDbi, &key, &data);
if (retval == 0) return std::string((const char *)data.mv_data,data.mv_size);
else return "";
}
// Opens the integer-keyed element table `name` inside `txn`, asking LMDB to
// create it if missing.
Elements::Elements(MDB_txn *txn, const std::string &name)
  : mTxn(txn)
{
  CHECK_LMDB(mdb_dbi_open(txn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE, &mDbi));
}
void Elements::put(uint64_t id, kj::VectorOutputStream &vos, int flags) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&id;
data.mv_size = vos.getArray().size();
data.mv_data = (void *)vos.getArray().begin();
CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags));
}
// Deletes the element stored under `id`. The result of mdb_del is
// deliberately ignored, so deleting a missing id is not an error.
void Elements::del(uint64_t id) {
  MDB_val key;
  key.mv_size = sizeof(uint64_t);
  key.mv_data = (void *)&id;
  // This table is opened without MDB_DUPSORT, so LMDB ignores the data
  // argument; pass null instead of an uninitialized MDB_val.
  mdb_del(mTxn, mDbi, &key, nullptr);
}
bool Elements::exists(uint64_t id) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&id;
return mdb_get(mTxn,mDbi,&key,&data) == 0;
}
// Returns a Cap'n Proto reader over the message stored under `id`.
// The reader points at the memory mdb_get returned rather than a copy.
// A failed lookup is reported through CHECK_LMDB.
capnp::FlatArrayMessageReader Elements::getReader(uint64_t id) {
  MDB_val key, data;
  key.mv_size = sizeof(uint64_t);
  key.mv_data = (void *)&id;
  CHECK_LMDB(mdb_get(mTxn,mDbi,&key,&data));
  // ArrayPtr takes an element count, not a byte count: convert the value size
  // to words so the reader's bounds match the stored message.
  auto arr = kj::ArrayPtr<const capnp::word>((const capnp::word *)data.mv_data,data.mv_size / sizeof(capnp::word));
  return capnp::FlatArrayMessageReader(arr);
}
// Opens the integer-keyed "locations" table inside `txn`, asking LMDB to
// create it if missing.
Locations::Locations(MDB_txn *txn)
  : mTxn(txn)
{
  CHECK_LMDB(mdb_dbi_open(mTxn, "locations", MDB_INTEGERKEY | MDB_CREATE, &mDbi));
}
void Locations::put(uint64_t id, const Location value, int flags) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&id;
int32_t buf[3];
buf[0] = value.coords.x();
buf[1] = value.coords.y();
buf[2] = value.version;
data.mv_size = sizeof(uint32_t) * 3;
data.mv_data = (void *)&buf;
CHECK_LMDB(mdb_put(mTxn, mDbi, &key, &data, flags));
}
// Deletes the location stored under `id`. The result of mdb_del is
// deliberately ignored, so deleting a missing id is not an error.
void Locations::del(uint64_t id) {
  MDB_val key;
  key.mv_size = sizeof(uint64_t);
  key.mv_data = (void *)&id;
  // This table is opened without MDB_DUPSORT, so LMDB ignores the data
  // argument; pass null instead of an uninitialized MDB_val.
  mdb_del(mTxn,mDbi,&key,nullptr);
}
// Looks up the location stored under `id`.
// Returns a default-constructed Location when the id is not found; any other
// LMDB error is reported through CHECK_LMDB.
Location Locations::get(uint64_t id) const {
  MDB_val key;
  MDB_val data;
  key.mv_data = (void *)&id;
  key.mv_size = sizeof(uint64_t);

  int retval = mdb_get(mTxn, mDbi, &key, &data);
  if (retval == MDB_NOTFOUND) {
    return Location{};
  }
  CHECK_LMDB(retval);

  // The value holds three 32-bit integers: x, y, version.
  int32_t *fields = (int32_t *)data.mv_data;
  return Location{osmium::Location(fields[0],fields[1]),fields[2]};
}
// Returns true only when a lookup of `id` succeeds.
// Testing for success (rather than "not MDB_NOTFOUND") means other lookup
// errors are not reported as "exists", matching Elements::exists.
bool Locations::exists(uint64_t id) {
  MDB_val key, data;
  key.mv_size = sizeof(uint64_t);
  key.mv_data = (void *)&id;
  return mdb_get(mTxn, mDbi, &key, &data) == 0;
}
// Opens the index table `name` inside `txn`, asking LMDB to create it if
// missing. Keys are integers, and each key holds a sorted set of fixed-size
// integer values.
Index::Index(MDB_txn *txn, const std::string &name)
  : mTxn(txn)
{
  CHECK_LMDB(mdb_dbi_open(txn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));
}
void Index::put(uint64_t from, uint64_t osm_id, int flags) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&from;
data.mv_size = sizeof(uint64_t);
data.mv_data = (void *)&osm_id;
CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags));
}
void Index::del(uint64_t from, uint64_t osm_id ) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&from;
data.mv_size = sizeof(uint64_t);
data.mv_data = (void *)&osm_id;
mdb_del(mTxn,mDbi,&key,&data);
}
// Begins a write transaction on `env` and opens the index table `name` in it,
// asking LMDB to create the table if missing.
IndexWriter::IndexWriter(MDB_env *env, const std::string &name)
  : mEnv(env),
    mName(name)
{
  CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &mTxn));
  CHECK_LMDB(mdb_dbi_open(mTxn, name.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));
}
void IndexWriter::put(uint64_t from, uint64_t osm_id, int flags) {
MDB_val key, data;
key.mv_size = sizeof(uint64_t);
key.mv_data = (void *)&from;
data.mv_size = sizeof(uint64_t);
data.mv_data = (void *)&osm_id;
CHECK_LMDB(mdb_put(mTxn,mDbi,&key,&data,flags));
if (mWrites++ == 8000000) {
CHECK_LMDB(mdb_txn_commit(mTxn));
CHECK_LMDB(mdb_txn_begin(mEnv, NULL, 0, &mTxn));
CHECK_LMDB(mdb_dbi_open(mTxn, mName.c_str(), MDB_INTEGERKEY | MDB_CREATE | MDB_DUPSORT | MDB_DUPFIXED | MDB_INTEGERDUP, &mDbi));
mWrites = 0;
}
}
// Commits the write transaction that is currently open for this writer.
void IndexWriter::commit() {
  const int rc = mdb_txn_commit(mTxn);
  CHECK_LMDB(rc);
}
// Adds to `set` every id stored under a key in the range
// [cell_id.child_begin(CELL_INDEX_LEVEL), cell_id.child_end(CELL_INDEX_LEVEL)).
// `cursor` must be open on a table whose keys are S2CellId values and whose
// duplicate values are uint64 ids.
void traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roaring64Map &set) {
  S2CellId range_begin = cell_id.child_begin(CELL_INDEX_LEVEL);
  S2CellId range_end = cell_id.child_end(CELL_INDEX_LEVEL);

  MDB_val key, data;
  key.mv_size = sizeof(S2CellId);
  key.mv_data = static_cast<void *>(&range_begin);

  // Seek to the first key >= range_begin; a failure means there is nothing
  // at or after it.
  if (mdb_cursor_get(cursor, &key, &data, MDB_SET_RANGE) != 0) return;

  for (;;) {
    // Stop as soon as the cursor has moved past the end of the range.
    if (!(*static_cast<S2CellId *>(key.mv_data) < range_end)) return;

    // Read the duplicates of the current key one page-sized batch at a time.
    for (int rc = mdb_cursor_get(cursor, &key, &data, MDB_GET_MULTIPLE);
         rc == 0;
         rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_MULTIPLE)) {
      const uint64_t *ids = static_cast<const uint64_t *>(data.mv_data);
      const size_t count = data.mv_size / sizeof(uint64_t);
      for (size_t n = 0; n < count; n++) {
        set.add(ids[n]);
      }
    }

    // Advance to the next distinct key; failure means the end of the table.
    if (mdb_cursor_get(cursor, &key, &data, MDB_NEXT_NODUP) != 0) return;
  }
}
// Adds to `set` every uint64 value stored under the exact key `from`.
// Does nothing when the key is not present.
void traverseReverse(MDB_cursor *cursor,uint64_t from, roaring::Roaring64Map &set) {
  MDB_val key{sizeof(uint64_t), &from};
  MDB_val data;

  if (mdb_cursor_get(cursor, &key, &data, MDB_SET) != 0) return;

  // Read the duplicates of the key one batch at a time.
  for (int rc = mdb_cursor_get(cursor, &key, &data, MDB_GET_MULTIPLE);
       rc == 0;
       rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_MULTIPLE)) {
    const uint64_t *targets = static_cast<const uint64_t *>(data.mv_data);
    const size_t count = data.mv_size / sizeof(uint64_t);
    for (size_t n = 0; n < count; n++) {
      set.add(targets[n]);
    }
  }
}
}}
================================================
FILE: src/update.cpp
================================================
#include <iostream>
#include <cassert>
#include <set>
#include "cxxopts.hpp"
#include "osmium/handler.hpp"
#include "osmium/io/any_input.hpp"
#include "osmium/visitor.hpp"
#include "osmium/util/progress_bar.hpp"
#include "roaring/roaring.hh"
// Historically, OSMExpress vendored its dependencies, but we are moving away
// from this. At the moment, the S2 geometry library is the last remaining
// dependency still being vendored. Our (rather ancient) bundled version
// of S2 tests for a number of implementations of the Standard C library.
// If S2 happens to recognize the library, it includes <byteswap.h>
// (unless it is being compiled by a Microsoft or Apple compiler);
// otherwise, S2 falls back to its own byteswap implementation.
// What S2 does (or rather did, in the old version we happen
// to bundle) isn't so great; it would have been better for S2 to use
// a standards-conforming way of byteswapping, test for its presence,
// and only use the fallback if that test fails. But that's how it is.
//
// Anyhow, the (equally ancient) version of CRoaring, another library
// that we previously vendored into OSMExpress, was polluting the
// C macro namespace in a way that made our bundled version of S2
// believe it was on a C library it knew about. Therefore, at the time
// when OSMExpress still vendored that old version of CRoaring, the
// bundled version of S2 would always include <byteswap.h> instead
// of (re-)defining it, even if S2 did not recognize the C library.
//
// As we upgraded CRoaring to a newer version, which does not pollute
// the C macro namespace anymore, the C preprocessor would now execute
// the fallback path in the S2 headers when compiling with a Standard C
// library that our old version of S2 does not recognize. This caused
// compilation errors on Alpine Linux, which uses musl, a very lightweight
// but fully standards-conforming implementation of the Standard C library.
//
// The following hack prevents our vendored old version of S2 from
// supplying its own byteswap functions. On Bionic (and also other
// modern libc implementations, including musl), it is sufficient to
// include <byteswap.h>. Note we cannot explicitly test for musl here,
// because musl does not define a __MUSL__ macro. (They don't want to,
// since such a macro would not be standards-conforming; whether it's
// really helpful to be so puristic has been the subject of much debate).
//
// TODO: Remove this hack once we stop vendoring the S2 geometry library.
// https://github.com/bdon/OSMExpress/issues/20
#if !defined(_MSC_VER) && !defined(__APPLE__) && !defined(__GLIBC__) \
&& !defined(__BIONIC__) && !defined(__ASYLO__)
#define __BIONIC__ 1
#endif
#include "s2/s2latlng.h"
#include "s2/s2cell_union.h"
#include "osmx/storage.h"
#include "osmx/util.h"
using namespace std;
using namespace osmx;
class DataUpdate : public osmium::handler::Handler {
public:
DataUpdate(MDB_txn *txn) :
mTxn(txn),
mLocations(txn),
mNodes(txn,"nodes"),
mWays(txn,"ways"),
mRelations(txn,"relations"),
mCellNode(txn,"cell_node"),
mNodeWay(txn,"node_way"),
mNodeRelation(txn,"node_relation"),
mWayRelation(txn,"way_relation"),
mRelationRelation(txn, "relation_relation") {
}
// update location, node, cell_location tables
void node(const osmium::Node& node) {
uint64_t id = node.id();
db::Location prev_location = mLocations.get(id);
db::Location new_location = db::Location{node.location(),(int32_t)node.version()};
uint64_t prev_cell;
if (prev_location.is_defined()) prev_cell = S2CellId(S2LatLng::FromDegrees(prev_location.coords.lat(),prev_location.coords.lon())).parent(CELL_INDEX_LEVEL).id();
if (!node.visible()) {
mLocations.del(id);
mNodes.del(id);
mCellNode.del(prev_cell,id);
return;
} else {
mLocations.put(id,new_location);
if (node.tags().size() > 0) {
::capnp::MallocMessageBuilder message;
Node::Builder nodeMsg = message.initRoot<Node>();
setTags<Node::Builder>(node.tags(),nodeMsg);
auto metadata = nodeMsg.initMetadata();
metadata.setVersion(node.version());
metadata.setTimestamp(node.timestamp().seconds_since_epoch());
metadata.setChangeset(node.changeset());
metadata.setUid(node.uid());
metadata.setUser(node.user());
kj::VectorOutputStream output;
capnp::writeMessage(output,message);
mNodes.put(id,output);
} else {
mNodes.del(id);
}
}
uint64_t new_cell = S2CellId(S2LatLng::FromDegrees(new_location.coords.lat(),new_location.coords.lon())).parent(CELL_INDEX_LEVEL).id();
if (!prev_location.is_defined()) {
mCellNode.put(new_cell,id);
return;
}
if (prev_cell != new_cell) {
mCellNode.del(prev_cell,id);
mCellNode.put(new_cell,id);
}
}
// update way, node_way tables
void way(const osmium::Way &way) {
uint64_t id = way.id();
set<uint64_t> prev_nodes;
set<uint64_t> new_nodes;
if (mWays.exists(id)) {
auto reader = mWays.getReader(id);
Way::Reader way = reader.getRoot<Way>();
for (auto const &node_id : way.getNodes()) {
prev_nodes.insert(node_id);
}
}
if (!way.visible()) {
mWays.del(id);
} else {
auto const &nodes = way.nodes();
::capnp::MallocMessageBuilder message;
Way::Builder wayMsg = message.initRoot<Way>();
wayMsg.initNodes(nodes.size());
int i = 0;
for (int i = 0; i < nodes.size(); i++) {
wayMsg.getNodes().set(i,nodes[i].ref());
new_nodes.insert(nodes[i].ref());
}
setTags<Way::Builder>(way.tags(),wayMsg);
auto metadata = wayMsg.initMetadata();
metadata.setVersion(way.version());
metadata.setTimestamp(way.timestamp().seconds_since_epoch());
metadata.setChangeset(way.changeset());
metadata.setUid(way.uid());
metadata.setUser(way.user());
kj::VectorOutputStream output;
capnp::writeMessage(output,message);
mWays.put(id,output);
}
if (!way.visible()) {
for (uint64_t node_id : prev_nodes) mNodeWay.del(node_id,id);
} else {
for (uint64_t node_id : prev_nodes) {
if (new_nodes.count(node_id) == 0) mNodeWay.del(node_id,id);
}
for (uint64_t node_id : new_nodes) {
if (prev_nodes.count(node_id) == 0) mNodeWay.put(node_id,id);
}
}
}
// update relation, node_relation, way_relation and relation_relation tables
void relation(const osmium::Relation &relation) {
uint64_t id = relation.id();
set<uint64_t> prev_nodes;
set<uint64_t> prev_ways;
set<uint64_t> prev_relations;
set<uint64_t> new_nodes;
set<uint64_t> new_ways;
set<uint64_t> new_relations;
if (mRelations.exists(id)) {
auto reader = mRelations.getReader(id);
Relation::Reader relation = reader.getRoot<Relation>();
for (auto const &member : relation.getMembers()) {
if (member.getType() == RelationMember::Type::NODE) {
prev_nodes.insert(member.getRef());
} else if (member.getType() == RelationMember::Type::WAY) {
prev_ways.insert(member.getRef());
} else {
prev_relations.insert(member.getRef());
}
}
}
if (!relation.visible()) {
mRelations.del(relation.id());
} else {
::capnp::MallocMessageBuilder message;
Relation::Builder relationMsg = message.initRoot<Relation>();
setTags<Relation::Builder>(relation.tags(),relationMsg);
auto members = relationMsg.initMembers(relation.members().size());
int i = 0;
for (auto const &member : relation.members()) {
members[i].setRef(member.ref());
members[i].setRole(member.role());
if (member.type() == osmium::item_type::node) {
new_nodes.insert(member.ref());
members[i].setType(RelationMember::Type::NODE);
}
else if (member.type() == osmium::item_type::way) {
new_ways.insert(member.ref());
members[i].setType(RelationMember::Type::WAY);
}
else if (member.type() == osmium::item_type::relation) {
new_relations.insert(member.ref());
members[i].setType(RelationMember::Type::RELATION);
}
i++;
}
auto metadata = relationMsg.initMetadata();
metadata.setVersion(relation.version());
metadata.setTimestamp(relation.timestamp().seconds_since_epoch());
metadata.setChangeset(relation.changeset());
metadata.setUid(relation.uid());
metadata.setUser(relation.user());
kj::VectorOutputStream output;
capnp::writeMessage(output,message);
mRelations.put(relation.id(),output);
}
if (!relation.visible()) {
for (uint64_t node_id : prev_nodes) mNodeRelation.del(node_id,id);
for (uint64_t way_id : prev_ways) mWayRelation.del(way_id,id);
for (uint64_t relation_id : prev_relations) mRelationRelation.del(relation_id,id);
} else {
for (uint64_t node_id : prev_nodes) {
if (new_nodes.count(node_id) == 0) mNodeRelation.del(node_id,id);
}
for (uint64_t node_id : new_nodes) {
if (prev_nodes.count(node_id) == 0) mNodeRelation.put(node_id,id);
}
for (uint64_t way_id : prev_ways) {
if (new_ways.count(way_id) == 0) mWayRelation.del(way_id,id);
}
for (uint64_t way_id : new_ways) {
if (prev_ways.count(way_id) == 0) mWayRelation.put(way_id,id);
}
for (uint64_t relation_id : prev_relations) {
if (new_relations.count(relation_id) == 0) mRelationRelation.del(relation_id,id);
}
for (uint64_t relation_id : new_relations) {
if (prev_relations.count(relation_id) == 0) mRelationRelation.put(relation_id,id);
}
}
}
private:
MDB_txn *mTxn;
db::Locations mLocations;
db::Elements mNodes;
db::Elements mWays;
db::Elements mRelations;
db::Index mNodeWay;
db::Index mNodeRelation;
db::Index mWayRelation;
db::Index mRelationRelation;
db::Index mCellNode;
};
void cmdUpdate(int argc, char* argv[]) {
cxxopts::Options cmdoptions("Update", "Update an .osmx file with a .osc diff.");
cmdoptions.add_options()
("v,verbose", "Verbose output")
("commit", "Commit the update")
("cmd", "Command to run", cxxopts::value<string>())
("osmx", ".osmx to update", cxxopts::value<string>())
("osc", ".osc to apply", cxxopts::value<string>())
("seqnum", "The sequence number of the .osc", cxxopts::value<string>())
("timestamp", "The timestamp of the .osc", cxxopts::value<string>())
;
cmdoptions.parse_positional({"cmd","osmx","osc","seqnum","timestamp"});
auto result = cmdoptions.parse(argc, argv);
if (result.count("osmx") == 0 || result.count("osc") == 0 || \
result.count("seqnum") == 0 || result.count("timestamp") == 0) {
cout << "Usage: osmx update OSMX_FILE OSC_FILE SEQNUM TIMESTAMP [OPTIONS]" << endl;
cout << "Applies OSC_FILE and saves SEQNUM and TIMESTAMP into the metadata table." << endl << endl;
cout << "EXAMPLE:" << endl;
cout << " osmx update planet.osmx 123456.osc 123456 2019-09-05T00:00:00Z --commit" << endl << endl;
cout << "OPTIONS:" << endl;
cout << " --v,--verbose: verbose output." << endl;
cout << " --commit: Actually commit the transaction; otherwise runs the update and rolls back." << endl;
exit(1);
}
string osmx = result["osmx"].as<string>();
string osc = result["osc"].as<string>();
bool verbose = result.count("verbose") > 0;
auto startTime = std::chrono::high_resolution_clock::now();
MDB_env* env = db::createEnv(osmx,true);
MDB_txn* txn;
CHECK_LMDB(mdb_txn_begin(env, NULL, 0, &txn));
string old_seqnum = "UNKNOWN";
auto new_seqnum = result["seqnum"].as<string>();
auto new_timestamp = result["timestamp"].as<string>();
db::Metadata metadata(txn);
if (verbose) cout << "Timestamp: " << metadata.get("osmosis_replication_timestamp") << endl;
old_seqnum = metadata.get("osmosis_replication_sequence_number");
if (verbose) cout << "Starting update from " << old_seqnum << " to " << new_seqnum << endl;
const osmium::io::File input_file{osc};
osmium::io::Reader reader{input_file, osmium::osm_entity_bits::object};
DataUpdate data_update(txn);
osmium::apply(reader, data_update);
auto duration = (std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::high_resolution_clock::now() - startTime ).count()) / 1000.0;
if (result.count("commit") > 0) {
{
metadata.put("osmosis_replication_sequence_number",new_seqnum);
metadata.put("osmosis_replication_timestamp",new_timestamp);
}
CHECK_LMDB(mdb_txn_commit(txn));
cout << "Committed: ";
} else {
mdb_txn_abort(txn);
cout << "Aborted: ";
}
cout << old_seqnum << " -> " << new_seqnum << " in " << duration << " seconds." << endl;
mdb_env_sync(env,true);
mdb_env_close(env);
}
================================================
FILE: test/test_region.cpp
================================================
#include "catch2/catch_test_macros.hpp"
#include "s2/s2latlng.h"
#include "osmx/region.h"
using namespace std;
// osmium header format is like this: Box: (-79.82402,40.439216,-71.660801,45.07133)
// small: {\"bbox\":[40.7411\,-73.9937\,40.7486\,-73.9821]}
// big: {\"bbox\":[40.6762\,-74.0543\,40.8093\,-73.8603]}
// radius: {"center":[40.7411,-73.9937],"radius":25.5}
// indo: {\"bbox\":[-12.039321\,94.394531\,8.407168\,142.418292]}
// bbox should be minLat,minLon,maxLat,maxLon (opposite of GeoJSON)
// A bbox region built from the string "-1.0,-1.0,1.0,1.0" must contain both
// the origin and a point near its corner.
TEST_CASE("rectangular bbox") {
  SECTION("basic bbox") {
    string spec = "-1.0,-1.0,1.0,1.0";
    Region region{spec,"bbox"};

    auto origin = S2LatLng::FromDegrees(0,0).ToPoint();
    auto near_corner = S2LatLng::FromDegrees(0.9,0.9).ToPoint();

    REQUIRE(region.Contains(origin));
    REQUIRE(region.Contains(near_corner));
  }
}
// A disc region built from the string "0.0,0.0,1.0" must contain its own
// centre but not the point at (0.9, 0.9) degrees.
TEST_CASE("disc") {
  SECTION("basic disc") {
    string spec = "0.0,0.0,1.0";
    Region region{spec,"disc"};

    auto centre = S2LatLng::FromDegrees(0,0).ToPoint();
    auto outside = S2LatLng::FromDegrees(0.9,0.9).ToPoint();

    REQUIRE(region.Contains(centre));
    REQUIRE_FALSE(region.Contains(outside));
  }
}
// Regions built from GeoJSON geometry strings ("geojson" mode): containment
// for Polygon and MultiPolygon inputs, plus the bounds reported by GetBounds().
TEST_CASE("geojson polygon") {
// Single square ring from -1 to 1 on both axes.
SECTION("polygon geometry") {
string json = R"json({
"type": "Polygon",
"coordinates": [
[
[-1.0,-1.0],
[-1.0,1.0],
[1.0,1.0],
[1.0,-1.0],
[-1.0,-1.0]
]
]
})json";
Region s{json,"geojson"};
// The origin is inside the square; (2,2) is outside it.
REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));
}
// Outer ring from -2 to 2 with a second ring from -1 to 1 inside it.
SECTION("polygon with a hole") {
string json = R"json({
"type": "Polygon",
"coordinates": [
[
[-2.0,-2.0],
[-2.0,2.0],
[2.0,2.0],
[2.0,-2.0],
[-2.0,-2.0]
],
[
[-1.0,-1.0],
[-1.0,1.0],
[1.0,1.0],
[1.0,-1.0],
[-1.0,-1.0]
]
]
})json";
Region s{json,"geojson"};
// (1.5,1.5) lies between the two rings and must be contained; the origin
// lies inside the second ring and must not be.
REQUIRE(s.Contains(S2LatLng::FromDegrees(1.5,1.5).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(0.0,0.0).ToPoint()));
}
// Two disjoint unit squares: one from 0 to 1, one from 2 to 3.
SECTION("multipolygon geometry") {
string json = R"json({
"type": "MultiPolygon",
"coordinates": [
[[
[0.0,0.0],
[1.0,0.0],
[1.0,1.0],
[0.0,1.0],
[0.0,0.0]
]],
[[
[2.0,2.0],
[3.0,2.0],
[3.0,3.0],
[2.0,3.0],
[2.0,2.0]
]]
]
})json";
Region s{json,"geojson"};
// A point inside each square must be contained.
REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,0.5).ToPoint()));
REQUIRE(s.Contains(S2LatLng::FromDegrees(2.5,2.5).ToPoint()));
// The bounds must cover both squares (0..3 in latitude and longitude).
auto bounds = s.GetBounds();
REQUIRE(bounds.lat_lo().degrees() <= 0.0);
REQUIRE(bounds.lat_hi().degrees() >= 3.0);
REQUIRE(bounds.lng_lo().degrees() <= 0.0);
REQUIRE(bounds.lng_hi().degrees() >= 3.0);
}
// Ring whose first coordinate runs from 180 to 181, i.e. past the antimeridian.
SECTION("bounds beyond antimeridian") {
string json = R"json({
"type": "Polygon",
"coordinates": [
[
[180.0,-1.0],
[180.0,1.0],
[181.0,1.0],
[181.0,-1.0],
[180.0,-1.0]
]
]
})json";
Region s{json,"geojson"};
auto bounds = s.GetBounds();
// The low longitude stays at 180; the high longitude is expected to come
// back as roughly -179 (checked with a +/-0.1 tolerance).
REQUIRE(bounds.lng_lo().degrees() == 180.0);
REQUIRE(bounds.lng_hi().degrees() <= -178.9); // hacky precision
REQUIRE(bounds.lng_hi().degrees() >= -179.1);
}
}
// .poly in Lon, Lat order
// Regions built from Osmosis .poly text ("poly" mode). Each fixture is a raw
// string holding a name line, one or more named rings terminated by END, and
// a final END.
TEST_CASE("osmosis .poly") {
// One ring with first column in [-2,2] and second column in [-1,1].
SECTION("simple polygon") {
string poly = R"poly(basic
first_area
0.2e+01 0.1e+01
0.2e+01 -0.1e+01
-0.2e+01 -0.1e+01
-0.2e+01 0.1e+01
END
END
)poly";
Region s{poly,"poly"};
REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,3.0).ToPoint()));
// lat 0.5 / lon 1.5 is inside only if the first column is read as longitude.
REQUIRE(s.Contains(S2LatLng::FromDegrees(0.5,1.5).ToPoint()));
}
// Same kind of square, written with upper-case exponents and the vertices
// listed in the opposite winding order.
SECTION("different whitespace, opposite orientation") {
string poly = R"poly(basic
first_area
0.1E+01 0.1E+01
-0.1E+01 0.1E+01
-0.1E+01 -0.1E+01
0.1E+01 -0.1E+01
END
END
)poly";
Region s{poly,"poly"};
REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));
}
// The ring explicitly repeats its first vertex as the last one.
SECTION("repeated last point") {
string poly = R"poly(basic
first_area
0.1e+01 0.1e+01
0.1e+01 -0.1e+01
-0.1e+01 -0.1e+01
-0.1e+01 0.1e+01
0.1e+01 0.1e+01
END
END
)poly";
Region s{poly,"poly"};
REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(2.0,2.0).ToPoint()));
}
// Two separate rings: one from -1 to 1 and one from 3 to 4.
SECTION("multiple outer loops") {
string poly = R"poly(basic
first_area
0.1E+01 0.1E+01
-0.1E+01 0.1E+01
-0.1E+01 -0.1E+01
0.1E+01 -0.1E+01
END
second_area
0.4E+01 0.4E+01
0.3E+01 0.4E+01
0.3E+01 0.3E+01
0.4E+01 0.3E+01
END
END
)poly";
Region s{poly,"poly"};
// A point inside each ring is contained; the corner (1.0,1.0) of the
// first ring is expected not to be.
REQUIRE(s.Contains(S2LatLng::FromDegrees(0,0).ToPoint()));
REQUIRE(s.Contains(S2LatLng::FromDegrees(3.5,3.5).ToPoint()));
REQUIRE(!s.Contains(S2LatLng::FromDegrees(1.0,1.0).ToPoint()));
}
// TODO: this section is empty; no .poly fixture with a hole is tested yet.
SECTION("loop with hole") {
}
}
================================================
FILE: utils/osmx-update
================================================
#!/usr/bin/env python
"""Bring an .osmx database up to date from a replication server.

Usage: osmx-update OSMX_FILE REPLICATION_URL

Reads the current sequence number from the database (falling back to its
timestamp), then downloads and applies every newer diff, one at a time, by
calling `osmx update ... --commit`. A lock on /tmp/osmx.lock ensures only
one instance runs at a time.
"""
from datetime import datetime, timezone
import subprocess
import tempfile
import os
import sys
import fcntl

from server import ReplicationServer

# expects osmx to be on the PATH.
osmx = 'osmx'

if len(sys.argv) < 3:
    sys.exit('Usage: osmx-update OSMX_FILE REPLICATION_URL')

# Opened outside the try block so the cleanup below never refers to a name
# that was not bound because open() itself failed.
lock_file = open('/tmp/osmx.lock','w')
locked = False
try:
    fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
    locked = True

    s = ReplicationServer(sys.argv[2])

    # OSMX always uses minutely timestamps internally - try integrating daily
    seqnum = subprocess.check_output([osmx,'query',sys.argv[1],'seqnum'])

    if not seqnum.strip():
        # No sequence number stored: derive it from the stored timestamp.
        timestamp = subprocess.check_output([osmx,'query',sys.argv[1],'timestamp'])
        timestamp = timestamp.decode('utf-8').strip()
        timestamp = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ")
        timestamp = timestamp.replace(tzinfo=timezone.utc)
        print('Timestamp is {0}'.format(timestamp))
        seqnum = s.timestamp_to_sequence(timestamp)

    seqnum = int(seqnum)
    print('Sequence number is {0}'.format(seqnum))

    latest_state = s.get_state_info()
    if latest_state is None:
        sys.exit('Could not fetch the latest state from the replication server.')
    latest = latest_state.sequence
    print("Latest is {0}".format(latest))

    current_id = seqnum + 1
    while current_id <= latest:
        fd, path = tempfile.mkstemp(suffix='.osc.gz')
        try:
            with open(fd,'wb') as f:
                f.write(s.get_diff_block(current_id))
            info = s.get_state_info(current_id)
            if info is None:
                sys.exit('Could not fetch state for sequence {0}.'.format(current_id))
            timestamp = info.timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
            subprocess.check_call([osmx,'update',sys.argv[1],path,str(current_id),timestamp,'--commit'])
        finally:
            # Remove the downloaded diff even when the update failed.
            os.unlink(path)
        current_id = current_id + 1
except BlockingIOError:
    print("Process is running - exiting.")
finally:
    # Only release a lock this process actually acquired.
    if locked:
        fcntl.lockf(lock_file, fcntl.LOCK_UN)
    lock_file.close()
================================================
FILE: utils/server.py
================================================
""" Helper functions to communicate with replication servers.
derived from https://github.com/osmcode/pyosmium
"""
import sys
import urllib.request as urlrequest
import urllib.error as urlerror
import datetime as dt
from collections import namedtuple
from math import ceil
OsmosisState = namedtuple('OsmosisState', ['sequence', 'timestamp'])
DownloadResult = namedtuple('DownloadResult', ['id', 'newest'])
import logging
log = logging.getLogger()
class ReplicationServer(object):
    """ Minimal client for an OSM replication server.

        `url` is the base URL of the replication directory and `diff_type`
        the file extension of the diff files published there.
    """

    def __init__(self, url, diff_type='osc.gz'):
        self.baseurl = url
        self.diff_type = diff_type

    def open_url(self, url):
        """ Opens `url` with a 10 second timeout and returns the response. """
        return urlrequest.urlopen(url,None,10)

    def timestamp_to_sequence(self, timestamp, balanced_search=False):
        """ Get the sequence number of the replication file that contains the
            given timestamp. The search algorithm is optimised for replication
            servers that publish updates in regular intervals. For servers
            with irregular change file publication dates 'balanced_search`
            should be set to true so that a standard binary search for the
            sequence will be used. The default is good for all known
            OSM replication services.
        """

        # get the current timestamp from the server
        upper = self.get_state_info()

        if upper is None:
            return None
        if timestamp >= upper.timestamp or upper.sequence <= 0:
            return upper.sequence

        # find a state file that is before the required timestamp
        lower = None
        lowerid = 0
        while lower is None:
            log.info("Trying with Id %s", lowerid)
            lower = self.get_state_info(lowerid)

            if lower is not None and lower.timestamp >= timestamp:
                if lower.sequence == 0 or lower.sequence + 1 >= upper.sequence:
                    return lower.sequence
                upper = lower
                lower = None
                lowerid = 0

            if lower is None:
                # no lower yet, so try a higher id (binary search wise)
                newid = (lowerid + upper.sequence) // 2
                if newid <= lowerid:
                    # nothing suitable found, so upper is probably the best we can do
                    return upper.sequence
                lowerid = newid

        # Now do a binary search between upper and lower.
        # We could be clever here and compute the most likely state file
        # by interpolating over the timestamps but that creates a whole ton of
        # special cases that need to be handled correctly.
        while True:
            if balanced_search:
                base_splitid = (lower.sequence + upper.sequence) // 2
            else:
                ts_int = (upper.timestamp - lower.timestamp).total_seconds()
                seq_int = upper.sequence - lower.sequence
                goal = (timestamp - lower.timestamp).total_seconds()
                base_splitid = lower.sequence + ceil(goal * seq_int / ts_int)
                if base_splitid >= upper.sequence:
                    base_splitid = upper.sequence - 1
            split = self.get_state_info(base_splitid)

            if split is None:
                # file missing, search the next towards lower
                splitid = base_splitid - 1
                while split is None and splitid > lower.sequence:
                    split = self.get_state_info(splitid)
                    splitid -= 1
                if split is None:
                    # still nothing? search towards upper
                    splitid = base_splitid + 1
                    while split is None and splitid < upper.sequence:
                        split = self.get_state_info(splitid)
                        splitid += 1
                    if split is None:
                        # still nothing? Then lower has to do
                        return lower.sequence

            # set new boundary
            if split.timestamp < timestamp:
                lower = split
            else:
                upper = split

            if lower.sequence + 1 >= upper.sequence:
                return lower.sequence

    def get_state_info(self, seq=None):
        """ Downloads and returns the state information for the given
            sequence. If the download is successful, a namedtuple with
            `sequence` and `timestamp` is returned, otherwise the function
            returns `None`.
        """
        try:
            response = self.open_url(self.get_state_url(seq))
        except Exception as err:
            logging.error(err)
            return None

        ts = None
        seq = None
        # The context manager closes the HTTP response on every exit path,
        # including the early `return None` for malformed lines.
        with response:
            line = response.readline()
            while line:
                line = line.decode('utf-8')
                if '#' in line:
                    line = line[0:line.index('#')]
                else:
                    line = line.strip()
                if line:
                    kv = line.split('=', 2)
                    if len(kv) != 2:
                        return None
                    if kv[0] == 'sequenceNumber':
                        seq = int(kv[1])
                    elif kv[0] == 'timestamp':
                        ts = dt.datetime.strptime(kv[1], "%Y-%m-%dT%H\\:%M\\:%SZ")
                        if sys.version_info >= (3,0):
                            ts = ts.replace(tzinfo=dt.timezone.utc)
                line = response.readline()

        return OsmosisState(sequence=seq, timestamp=ts)

    def get_diff_block(self, seq):
        """ Downloads the diff with the given sequence number and returns
            it as a byte sequence. Throws a :code:`urllib.error.HTTPError`
            (or :code:`urllib2.HTTPError` in python2)
            if the file cannot be downloaded.
        """
        # Close the connection as soon as the body has been read.
        with self.open_url(self.get_diff_url(seq)) as response:
            return response.read()

    def get_state_url(self, seq):
        """ Returns the URL of the state.txt files for a given sequence id.

            If seq is `None` the URL for the latest state info is returned,
            i.e. the state file in the root directory of the replication
            service.
        """
        if seq is None:
            return self.baseurl + '/state.txt'

        # Integer division keeps the three path components exact integers.
        return '%s/%03i/%03i/%03i.state.txt' % (self.baseurl,
                seq // 1000000, (seq % 1000000) // 1000, seq % 1000)

    def get_diff_url(self, seq):
        """ Returns the URL to the diff file for the given sequence id.
        """
        return '%s/%03i/%03i/%03i.%s' % (self.baseurl,
                seq // 1000000, (seq % 1000000) // 1000, seq % 1000,
                self.diff_type)
gitextract_2jzln99k/
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── build-container.yml
│ └── codeql.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE.md
├── README.md
├── dist/
│ └── archive.sh
├── docs/
│ ├── MANUAL.md
│ └── PROGRAMMING_GUIDE.md
├── examples/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── bbox_wkt.cpp
│ └── way_wkt.cpp
├── include/
│ └── osmx/
│ ├── cmd.h
│ ├── messages.capnp
│ ├── region.h
│ ├── storage.h
│ └── util.h
├── python/
│ ├── .gitignore
│ ├── README.md
│ ├── examples/
│ │ ├── augmented_diff.py
│ │ ├── read_way.py
│ │ └── web_server.py
│ ├── osmx/
│ │ ├── __init__.py
│ │ ├── messages.capnp
│ │ └── osmx.py
│ └── setup.py
├── src/
│ ├── cmd.cpp
│ ├── expand.cpp
│ ├── extract.cpp
│ ├── region.cpp
│ ├── storage.cpp
│ └── update.cpp
├── test/
│ └── test_region.cpp
└── utils/
├── osmx-update
└── server.py
SYMBOL INDEX (105 symbols across 15 files)
FILE: examples/bbox_wkt.cpp
function main (line 19) | int main(int argc, char* argv[]) {
FILE: examples/way_wkt.cpp
function main (line 12) | int main(int argc, char* argv[]) {
FILE: include/osmx/region.h
function class (line 9) | class Region {
FILE: include/osmx/storage.h
function namespace (line 12) | namespace osmx { namespace db {
FILE: include/osmx/util.h
function class (line 12) | class Timer {
FILE: python/examples/augmented_diff.py
function not_in_db (line 48) | def not_in_db(elem):
function get_lat_lon (line 57) | def get_lat_lon(ref, use_new):
function set_old_metadata (line 65) | def set_old_metadata(elem):
function augment_nd (line 169) | def augment_nd(nd,use_new):
function augment_member (line 174) | def augment_member(mem,use_new):
function augment (line 196) | def augment(elem,use_new):
class Bounds (line 309) | class Bounds:
method __init__ (line 310) | def __init__(self):
method add (line 316) | def add(self,x,y):
method elem (line 326) | def elem(self):
function sort_by_type (line 347) | def sort_by_type(x):
function indent (line 363) | def indent(elem, level=0):
FILE: python/examples/web_server.py
class Handler (line 14) | class Handler(BaseHTTPRequestHandler):
method do_GET (line 15) | def do_GET(self):
FILE: python/osmx/osmx.py
function tag_dict (line 9) | def tag_dict(tag_list):
class Environment (line 16) | class Environment:
method __init__ (line 17) | def __init__(self,fname):
class Transaction (line 20) | class Transaction:
method __init__ (line 21) | def __init__(self,env):
method __enter__ (line 25) | def __enter__(self,*args,**kwargs):
method __exit__ (line 29) | def __exit__(self,*args,**kwargs):
class Index (line 32) | class Index:
method __init__ (line 33) | def __init__(self):
method __init__ (line 37) | def __init__(self,txn,name):
method get (line 41) | def get(self,obj_id):
class Index (line 36) | class Index:
method __init__ (line 33) | def __init__(self):
method __init__ (line 37) | def __init__(self,txn,name):
method get (line 41) | def get(self,obj_id):
class Table (line 48) | class Table:
method __init__ (line 49) | def __init__(self,txn,name):
method _get_bytes (line 53) | def _get_bytes(self,elem_id):
class Locations (line 56) | class Locations(Table):
method __init__ (line 57) | def __init__(self,txn):
method get (line 60) | def get(self,node_id):
class Nodes (line 70) | class Nodes(Table):
method __init__ (line 71) | def __init__(self,txn):
method get (line 74) | def get(self,node_id):
class Ways (line 80) | class Ways(Table):
method __init__ (line 81) | def __init__(self,txn):
method get (line 84) | def get(self,way_id):
class Relations (line 90) | class Relations(Table):
method __init__ (line 91) | def __init__(self,txn):
method get (line 94) | def get(self,relation_id):
class NodeWay (line 100) | class NodeWay(Index):
method __init__ (line 101) | def __init__(self,txn):
class NodeRelation (line 104) | class NodeRelation(Index):
method __init__ (line 105) | def __init__(self,txn):
class WayRelation (line 108) | class WayRelation(Index):
method __init__ (line 109) | def __init__(self,txn):
class RelationRelation (line 112) | class RelationRelation(Index):
method __init__ (line 113) | def __init__(self,txn):
FILE: src/cmd.cpp
function printHelp (line 9) | void printHelp() {
function printQueryHelp (line 19) | void printQueryHelp() {
function main (line 33) | int main(int argc, char* argv[]) {
FILE: src/expand.cpp
class SortReader (line 25) | class SortReader {
method SortReader (line 27) | SortReader(std::string filename) : mStream(filename, std::ios::in | st...
method getNext (line 29) | bool getNext() {
class Sorter (line 41) | class Sorter {
method Sorter (line 44) | Sorter(std::string tempDir,std::string name) : mTempDir(tempDir), mNam...
method put (line 48) | void put(uint64_t from, uint64_t to) {
method put (line 53) | void put(S2CellId from, uint64_t to) {
method persist (line 57) | void persist() {
method writeDb (line 75) | void writeDb(MDB_env *env) {
method Sorter (line 115) | Sorter( const Sorter& ) = delete;
method Sorter (line 116) | Sorter& operator=( const Sorter& ) = delete;
class Handler (line 124) | class Handler: public osmium::handler::Handler {
method Handler (line 126) | Handler(MDB_env *env, MDB_txn *txn,string tempDir) :
method node (line 150) | void node(const osmium::Node& node) {
method way (line 173) | void way(const osmium::Way& way) {
method relation (line 195) | void relation(const osmium::Relation& relation) {
function cmdExpand (line 245) | void cmdExpand(int argc, char* argv[]) {
FILE: src/extract.cpp
type ExportProgress (line 20) | struct ExportProgress {
method print (line 29) | void print() {
class ProgressSection (line 34) | class ProgressSection {
method ProgressSection (line 37) | ProgressSection(ExportProgress &expprog, uint64_t &total, uint64_t &pr...
method tick (line 45) | void tick() {
function endsWith (line 63) | static bool endsWith(const std::string& str, const std::string& suffix)
function cmdExtract (line 70) | void cmdExtract(int argc, char * argv[]) {
FILE: src/region.cpp
function rtrim (line 10) | static inline void rtrim(std::string &s) {
function S2PolyFromCoordinates (line 16) | std::unique_ptr<S2Polygon> S2PolyFromCoordinates(nlohmann::json &coordin...
function S2CellUnion (line 129) | S2CellUnion Region::GetCovering(S2RegionCoverer &coverer) {
function S2LatLngRect (line 137) | S2LatLngRect Region::GetBounds() {
FILE: src/storage.cpp
type osmx (line 4) | namespace osmx { namespace db {
type db (line 4) | namespace db {
function MDB_env (line 7) | MDB_env *createEnv(std::string path, bool writable) {
function Location (line 106) | Location Locations::get(uint64_t id) const {
function traverseCell (line 171) | void traverseCell(MDB_cursor *cursor, S2CellId cell_id, roaring::Roa...
function traverseReverse (line 194) | void traverseReverse(MDB_cursor *cursor,uint64_t from, roaring::Roar...
FILE: src/update.cpp
class DataUpdate (line 62) | class DataUpdate : public osmium::handler::Handler {
method DataUpdate (line 64) | DataUpdate(MDB_txn *txn) :
method node (line 78) | void node(const osmium::Node& node) {
method way (line 123) | void way(const osmium::Way &way) {
method relation (line 174) | void relation(const osmium::Relation &relation) {
function cmdUpdate (line 273) | void cmdUpdate(int argc, char* argv[]) {
FILE: utils/server.py
class ReplicationServer (line 19) | class ReplicationServer(object):
method __init__ (line 20) | def __init__(self, url, diff_type='osc.gz'):
method open_url (line 24) | def open_url(self, url):
method timestamp_to_sequence (line 27) | def timestamp_to_sequence(self, timestamp, balanced_search=False):
method get_state_info (line 109) | def get_state_info(self, seq=None):
method get_diff_block (line 144) | def get_diff_block(self, seq):
method get_state_url (line 153) | def get_state_url(self, seq):
method get_diff_url (line 167) | def get_diff_url(self, seq):
Condensed preview — 39 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (138K chars).
[
{
"path": ".github/dependabot.yml",
"chars": 207,
"preview": "version: 2\nupdates:\n - package-ecosystem: \"docker\"\n directory: \"/\"\n schedule:\n interval: \"daily\"\n\n - packag"
},
{
"path": ".github/workflows/build-container.yml",
"chars": 2130,
"preview": "name: Build and push container image\n\non:\n push:\n branches: [ main ]\n pull_request:\n branches: [ main ]\n releas"
},
{
"path": ".github/workflows/codeql.yml",
"chars": 994,
"preview": "name: Scan for security problems with CodeQL\n\non:\n push:\n branches: [ \"main\" ]\n pull_request:\n branches: [ \"main"
},
{
"path": ".gitignore",
"chars": 206,
"preview": "CMakeCache.txt\nCMakeFiles\n*.swp\n*.osmx\n*.osmx-lock\nMakefile\n*.pbf\n*.cmake\nosmxTest\nvenv\ndepends\na.out\n*.osc\n*.osc.gz\n__p"
},
{
"path": ".gitmodules",
"chars": 106,
"preview": "[submodule \"vendor/s2geometry\"]\n\tpath = vendor/s2geometry\n\turl = https://github.com/google/s2geometry.git\n"
},
{
"path": "CMakeLists.txt",
"chars": 5130,
"preview": "cmake_minimum_required (VERSION 3.5)\nset(CMAKE_C_COMPILER \"/usr/bin/clang\")\nset(CMAKE_CXX_COMPILER \"/usr/bin/clang++\")\np"
},
{
"path": "Dockerfile",
"chars": 1253,
"preview": "FROM alpine:3.22 AS builder\n\n# TODO: Add croaring-dev once available in Alpine Linux.\n# https://gitlab.alpinelinux.org/a"
},
{
"path": "LICENSE.md",
"chars": 1383,
"preview": "Copyright 2019 Protomaps. Some source code from https://github.com/osmcode/pyosmium Copyright (c) 2014-2018, Sarah Hoffm"
},
{
"path": "README.md",
"chars": 4809,
"preview": "# OSM Express\n\n\n\n[Manual](docs/MANUAL.md), [Programming Guide](docs/PROGRAMMING_GU"
},
{
"path": "dist/archive.sh",
"chars": 919,
"preview": "#!/bin/bash\nset -e\nFILENAME=dist/osmexpress-$1-$2.tgz\nrm -f LICENSES\nprintf \"osmexpress\\n===========\\n\" >> LICENSES\ncat "
},
{
"path": "docs/MANUAL.md",
"chars": 9647,
"preview": "**OSM Express** is a database file format for OpenStreetMap data (.osmx), as well as a command line tool and C++ library"
},
{
"path": "docs/PROGRAMMING_GUIDE.md",
"chars": 1957,
"preview": "## Building from source\n\nOSM Express uses CMake for its build scripts. It's only been tested with the Clang C++ compiler"
},
{
"path": "examples/.gitignore",
"chars": 17,
"preview": "way_wkt\nbbox_wkt\n"
},
{
"path": "examples/CMakeLists.txt",
"chars": 1494,
"preview": "cmake_minimum_required (VERSION 3.5)\nset(CMAKE_CXX_FLAGS_RELEASE \"-O3\")\nset(CMAKE_CXX_FLAGS_DEBUG \"-DDEBUG -g\")\nset(CMAK"
},
{
"path": "examples/bbox_wkt.cpp",
"chars": 3358,
"preview": "#include <vector>\n#include <iomanip>\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n#include \"s2/s2latlng.h\"\n#include "
},
{
"path": "examples/way_wkt.cpp",
"chars": 1563,
"preview": "#include <vector>\n#include <iomanip>\n#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\n\n// Example "
},
{
"path": "include/osmx/cmd.h",
"chars": 121,
"preview": "void cmdExpand(int argc, char* argv[]);\nvoid cmdExtract(int argc, char* argv[]);\nvoid cmdUpdate(int argc, char* argv[]);"
},
{
"path": "include/osmx/messages.capnp",
"chars": 547,
"preview": "@0xd3a7e843a9c03421;\n\nstruct Metadata {\n version @0 :UInt32;\n timestamp @1 :UInt64;\n changeset @2 :UInt32;\n uid @3 :"
},
{
"path": "include/osmx/region.h",
"chars": 530,
"preview": "#include <string>\n\n#include <nlohmann/json.hpp>\n#include \"s2/s2region.h\"\n#include \"s2/s2cell_union.h\"\n#include \"s2/s2reg"
},
{
"path": "include/osmx/storage.h",
"chars": 2426,
"preview": "#pragma once\n#include \"lmdb.h\"\n#include \"osmium/osm/location.hpp\"\n#include \"kj/io.h\"\n#include \"capnp/message.h\"\n#include"
},
{
"path": "include/osmx/util.h",
"chars": 1191,
"preview": "#pragma once\n#include <chrono>\n#include <iostream>\n#include \"lmdb.h\"\n#include \"osmium/tags/taglist.hpp\"\n\n#define CHECK_L"
},
{
"path": "python/.gitignore",
"chars": 22,
"preview": "build\ndist\n*.egg-info\n"
},
{
"path": "python/README.md",
"chars": 932,
"preview": "A Python package to read OSM Express (.osmx) database files. \n\n## Installation\n\n```bash\npip install osmx\n```\n\n## Usage\n\n"
},
{
"path": "python/examples/augmented_diff.py",
"chars": 14081,
"preview": "from collections import namedtuple\nfrom datetime import datetime\nimport copy\nimport sys\nimport xml.etree.ElementTree as "
},
{
"path": "python/examples/read_way.py",
"chars": 530,
"preview": "import sys\nimport osmx\n\nif len(sys.argv) <= 1:\n print(\"Usage: read_way.py OSMX_FILE WAY_ID\")\n exit(1)\n\nenv = osmx"
},
{
"path": "python/examples/web_server.py",
"chars": 2910,
"preview": "import json\nimport sys\nfrom http.server import BaseHTTPRequestHandler, HTTPServer\nimport osmx\n\nif len(sys.argv) <= 1:\n "
},
{
"path": "python/osmx/__init__.py",
"chars": 19,
"preview": "from .osmx import *"
},
{
"path": "python/osmx/messages.capnp",
"chars": 547,
"preview": "@0xd3a7e843a9c03421;\n\nstruct Metadata {\n version @0 :UInt32;\n timestamp @1 :UInt64;\n changeset @2 :UInt32;\n uid @3 :"
},
{
"path": "python/osmx/osmx.py",
"chars": 3307,
"preview": "import sys\nimport os\nimport lmdb\nimport capnp\n\ncapnp.remove_import_hook()\nmessages_capnp = capnp.load(os.path.join(os.pa"
},
{
"path": "python/setup.py",
"chars": 819,
"preview": "import setuptools\n\nwith open(\"README.md\", \"r\") as fh:\n long_description = fh.read()\n\nrequirements = [\n 'lmdb~=1.4."
},
{
"path": "src/cmd.cpp",
"chars": 4187,
"preview": "#include <vector>\n#include \"osmx/storage.h\"\n#include \"osmx/cmd.h\"\n#include \"osmx/util.h\"\n\nusing namespace std;\nusing nam"
},
{
"path": "src/expand.cpp",
"chars": 9549,
"preview": "#include <iomanip>\n#include <fstream>\n#include \"osmium/handler.hpp\"\n#include \"osmium/visitor.hpp\"\n#include \"osmium/io/an"
},
{
"path": "src/extract.cpp",
"chars": 16054,
"preview": "#include <string>\n#include <fstream>\n#include \"s2/s2latlng.h\"\n#include \"s2/s2region_coverer.h\"\n#include \"s2/s2latlng_rec"
},
{
"path": "src/region.cpp",
"chars": 5369,
"preview": "#include <sstream>\n#include <iostream>\n#include \"s2/s2latlng.h\"\n#include \"s2/s2latlng_rect.h\"\n#include \"s2/s2cap.h\"\n#inc"
},
{
"path": "src/storage.cpp",
"chars": 6787,
"preview": "#include \"osmx/storage.h\"\n#include \"osmx/util.h\"\n\nnamespace osmx { namespace db {\n\n\nMDB_env *createEnv(std::string path,"
},
{
"path": "src/update.cpp",
"chars": 12954,
"preview": "#include <iostream>\n#include <cassert>\n#include <set>\n#include \"cxxopts.hpp\"\n#include \"osmium/handler.hpp\"\n#include \"osm"
},
{
"path": "test/test_region.cpp",
"chars": 5048,
"preview": "#include \"catch2/catch_test_macros.hpp\"\n#include \"s2/s2latlng.h\"\n#include \"osmx/region.h\"\n\nusing namespace std;\n\n// osmi"
},
{
"path": "utils/osmx-update",
"chars": 1583,
"preview": "#!/usr/bin/env python\n\nfrom datetime import datetime, timezone\nimport subprocess\nimport tempfile\nimport os\nimport sys\nim"
},
{
"path": "utils/server.py",
"chars": 6565,
"preview": "\"\"\" Helper functions to communicate with replication servers.\nderived from https://github.com/osmcode/pyosmium\n\"\"\"\n\nimpo"
}
]
About this extraction
This page contains the full source code of the protomaps/OSMExpress GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 39 files (128.2 KB), approximately 35.4k tokens, and a symbol index with 105 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.