Repository: atom-archive/xray
Branch: master
Commit: cb6c5809f18c
Files: 141
Total size: 1.1 MB
Directory structure:
gitextract_hfrilqj1/
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE
├── README.md
├── docs/
│ ├── architecture/
│ │ ├── 001_client_server_protocol.md
│ │ ├── 002_shared_workspaces.md
│ │ └── 003_memo_epochs.md
│ └── updates/
│ ├── 2018_03_05.md
│ ├── 2018_03_12.md
│ ├── 2018_03_19.md
│ ├── 2018_03_26.md
│ ├── 2018_04_02.md
│ ├── 2018_04_09.md
│ ├── 2018_04_16.md
│ ├── 2018_04_23.md
│ ├── 2018_04_30.md
│ ├── 2018_05_07.md
│ ├── 2018_05_14.md
│ ├── 2018_05_28.md
│ ├── 2018_07_10.md
│ ├── 2018_07_16.md
│ ├── 2018_07_23.md
│ ├── 2018_07_31.md
│ ├── 2018_08_21.md
│ ├── 2018_08_28.md
│ ├── 2018_09_14.md
│ └── 2018_10_02.md
├── memo_core/
│ ├── Cargo.toml
│ ├── README.md
│ ├── rustfmt.toml
│ ├── script/
│ │ └── compile_flatbuffers
│ └── src/
│ ├── btree.rs
│ ├── buffer.rs
│ ├── epoch.rs
│ ├── lib.rs
│ ├── operation_queue.rs
│ ├── serialization/
│ │ ├── mod.rs
│ │ ├── schema.fbs
│ │ └── schema_generated.rs
│ ├── time.rs
│ └── work_tree.rs
├── memo_js/
│ ├── .npmignore
│ ├── .nvmrc
│ ├── Cargo.toml
│ ├── README.md
│ ├── package.json
│ ├── rustfmt.toml
│ ├── script/
│ │ └── build
│ ├── src/
│ │ ├── index.ts
│ │ ├── lib.rs
│ │ └── support.ts
│ ├── test/
│ │ ├── tests.ts
│ │ └── tsconfig.json
│ ├── tsconfig.json
│ └── webpack.config.js
├── rust-toolchain
├── script/
│ ├── bench
│ ├── build
│ ├── cibuild
│ └── test
├── xray_browser/
│ ├── README.md
│ ├── package.json
│ ├── script/
│ │ ├── build
│ │ └── server
│ ├── src/
│ │ ├── client.js
│ │ ├── ui.js
│ │ └── worker.js
│ └── static/
│ └── index.html
├── xray_cli/
│ ├── Cargo.toml
│ ├── README.md
│ └── src/
│ └── main.rs
├── xray_core/
│ ├── Cargo.toml
│ ├── README.md
│ ├── benches/
│ │ └── bench.rs
│ └── src/
│ ├── app.rs
│ ├── buffer.rs
│ ├── buffer_view.rs
│ ├── cross_platform.rs
│ ├── file_finder.rs
│ ├── fs.rs
│ ├── fuzzy.rs
│ ├── lib.rs
│ ├── movement.rs
│ ├── never.rs
│ ├── notify_cell.rs
│ ├── project.rs
│ ├── rpc/
│ │ ├── client.rs
│ │ ├── messages.rs
│ │ ├── mod.rs
│ │ └── server.rs
│ ├── stream_ext.rs
│ ├── tree.rs
│ ├── wasm_logging.rs
│ ├── window.rs
│ └── workspace.rs
├── xray_electron/
│ ├── .gitignore
│ ├── README.md
│ ├── index.html
│ ├── lib/
│ │ ├── main_process/
│ │ │ └── main.js
│ │ ├── render_process/
│ │ │ └── main.js
│ │ └── shared/
│ │ └── xray_client.js
│ └── package.json
├── xray_server/
│ ├── Cargo.toml
│ ├── README.md
│ └── src/
│ ├── fs.rs
│ ├── json_lines_codec.rs
│ ├── main.rs
│ ├── messages.rs
│ └── server.rs
├── xray_ui/
│ ├── README.md
│ ├── lib/
│ │ ├── action_dispatcher.js
│ │ ├── app.js
│ │ ├── debounce.js
│ │ ├── file_finder.js
│ │ ├── index.js
│ │ ├── modal.js
│ │ ├── text_editor/
│ │ │ ├── shaders.js
│ │ │ ├── text_editor.js
│ │ │ └── text_plane.js
│ │ ├── theme_provider.js
│ │ ├── view.js
│ │ ├── view_registry.js
│ │ └── workspace.js
│ ├── package.json
│ └── test/
│ ├── action_dispatcher.test.js
│ ├── file_finder.test.js
│ ├── helpers/
│ │ └── component_helpers.js
│ ├── modal.test.js
│ ├── view.test.js
│ └── view_registry.test.js
└── xray_wasm/
├── .gitignore
├── Cargo.toml
├── lib/
│ ├── main.js
│ └── support.js
├── package.json
├── script/
│ ├── build
│ └── test
├── src/
│ └── lib.rs
└── test/
└── tests.js
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
**/node_modules
**/target/
**/*.rs.bk
**/.DS_Store
**/.cargo
Icon*
.tags*
xray_wasm/dist
xray_browser/dist
memo_js/dist
memo_js/test/dist
================================================
FILE: .travis.yml
================================================
language: rust
before_install:
- curl -o- -L https://yarnpkg.com/install.sh | bash
- export PATH="$HOME/.yarn/bin:$PATH"
- curl -o- https://raw.githubusercontent.com/creationix/nvm/v0.33.11/install.sh | bash
- nvm install v11
# Create a virtual display for electron
- export DISPLAY=':99.0'
- Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 &
script: script/cibuild
cache:
cargo: true
yarn: true
branches:
only:
- master
notifications:
email:
on_success: never
on_failure: change
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Xray
This project is still in the very early days, and isn't going to be usable for even basic editing for some time. At this point, we're looking for contributors that are willing to roll up their sleeves and solve problems. Please communicate with us however it makes sense, but in general opening a *pull request that fixes an issue* is going to be far more valuable than just reporting an issue.
As the architecture stabilizes and the surface area of the project expands, there will be increasing opportunities to help out. To get some ideas for specific projects that could help in the short term, check out [issues that are labeled "help wanted"](https://github.com/atom/xray/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22). If you have an idea you'd like to pursue outside of these, that's awesome, but you may want to discuss it with us in an issue first to ensure it fits before spending too much time on it.
It's really important to us to have a smooth on-ramp for contributors, and one great way you can contribute is by helping us improve this guide. If your experience is bumpy, can you open a pull request that makes it smoother for the next person?
## Communicating with maintainers
The best way to communicate with maintainers is by posting an issue to this repository. The more thought you put into articulating your question or idea, the more value you'll be adding to the community and the easier it will be for maintainers to respond. That said, just try your best. If you have something you want to say, we'd prefer that you say it imperfectly rather than not saying it at all.
You can also communicate with maintainers or other community members on the `#xray` channel on Atom's public Slack instance. After you [request an invite via this form](http://atom-slack.herokuapp.com/), you can access our Slack instance at https://atomio.slack.com.
## Building
So far, we have only built this project on macOS. If you'd like to help us improve our build or documentation to support other platforms, that would be a huge help!
### Install system dependencies
#### Install Node v8.9.3
To install Node, you can install [`nvm`](https://github.com/creationix/nvm) and then run `nvm install v8.9.3`.
Later versions may work, but you should ideally run the build with the same version of Node that is bundled into Xray's current Electron dependency. If in doubt, you can check the version of the `electron` dependency in [`xray_electron/package.json`](https://github.com/atom/xray/blob/master/xray_electron/package.json), then run `process.versions.node` in the console of that version of Electron to ensure that these instructions haven't gotten out of date.
#### Install Rust
You can install Rust via [`rustup`](https://www.rustup.rs/). We currently require building on the nightly channel in order to use `wasm_bindgen` for browser support.
#### Install Yarn
Follow the [installation instructions](https://yarnpkg.com/en/docs/install) on the Yarn site.
### Run the build script
This repository contains several components in top-level folders prefixed with `xray_*`. To build all of the components, simply run this in the root of the repository:
```sh
script/build
```
To build a release version (which will be much faster):
```sh
script/build --release
```
## Running
We currently *only* support launching the application via the CLI. For this to work, you need to set the `XRAY_SRC_PATH` environment variable to the location of your repository. The CLI also currently *requires* an argument:
```sh
XRAY_SRC_PATH=. script/xray .
```
That assumes you built with `--release`. To run the debug version, use `xray_debug` instead:
```sh
XRAY_SRC_PATH=. script/xray_debug .
```
Once a blank window has opened, press cmd-t to open the file selection menu. Search for a file, and press enter to open it. The contents of the file should appear in the window. If something does not go as expected, check the dev tools (cmd-shift-i) for errors.
## Running tests and benchmarks
* All tests: `script/test`
* Rust tests: `cargo test` in the root of the repository or a Rust subfolder.
* Front-end tests: `cd xray_ui && yarn test`
* Benchmarks: `cargo bench`
================================================
FILE: Cargo.toml
================================================
[workspace]
members = [
"memo_core",
"memo_js",
"xray_core",
"xray_server",
"xray_cli",
"xray_wasm",
]
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2018 GitHub
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
**Attention:** GitHub has decided not to move forward with any aspect of this project. We'll archive the repository in case anybody finds value here, but we don't expect to actively work on this in the foreseeable future. Thanks to everyone for their interest and support.
# Xray
[![Build Status](https://travis-ci.org/atom/xray.svg?branch=master)](https://travis-ci.org/atom/xray)
Xray is an experimental Electron-based text editor informed by what we've learned in the four years since the launch of Atom. In the short term, this project is a testbed for rapidly iterating on several radical ideas without risking the stability of Atom. The longer term future of the code in this repository will become clearer after a few months of progress. For now, our primary goal is to iterate rapidly and learn as much as possible.
## Q3 2018 Focus
We're currently focused on a sub-project of Xray called [Memo](./memo_core), which will serve as the foundation of Xray but also be available as a standalone tool. Memo is an operation-based version control system that tracks changes at the level of individual keystrokes and synchronizes branches in real time.
## Updates
* [October 2, 2018](./docs/updates/2018_10_02.md)
* [September 14, 2018](./docs/updates/2018_09_14.md)
* [August 28, 2018](./docs/updates/2018_08_28.md)
* [August 21, 2018](./docs/updates/2018_08_21.md)
* [July 31, 2018](./docs/updates/2018_07_31.md)
* [July 23, 2018](./docs/updates/2018_07_23.md)
* [July 16, 2018](./docs/updates/2018_07_16.md)
* [July 10, 2018](./docs/updates/2018_07_10.md)
* [Archives](./docs/updates/)
## Foundational priorities
Our goal is to build a cross-platform text editor that is designed from the beginning around the following foundational priorities:
### Collaboration
*Xray makes it as easy to code together as it is to code alone.*
We design features for collaborative use from the beginning. Editors and other relevant UI elements are designed to be occupied by multiple users. Interactions with the file system and other resources such as subprocesses are abstracted to work over network connections.
### High performance
*Xray feels lightweight and responsive.*
We design our features to be responsive from the beginning. We reliably provide visual feedback within the latency windows suggested by the [RAIL performance model](https://developers.google.com/web/fundamentals/performance/rail). For all interactions, we shoot for the following targets on the hardware of our median user:
| Duration | Action |
| - | - |
| 8ms | Scrolling, animations, and fine-grained interactions such as typing or cursor movement. |
| 50ms | Coarse-grained interactions such as opening a file or initiating a search. If we can't complete the action within this window, we should show a progress bar. |
| 150ms | Opening an application window. |
We are careful to maximize throughput of batch operations such as project-wide search. Memory consumption is kept within a low constant factor of the size of the project and open buffer set, but we trade memory for speed and extensibility so long as memory requirements are reasonable.
### Extensibility
*Xray gives developers control over their own tools.*
We expose convenient and powerful APIs to enable users to add non-trivial functionality to the application. We balance the power of our APIs with the ability to ensure the responsiveness, stability, and security of the application as a whole. We avoid leaking implementation details and use versioning where possible to enable a sustained rapid development without destabilizing the package ecosystem.
### Web compatibility
*Editing on GitHub feels like editing in Xray.*
We want to provide a full-featured editor experience that can be used from within a browser. This will ultimately help us provide a more unified experience between GitHub.com and Xray and give us a stronger base of stakeholders in the core editing technology.
## Architecture
Martin Fowler defines software architecture as those decisions which are both important and hard to change. Since these decisions are hard to change, we need to be sure that our foundational priorities are well-served by these decisions.

### The UI is built with web technology
Web tech adds a lot of overhead, which detracts from our top priority of high-performance. However, web standards are also the best approach that we know of to deliver a cross-platform, extensible user interface. Atom proved that developers want to add non-trivial UI elements to their editor, and we still see web technologies as the most viable way to offer them that ability.
The fundamental question is whether we can gain the web's benefits for extensibility while still meeting our desired performance goals. Our hypothesis is that it's possible–with the right architecture.
### Core application logic is written in Rust
While the UI will be web-based, the core of the application is implemented in a server process written in Rust. We place as much logic as possible in a library crate located in `/xray_core`, then expose this logic as a server when running Xray on the desktop (`/xray_server`) and as a WebAssembly library running on a worker thread when running Xray in the browser (`/xray_wasm`). We communicate between the UI and the back end process via JSON RPC.
All of the core application code other than the view logic should be written in Rust. This will ensure that it has a minimal footprint to load and execute, and Rust's robust type system will help us maintain it more efficiently than dynamically typed code. A language that is fundamentally designed for multi-threading will also make it easier to exploit parallelism whenever the need arises, whereas JavaScript's single-threaded nature makes parallelism awkward and challenging.
Fundamentally, we want to spend our time writing in a language that is fast by default. It's true that it's possible to write slow Rust, and also possible to write fast JavaScript. It's *also* true that it's much harder to write slow Rust than it is to write slow JavaScript. By spending fewer resources on the implementation of the platform itself, we'll make more resources available to run package code.
### I/O will be centralized in the server
The server will serialize buffer loads and saves on a per-path basis and maintain a persistent database of CRDT operations for each file. As edits are performed in windows, they will be streamed to the host process to be stored and echoed out to any other windows with the same open buffer. This will enable unsaved changes to always be incrementally preserved in case of a crash or power failure, and it will preserve the history associated with a file indefinitely.
Early on, we should design the application process to be capable of connecting to multiple workspace servers to facilitate real-time collaboration or editing files on a remote server by running a headless host process. To support these use cases, all code paths that touch the file system or spawn subprocesses will occur in the server process. The UI will not make use of the I/O facilities provided by Electron, and instead interact with the server via RPC.
### Packages will run in a JavaScript VM in the server process
A misbehaving package should not be able to impact the responsiveness of the application. The best way to guarantee this while preserving ease of development is to activate packages on their own threads. We can run a worker thread per package or run packages in their own contexts across a pool of threads.
Packages *can* run code on the render thread by specifying versioned components in their `package.json`.
```json
"components": {
"TodoList": "./components/todo-list.js"
}
```
If a package called `my-todos` had the above entry in its `package.json`, it could request that the workspace attach that component by referring to `myTodos.TodoList` when adding an item. During package installation on the desktop, we can automatically update the V8 snapshot of the UI to include the components of every installed package. Components will only be dynamically loaded from the provided paths in development mode.
Custom views will only have access to the DOM and an asynchronous channel to communicate with the package's back end running on the server. APIs for interacting with the core application state and the underlying operating system will only be available within the server process, discouraging package authors from putting too much logic into their views. We'll use a combination of asynchronous channels and CRDTs to present convenient APIs to package authors within worker threads.
### Text is stored in a copy-on-write CRDT
To fully exploit Rust's unique advantage of parallelism, we need to store text in a concurrency-friendly way. We use a variant of RGA called RGASplit, which is described in [this research paper](https://pages.lip6.fr/Marc.Shapiro/papers/rgasplit-group2016-11.pdf).

In RGASplit, the document is stored as a sequence of insertion fragments. In the example above, the document starts as just a single insertion containing `hello world`. We then introduce `, there` and `!` as additional insertions, splitting the original insertion into two fragments. To delete the `ld` at the end of `world` in the third step, we create another fragment containing just the `ld` and mark it as deleted with a tombstone.
Structuring the document in this way has a number of advantages.
* Real-time collaboration works out of the box
* Concurrent edits: Any thread can read or write its own replica of the document without diverging in the presence of concurrent edits.
* Integrated non-linear history: To undo any group of operations, we increment an undo counter associated with any insertions and deletions that controls their visibility. This means we only need to store operation ids in the history rather than operations themselves, and we can undo any operation at any time rather than adhering to historical order.
* Stable logical positions: Instead of tracking the location of markers on every edit, we can refer to stable positions that are guaranteed to be valid for any future buffer state. For example, we can mark the positions of all search results in a background thread and continue to interpret them in a foreground thread if edits are performed in the meantime.
Our use of a CRDT is similar to the Xi editor, but the approach we're exploring is somewhat different. Our current understanding is that in Xi, the buffer is stored in a rope data structure, then a secondary layer is used to incorporate edits. In Xray, the fundamental storage structure of all text is itself a CRDT. It's similar to Xi's rope in that it uses a copy-on-write B-tree to index all inserted fragments, but it does not require any secondary system for incorporating edits.
### Derived state will be computed asynchronously
We should avoid implementing synchronous APIs that depend on open-ended computations of derived state. For example, when soft wrapping is enabled in Atom, we synchronously update a display index that maps display coordinates to buffer coordinates, which can block the UI.
In Xray, we want to avoid making these kinds of promises in our API. For example, we will allow the display index to be accessed synchronously after a buffer edit, but only provide an interpolated version of its state that can be produced in logarithmic time. This means it will be spatially consistent with the underlying buffer, but may contain lines that have not yet been soft-wrapped.
We can expose an asynchronous API that allows a package author to wait until the display layer is up to date with a specific version of the buffer. In the user interface, we can display a progress bar for any derived state updates that exceed 50ms, which may occur when the user pastes multiple megabytes of text into the editor.
### React will be used for presentation
By using React, we completely eliminate the view framework as a concern that we need to deal with and give package authors access to a tool they're likely to be familiar with. We also raise the level of abstraction above basic DOM APIs. The risk of using React is of course that it is not standardized and could have breaking API changes. To mitigate this risk, we will require packages to declare which version of React they depend on. We will attempt to use this version information to provide shims to older versions of React when we upgrade the bundled version. When it's not possible to shim breaking changes, we'll use the version information to present a warning.
### Styling will be specified in JS
CSS is a widely-known and well-supported tool for styling user interfaces, which is why we embraced it in Atom. Unfortunately, the performance and maintainability of CSS degrade as the number of selectors increases. CSS also lacks good tools for exposing a versioned theming API and applying programmatic logic such as altering colors. Finally, the browser does not expose APIs for being notified when computed styles change, making it difficult to use CSS as a source of truth for complex components. For a theming system that performs well and scales, we need more direct control. We plan to use a CSS-in-JS approach that automatically generates atomic selectors so as to keep our total number of selectors minimal.
### Text is rendered via WebGL
In Atom, the vast majority of computation of any given frame is spent manipulating the DOM, recalculating styles, and performing layout. To achieve good text rendering performance, it is critical that we bypass this overhead and take direct control over rendering. Like Alacritty and Xi, we plan to employ OpenGL to position quads that are mapped to glyph bitmaps in a texture atlas.
There isn't always a 1:1 relationship between code units inside a JavaScript string and glyphs on screen. Characters (code points) can be expressed as two 16-bit units, but this situation is simple to detect by examining the numeric ranges of the code units. In other cases, the correspondence between code units and glyphs is less straightforward to determine. If the current font and/or locale depends on ligatures or contextual alternates to render correctly, determining the correspondence between code points and glyphs requires support for complex text shaping that references metadata embedded in the font. Bi-directional text complicates the situation further.
For now, our plan is to detect the presence of characters that may require such complex text shaping and fall back to rendering with HTML on the specific lines that require these features. This will enable us to support scripts such as Arabic and Devanagari. For fonts like FiraCode, which include ligatures for common character sequences used in programming, we'll need a different approach. One idea would be to perform a limited subset of text-shaping that just handles ligatures, so as to keep performance high. Another approach that would only work on the desktop would be to use the platform text-shaping and rasterization APIs in this environment.
Bypassing the DOM means that we'll need to implement styling and text layout ourselves. That is a high price to pay, but we think it will be worth it to bypass the performance overhead imposed by the DOM.
## Development process
### Experiment
At this phase, this code is focused on learning. Whatever code we write should be production-quality, but we don't need to support everything at this phase. We can defer features that don't contribute substantially to learning.
### Documentation-driven development
Before coding, we ask ourselves whether the code we're writing can be motivated by something that's written in the guide. The right approach here will always be a judgment call, but let's err on the side of transparency and see what happens.
### Disciplined monorepo
All code related to Xray should live in this repository, but intra-repository dependencies should be expressed in a disciplined way to ensure that a one-line docs change doesn't require us to rebuild the world. Builds should be finger-printed on a per-component basis and we should aim to keep components granular.
## Contributing
Interested in helping out? Welcome! Check out the [CONTRIBUTING](./CONTRIBUTING.md) guide to get started.
================================================
FILE: docs/architecture/001_client_server_protocol.md
================================================
# Xray's client/server protocol
Xray is organized around a client/server architecture, with all the application logic located in a central server. User-facing components connect to this server as clients to present the user experience.
## Major application components

All application logic is controlled by a single server that listens on a domain socket located at `ATOM_SOCKET_PATH`. We connect to the server with three different types of clients:
* **CLI:** When you run the `xray` binary, we will check if a socket for the server already exists and is listening. If it does, we will connect to this socket and communicate with the server directly. For example, the application may already be running, but we want to open a new workspace for a given path. To do that, we just connect to the existing socket and send it an `OpenWorkspace` message. If the CLI is unable to connect to the socket, it spawns the Electron app and waits for it to report that it is `Listening\n` on `stdout`.
* **App:** The Electron app in `xray_electron` spawns the server as a child process on startup and identifies itself as the application client via the `{type: "StartApp"}` message. The server then sends the app client application-level command messages like the `OpenWindow` message, which tells the app to open a new window.
* **Window:** When the server tells the app to open a window, it provides a window id, which gets passed to the Electron window in the URL. Once the window loads, it connects to the server's socket and identifies itself as a window, supplying this id.
## The window protocol

The protocol between the window and the server is inspired by the [Flux application architecture](https://facebook.github.io/flux/), though it's probably different in some ways due to the particular needs of Xray.
The state of the UI for any given window is managed entirely by the server. It creates a `Window` object for each connected window, and this `Window` object is responsible for managing a tree of views to be rendered by the connected client. Each view is associated with a unique identifier, a component name, and a plain-old JS object representing the view's state. Views can refer to *other* views via their id.
When views are added to and removed from the `Window` object on the server side, updates are automatically relayed to the client. The server calls `render()` on any newly added views to obtain a JSON object representing the view's state. The window also observes an `updates()` stream associated with each view, and sends a new update for a view's state if the view becomes dirty. To keep things simple, each time a view is updated, its entire state tree is sent again across the wire. For this reason, it's important to limit the size of each view's state object to avoid transmission and parsing overhead. Since the required data for a given view is naturally limited by the viewport, this should be acceptable. We may switch to protocol buffers if JSON parsing overhead becomes a bottleneck.
The root view of a typical window is a `WorkspaceView` with an id of `0`. Its props refer to other views that are displayed in the workspace via their id. For example, the workspace may contain a `BufferView` (editor) with id 1, and also be presenting a `FileFinderView` with id 2 as a modal panel. When views are added to the `Window`, they are provided with a `WindowHandle` via the optional `did_mount` method that allows them to add additional sub-views to the window. When a view adds a sub-view, it receives a `ViewHandle`. When this handle is dropped, the sub-view is automatically removed from the `Window` and deleted on the client.
In the render process, we maintain a `ViewRegistry` which mirrors the state of the `Window` in the server process. The `ViewRegistry` contains an imperative interface for fetching the component and props associated with a particular view id, although most code will interface with the registry declaratively via special React components.
The render process communicates changes to the server via *actions*, which are plain-old JS objects that can be dispatched to a particular view id. These actions are dispatched from the `ViewRegistry` on the render process and make their way to the server, where they are routed by the `Window` object. `Window` calls `dispatch_action` on the view corresponding to the action's specified `view_id`, passing the JSON to the view for handling.
Views can handle an action by updating their own state or the state of other model objects. The `Window` detects state changes via the `updates` stream of any current views, then sends these updates to the client.
The server can also tell the window to *focus* a particular view by calling the `focus` method on its top-level React component. This can be accessed via the `ViewHandle::focus` method on the server side. These commands are simply relayed to the client. The server has no explicit model of focus state.
## Detecting when views need to be re-rendered
Each view is associated with an `updates` stream, which is implemented with the Rust [`futures`](https://docs.rs/futures/0.2.0-alpha/futures/) crate. A full explanation of Rust futures is beyond the scope of this document, but their poll-oriented nature is relevant to this use case.
The `Window` object represents the messages that need to be sent to the client as a `Stream` called `WindowUpdateStream`. This stream implements `poll`, which checks dirty sets for inserted and removed views, then calls `poll` on the updates stream of all currently-visible views. Any views returning `Async::Ready` are then rendered and added to the next update to be sent to the client.
If polling every visible view on each poll of the window update stream turns out to have too much overhead, we can always employ a similar strategy to the `FuturesUnordered` object and track notifications in a more fine-grained way. However, since we're anticipating rendering far fewer than 100 discrete views at any one time, we don't think polling everything should be an issue.
One cool feature of the stream-oriented approach for detecting individual view updates is that a given view's update stream could be composed from other streams in fairly complex ways. For example, the updates stream of a `BufferView` (editor) could derive from a `NotifyCell` on the view itself, plus an updates stream on the view's `Buffer`, which it could share with other `BufferView`s.
## Declarative interface on the client
To consume view state on the client, we implement a `ViewRegistry` that allows you to get the component and props for any view id, and also watch those props for updates.
The `ViewRegistry`'s imperative API is wrapped in a component-oriented interface. At the root of the component hierarchy is the `App` component, which injects the view registry into the component tree's [context](https://reactjs.org/docs/context.html). Beneath the `App` component, the `View` component can be used to render a view with a specific id.
The `View` component takes the view's `id` as a property, then retrieves the view's component and props from the registry and renders the component as a child. It also sets up an observer on the view's props, re-rendering its child component with the new properties when they change. Finally, the `View` component passes a `dispatch` method as a property to the child component, giving it the ability to send arbitrary actions as plain JS objects back to the view's server-side representation.
================================================
FILE: docs/architecture/002_shared_workspaces.md
================================================
# Shared workspaces
## Current features
An instance of `xray_server` can host one or more shared workspaces, which can be accessed by other `xray_server` instances over the network. Currently, when connecting to a remote peer, we automatically open its first shared workspace in a window on the client. The client can use the file finder to locate and open any file in the shared workspace's project. When multiple participants open a buffer for the same file, their edits are replicated to other collaborators in real time.
### Server
* `xray foo/ bar/ --listen 8888` starts the Xray server listening on port 8888.
* The `--headless` flag can be passed to create a server that only hosts workspaces for other clients and does not present its own UI.
### Basic client experience
* `xray --connect hostname:port` opens a new window that is connected to the first workspace available on the remote host.
* `cmd-t` in the new window searches through paths in the remote workspace.
* Selecting a path opens it.
* Running `xray --connect` from a second instance allows for collaborative editing when clients open the same buffer.
### Selecting between multiple workspaces on the client
* If the host exposes multiple workspaces, `xray --connect hostname:port` opens an *Open Workspace* dialog that allows the user to select which workspace to open.
* `cmd-o` in any Xray window opens the *Open Workspace* dialog listing workspaces from all connected servers.
## RPC System
We implement shared workspaces on top of an RPC system that allows objects on the client to derive their state and behavior from objects that live on the server.
### Goals
#### Support replicated objects
The primary goal of the system is to support the construction of a replicated object-oriented domain model. In addition to supporting remote procedure calls, we also want the system to explicitly support long-lived, stateful objects that change over time.
Replication support should be fairly additive, meaning that the domain model on the server side should be designed pretty much as if it weren't replicated. On the client side, interacting with representations of remote objects should be explicit but convenient.
#### Capabilities-based security
Secure ECMAScript and Cap'n Proto introduced me to the concept of capabilities-based security, and our system adopts the same philosophy. Objects on the server are exposed via *services*, which can be thought of as "capabilities" that grant access to a narrow slice of functionality that is dynamically defined. Starting from a single root service, remote users are granted increasing access by being provided with additional capabilities.
#### Dynamic resource management
Server-side services only need to live as long as they are referenced by a client. Server-side code can elect to retain a reference to a service. Otherwise, ownership is maintained by clients over the wire. If both the server and the client drop their reference-counted handle to a service, we should drop the service on the server side automatically.
#### Binary messages
We want to move data efficiently between the server and client, so a binary encoding scheme for messages is important. For now, we're using bincode for convenience, but we should eventually switch to Protocol Buffers to support graceful evolution of the protocol.
### Design

**Services** are the fundamental abstraction of the system.
In `rpc::server`, `Service` is a *trait* that can be implemented by a custom service wrapper for each domain object that makes the object accessible to remote clients. A `Service` exposes a static snapshot of the object's initial state, a stream of updates, and the ability to handle requests. The `Service` trait has various associated types for `Request`, `Response`, `Update`, and `State`.
When server-side code accepts connections, it creates an `rpc::server::Connection` object for each client that takes ownership of the `Stream` of that client's incoming messages. `Connection`s must be created with a *root service*, which is sent to the client immediately. The `Connection` is itself a `Stream` of outgoing messages to be sent to the connected client.
On the client side, we create a connection by passing the `Stream` of incoming messages to `rpc::client::Connection::new`, which returns a *future* for a tuple containing two objects. The first object is a `rpc::client::Service` representing the *root service* that was sent from the server. The second is an instance of `client::Connection`, which is a `Stream` of outgoing messages to send to the server.
Using the root service, the client can make requests to gain access to additional services. In Xray, the root service is currently `app::AppService`, which includes a list of shared workspaces in its replicated state. After a client connects to a server, it stores a handle to its root service in a `PeerList` object. We will eventually build a `PeerListView` based on the state of the `PeerList`, which allows the user to open a remote workspace on any connected peer. For now, we automatically open the first workspace when connecting to a remote peer.
When we connect to a remote workspace, we send an `app::ServiceRequest::OpenWorkspace` message to the remote `AppService`. When handling this request in the `AppService` on the server, we call `add_service` on the connection with a `WorkspaceService` for the requested workspace, which returns us a `ServiceId` integer. We send that id to the client in the response. When handling the response on the client, we call `take_service` on the root service with the id to take ownership of a handle to the remote service.
We can then create a `RemoteWorkspace` and pass it ownership of the service handle to the remote workspace. `RemoteWorkspace` and `LocalWorkspace` both implement the `Workspace` trait, which allows a `RemoteWorkspace` to be used in the system in all of the same ways that a `LocalWorkspace` can.
We create the illusion that remote domain objects are really local through a combination of state replication and remote procedure calls. Fuzzy finding on the project file trees is addressed through replication, since the data size is typically small and the task is latency sensitive. Project-wide search is implemented via RPC, since replicating the contents of the entire remote file system would be costly, especially for the in-browser use case. Buffer replication is implemented by relaying conflict-free representations of individual edit operations, which can be correctly integrated on remote replicas due to our use of a CRDT in Xray's underlying buffer implementation.
================================================
FILE: docs/architecture/003_memo_epochs.md
================================================
The following document describes the sequence of operations that we should perform when the repository HEAD changes, both on the machine where the HEAD change occurred and at remote sites that receive the resulting epoch change.
The algorithms assume that version vectors don't reset across epochs. This does raise the concern that version vectors could grow without bound over the life of the repository, but we're going to suspend that concern temporarily to make progress.
### Creating a new epoch after HEAD moves
Assume we are currently at epoch A described by Tree T.
- Scan all the entries from Git's database based on the new HEAD into a new Tree T'.
- Synthesize and apply operations for all uncommitted changes via a `git diff`. This includes file system operations as well as uncommitted changes to file contents.
- For all buffers with unsaved edits in T:
- Diff the last saved contents in T against the current contents of T' using the path of the buffer in T. This diff will describe a set of regions that have been touched outside of our control.
- Go through each of the unsaved operations in T and check if they intersect with any of the regions in this diff to detect a conflict.
- If there is a conflict, synthesize operations by performing a diff between the contents of T' and the contents of T and apply these as unsaved operations on top of T', then mark the buffer as in conflict.
- Otherwise, transform all the unsaved operations according to the initial diff and apply them to the buffer in T'.
Afterward, we broadcast a new epoch B that contains the new HEAD SHA, the work tree's current version vector, a Lamport timestamp, and all synthesized operations.
### Receiving a new epoch
* Check Lamport timestamp of the epoch. If it's less than the current epoch's timestamp, ignore it. Otherwise, proceed to change the active epoch as follows:
* Scan all entries from Git's database based on the new epoch's HEAD SHA into a new Tree T'.
* Apply operations that are associated with the new epoch to T'.
What happens to buffers?
* For all buffers containing edits not included in the epoch change's version vector:
* If a file with the same path exists in T':
* Diff the contents that are included in the version vector against the contents of T' using the path of the buffer in T. This diff will describe a set of regions that have been touched outside of our control.
* Go through each of the local edits that were not part of the version vector. If they do not directly conflict with a region in the diff, synthesize a new operation with an adjusted position based on the diff and apply it to T'.
* If no file with that path exists in T', we create it with initial contents from T.
================================================
FILE: docs/updates/2018_03_05.md
================================================
# Update for March 5, 2018
## Contributions
We received some great contributions from [@dirk](https://github.com/dirk) that improved error handling ([#5](https://github.com/atom/xray/pull/5)) and refined how we build our N-API bindings ([#7](https://github.com/atom/xray/pull/7), [#9](https://github.com/atom/xray/pull/9)). He also clarified our build process in the documentation and added an explicit Electron dependency now that the new beta supports N-API ([#10](https://github.com/atom/xray/pull/10)). Thanks @dirk!
## 12-week experiment
Our plan is to dedicate 12 full weeks to Xray and see how far we can get with the implementation. We originally planned to start this trial period 2 weeks ago, but decided to defer it in order to spend more time doing planning around our vision for real time collaboration. So last week will count as week 1 of 12. This week is week 2 of 12.
## Text shaping
We're currently rendering text with a fairly naive strategy, where we just transform code points to glyphs and position them one after another with WebGL. The great thing about this strategy is it's really fast. It takes me ~1.2 ms to render a full screen's worth of text on a late 2016 MacBook Pro. The downside of this strategy is that we don't perform correct text shaping.
Last week, we explored running all of our lines through HarfBuzz compiled as a separate WebAssembly module, but in our tests, running HarfBuzz on 50 lines of 100 characters each was taking between 4.5ms and 20ms, depending on the font. Since our target for a frame is 8ms, this makes us pretty reluctant to pursue this path further. We're a code editor, not a word processor, so it's not clear that we *need* all the features that a full-on text shaping engine provides.
If we don't do some sort of text shaping (and HarfBuzz seems like the only game in town), here's what we'll be missing:
* No ligatures support: Text shapers combine code points with tables embedded in the font to decide when to render ligatures. We're a code editor, so this isn't a deal-breaker, but fonts like Fira Code rely on ligatures to render special characters for common programming sequences such as `<=`.
* No kerning support: For fixed width fonts, we weren't able to observe any noticeable difference for a lack of kerning. For variable-width fonts like Helvetica, rendering without kerning looks a bit odd. Again, we're a code editor, so not a deal-breaker.
* No support for bi-directional text. This isn't a deal-breaker in the short term, since all of the dominant programming languages are based on Latin scripts. Again, it's not our ambition to become a general word processor. Long term, however, we need to support right-to-left text appearing in strings and comments in order to be usable by developers working with languages like Arabic and Hebrew. Interestingly, Sublime Text does not appear to support bidirectional text, but we'd like to do better.
* No support for context-sensitive substitutions. In Arabic and Indic scripts, the same characters can render different glyphs depending on their context. Sublime also does not support this.
Based on what we have learned and the above limitations, this is our plan for text shaping going forward. In the near term:
* Don't support full text shaping in the general case. We want to emphasize maximal speed for the common case, which shouldn't require full text shaping. If running text shaping on every line took less than 1ms, it would be worth it, but we'd prefer not to pay what it appears to cost.
At some point in the future, make the following enhancements:
* Add bi-directional text support. We've run into trouble building a library that combines both Rust and C/C++ in a single WebAssembly module, so the ideal path would be to find or write an implementation of the Unicode bi-directional text algorithm in Rust and embed it in `xray_core`. One important detail is that we need to preserve the correspondence between column positions in the source and rendered text in order to render cursors and interpret mouse interactions, so just transforming the text alone will be insufficient.
* Use presentational characters to render Arabic [as described in this blog post](https://blog.mapbox.com/improving-arabic-and-hebrew-text-in-map-labels-fd184cf5ebd1), again porting an existing implementation of this transformation to Rust and incorporating it into `xray_core`. Again, we'll need to maintain a mapping of how characters in the input and the output map for cursor positioning. Several of the existing implementations of this transformation are GPL-licensed, so we'll need to be careful to avoid deriving our work from one of them.
* Add limited ligatures support at some point in the future to `xray_core`. This would involve loading the font and consulting the lookup table for ligatures. The goal would be support for fonts like Fira Code, and the hope is that we will be able to efficiently perform just this subset of the generalized text shaping workload within our budget of 1ms.
* Render sequences of Indic characters as atomic units via canvas rather than trying to render and composite individual glyphs like we do for other scripts. This would rely on the text-shaping built into the browser to render words in these scripts. We will pay a performance cost, but since we're anticipating these characters to appear rarely as part of comments and strings, it should be acceptable and better than adding the performance cost and complexity of full shaping for cases where it isn't needed.
Producing a lightning fast editor that runs on the web is going to involve trade-offs, and we'll need to make some tough decisions. Avoiding full text shaping is one of them. It would be great to be fast *and* perfectly correct in all cases, but we're not willing to sacrifice speed in the common case for perfect correctness at the corners.
We're going to post some help-wanted issues to see if anyone is interested in helping out with some of the compromise solutions in the above plan.
So in conclusion, we didn't end up merging any *code* related to text shaping, but we did learn a lot and came up with a clear plan for how to proceed.
## Anchors and selections
The bulk of the week was spent adding support for selections to the editor. The first step was an introduction of a new abstraction called *anchors*. Anchors serve a similar role to markers in Atom today, but they have a much cleaner implementation due to the buffer being a CRDT.
An anchor is a *value* that tracks a logical position in a buffer. You create an anchor by calling one of the following methods on the buffer:
* `anchor_before_offset`
* `anchor_after_offset`
* `anchor_before_point`
* `anchor_after_point`
These return an opaque `Anchor` value, which can be converted back to a concrete offset or point in the future via the following methods:
* `offset_for_anchor`
* `point_for_anchor`
Internally, an anchor is an enum that represents either the `Start` or `End` of the file or some point in the `Middle` of the file via an `insertion_id`, `offset`, and `bias`. If you create an anchor at offset 10, its position will be updated by any edits that occur prior to offset 10, so that it always tracks the same logical position in the text. So if you create this anchor *before* offset 10, it will have a *left* bias and remain at that offset even if there is a subsequent insertion at its exact location. If the anchor is created *after* offset 10, it will have a *right* bias and be pushed rightward by insertions at its location.
Selections are built on top of anchors. Each anchor maintains a vector of selections ordered by their start anchor, maintaining the invariant that the selection ranges are always disjoint. We use anchors for selections rather than absolute positions so that the logical intention of the user is maintained even in the face of edits to the buffer by other users or by packages. We implemented basic cursor movements and selection expansions (up, down, left, and right) as well as methods to add a selection above and below the current.
We plan to render selections and cursors as additional WebGL shader passes that draw solid rectangles. We have the plumbing mostly in place to do this, but haven't finished actually populating the buffers on the GPU to tell the shaders where to draw. We're hoping to have that finished early this week, so we can move on to handling the input to actually move the selections and cursors around. That will raise the question of how we handle key bindings and commands in Xray, which could take some time to iron out.
Once we can render and manipulate selections, we'll move on to handling keystrokes to perform actual edits to the buffer. The `splice` method already exists to enable edits, so it should just be a matter of calling it in a loop in reverse order of the selection ranges. Once we add some caching related to translating anchors to positions, we can measure our performance and see how many cursors we can type with within our 8ms target window. Hopefully we do well.
## The week ahead
We'll be a bit short-handed this week due to @as-cii being on reactive duty for Atom and @nathansobo heading to Denver on Wednesday to give a talk at Pivotal Labs. We hope to finish selection rendering and ideally also get an initial solution in place for key bindings and commands to move those selections around. If things go really well, we'll start on editing.
================================================
FILE: docs/updates/2018_03_12.md
================================================
# Update for March 12, 2018
## Contributions
We got some help from [@apcragg](https://github.com/apcragg), who made [the build script for our N-API bindings support Linux and Windows](https://github.com/atom/xray/pull/25). Also [@maxim-lian](https://github.com/maxim-lian) helped by updating our repo to correctly use [Cargo workspaces](https://github.com/atom/xray/pull/26). We also saw a bunch of people building the project and exploring. There's not much to see yet, but we're happy that people are interested.
## Hacker News and new contributor interest
Someone posted this repo to Hacker News last week, which drew a bunch of attention to the project. Overall this is obviously great, but I sort of regret reading the comments. People can be so mean on HN and it's really a drag to feel like you're the target of vitriol and derision. I'm just over here trying to build stuff. But I guess you gotta just have a thick skin and keep coding.
Thanks to everyone who jumped in with an interest in contributing! The engagement was really helpful and prompted me to post [the beginnings of a contributing guide](https://github.com/atom/xray/blob/master/CONTRIBUTING.md) and some initial [help-wanted issues](https://github.com/atom/xray/labels/help%20wanted). Looking forward to engaging with someone who wants to dive in on one of those problems.
## Progress on selections
We have selections and cursors rendering on a branch, and hope to merge it this week. The results are promising, and we're only exceeding our 8 ms frame budget when we reach thousands of selections in a document with thousands of edits. We think we can do better with some optimizations.
We plan to merge an initial PR that lays the groundwork this week. This will include the ability to model and render selections, but we need to get a basic key bindings and commands system in place before this will mean much in the UI. We'll post some more detailed benchmarks once we get this basic implementation integrated and have a chance to look at the profile for any basic optimizations.
## Big architectural changes incoming
A conversation with [@joefitzgerald](https://github.com/joefitzgerald) at Pivotal in Denver led to a big shift in how we plan to interoperate between JavaScript and Rust. Originally, we planned to embed a shared library written in Rust into the Electron render process and use Node's N-API to interoperate. Somewhere along the line, we realized that to maintain a global edit history for all files that wasn't tied to a project, we would need to unify all of the file system interaction in a single process to avoid race conditions between multiple Electron windows. This process gradually grew in our thinking to take on other responsibilities, such as connecting to other processes to facilitate real time collaboration. Eventually, we decided that we would need to route all I/O through this central process.
Joe asked a simple and insightful question. If we're planning to route all I/O through an external process, then what's the point of the Node APIs available in Electron? They're all designed around I/O, but we won't be doing any. This made it click for me that we should actually move *all* of the application logic completely into the external Rust process and treat the UI as an extremely thin layer that interacts with the app via an async channel. We'll use something akin to the Flux architecture, where the UI can submit actions to the server on the channel and receive JSON payloads representing state to render. Yes, I realize that this makes us even *more* similar to Xi architecturally.
This will make the UI code 100% ordinary HTML and JavaScript with no assumption of any special APIs, which really supports our goal of running on the web. For the desktop experience, we can use any solution that gives us a modern, standards-compliant web view and run the core of the application as a server process. The easiest solution on the desktop would be Electron, but we could even use an embedded WebKit view on the Mac to save on bundle size if we wanted to absorb that complexity (it's not clear it's worth it though). On the web, we'll need the server process to run inside the browser, because this whole design assumes an ultra-low-latency connection between the front end and the back end. In this scenario, we can compile some subset of the server process that runs on the desktop to WebAssembly and run it in a worker thread. This web "back end" can then establish a peer-to-peer connection with another Xray process running in the cloud or on another user's computer. This will provide a 100% compatible experience between the browser and the desktop and enable collaboration between users in either environment.
Here's a somewhat complicated picture of our current thinking. Note the centrality of the "Xray Core Process". All the logic and extensibility lives there, and the view becomes much simpler.

We'll be starting on these changes this week. The first step is to completely eliminate Electron and wrap `xray_core` in a server process, which we're thinking about calling `xray_server`. The front end will be `xray_client` and compile down to a simple HTML file. When you open it in the browser, it will connect to a local port based on the query parameter in the URL.
Once we accomplish that, we'll need to figure out an alternate variant of the server, maybe called `xray_server_wasm`, which runs a "server" in one or more worker threads for a low-latency experience on the web. Compiling pure Rust to WebAssembly hopefully won't be a big deal, but we are a bit worried about including TreeSitter, whose runtime and grammars are written in C. We think we can figure it out though.
In light of these big structural changes, we'll probably be holding off on merging any PRs this week until the dust has settled. Thanks for your patience.
## What about Xi?
I've mentioned elsewhere in this repo the conversation I had with Raph Levien a few weeks ago about collaborating with Xi. In light of the fact that we're moving even closer to Xi architecturally, I can already anticipate the criticism that what we're doing here is redundant and we should just contribute to Xi. Why bother with this?
First of all, I'm definitely not opposed to some sort of partnership with Xi. I have tremendous respect for the technical work they've done and really *like* Raph personally. That said, it's complicated. We have our own ideas for where we want to take this project and what it means to GitHub. At the moment, the only way I see to guarantee we can achieve those ideas is to have enough creative control to iterate and follow our own path. Xi has been around for a while now, and it would be rude and presumptuous to roll in and start telling them how they should run their project. If we want to be active participants in our own destiny without telling other people what to do, it seems like we need to take our own path, at least for now.
We would like to learn as much as we can from the Xi team, and we'd be happy if we could provide value to them as well. It seems like sharing ideas would be a necessary prerequisite to sharing code. If the work we're doing proves valuable enough for the Xi team to want to invite us in as partners, then that would be great, but we're really still experimenting here with how best to achieve our particular goals. It's not clear that we have the same priorities as the Xi team, and it's not obvious that our priorities are sufficiently compatible for us to share a codebase. And that's okay. There's room for more than one editor in the world.
If we do end up having enough social and technical alignment to share code, then that would obviously be great for us, because we'd be working with some incredibly smart people. I want to be open minded, but I also want the freedom to create based on our own ideas without telling anyone else what to do. So it's complicated. It's not crystal clear what the right path is, but we're doing our best to make the best decisions with the experience and wisdom we've managed to acquire to this point.
================================================
FILE: docs/updates/2018_03_19.md
================================================
# Update for March 19, 2018
## Contributions
We have a couple of PRs pending ([#36](https://github.com/atom/xray/pull/36) and [#34](https://github.com/atom/xray/pull/34)), but we're holding off on merging anything until we complete some major architectural changes. Sorry for the delay [@LucaT1](https://github.com/LucaT1) and [@breezykermo](https://github.com/breezykermo).
## Selections optimizations
[I merged a PR](https://github.com/atom/xray/pull/45) from [@as-cii](https://github.com/as-cii) that optimized our initial implementation of selections. While we still think there is room for more optimization, we're pretty happy with our early results. On Antonio's machine, he's moving 1k selections in a document with 10k edits in under 2ms. Based on some hacky experimentation to avoid allocations, we think we can make that even faster. At some point, with some number of selections, we're going to end up blowing our frame budget, but we think maintaining it into the thousands of selections ought to be acceptable.
## Significant progress switching to a client/server architecture
[@as-cii](https://github.com/as-cii), [@maxbrunsfeld](https://github.com/maxbrunsfeld) and I have made decent progress on a PR to switch Xray to the client/server architecture I [discussed last week](./2018_03_12.md#big-architectural-changes-incoming).
We're implementing an event-driven server using [Tokio](https://tokio.rs/), and have what seems like a viable approach for relaying data between the server and the window that will leave the door open to packages implementing custom views that slot in cleanly next to built-in features.
Check out [#46](https://github.com/atom/xray/pull/46) for details. I've also written [a fairly detailed document](https://github.com/atom/xray/blob/198e3bdf3c284679a5520923b0e27b079cc23377/docs/architecture/001_client_server_protocol.md) explaining our architecture and the protocol that will become a permanent part of Xray's documentation once this PR is merged.
================================================
FILE: docs/updates/2018_03_26.md
================================================
# Update for March 26, 2018
## Contributions
[@matthewwithanm](https://github.com/matthewwithanm) of Facebook's Nuclide team helped us improve our React game by [avoiding the use of deprecated string refs](https://github.com/atom/xray/pull/50) and [avoiding the use of component `state` for data that is unrelated to rendering](https://github.com/atom/xray/pull/51). Thanks Matthew!
## The switch to a client/server architecture is complete
We merged [#46](https://github.com/atom/xray/pull/46) last week, completing our switch to a client/server architecture. JavaScript in Xray's user interface now communicates with the Rust core over a domain socket rather than via a native V8 extension, which dramatically simplifies our build process. We connect to the server over a domain socket, which unfortunately means that Xray doesn't work on Windows for now due to the unavailability of domain sockets in the OS. If anyone is interested in adding support for named pipes on Windows to `xray_server`, we'd gladly collaborate on a pull request. If you've tried to build Xray and ran into trouble, now would be a good time to try again on non-Windows platforms after [carefully reading our build instructions](../../CONTRIBUTING.md#building).
## Updated roadmap
We've adjusted our roadmap a bit to prioritize collaborative editing rather than focusing on producing a WebAssembly-based editor build. A browser-compatible editor is still part of our long term plan and we're designing the system with that requirement in mind, but since we want all of Xray's features to support remote collaboration, it makes sense to get it into the architecture early.
## Fast file finding
Xray is currently hard-coded to open a single buffer containing the dev build of React, which isn't very useful. To fix that, [we're adding a file finder](https://github.com/atom/xray/pull/55) that can quickly filter all files in the project that match a given subsequence query.
To obtain good search performance, we're maintaining an in-memory replica of the names of all the files and directories in the project which we can brute-force scan on a background thread whenever the query changes. We represent this data as a simple tree which reflects the hierarchy of the file system. To ensure that we can respond to user input within our 50ms deadline for coarse-grained interactions, we really want to be able to run queries before we finish reading all of the entries from the file system. To enable that, we're designing our in-memory file tree to support concurrent reads and writes.
We spent a decent amount of time exploring different approaches that could enable this, and ultimately we decided to protect the entry vector for each directory with a fine-grained read/write lock. When [@as-cii](https://github.com/as-cii) first suggested this approach, I was worried that it would consume too much memory, but I then discovered the [parking_lot](https://github.com/Amanieu/parking_lot) crate, whose `RwLock` implementation only consumes a word of memory per instance.
The basic logic of searching will be in `xray_core` and is modeled as a `Future` to give us flexibility in how we schedule it. For `xray_server`, which runs as a standalone binary and has full threading capabilities, we can simply spawn the search on a thread pool. Until WebAssembly adds threading support, we can implement some kind of background scheduler that uses `requestIdleCallback` to break the work up into smaller chunks before yielding the thread.
Rust futures are based on a polling model, where the executor repeatedly calls `poll` on the future to drive it to completion. To support granular yielding in a single-threaded environment, we really need to execute the minimal amount of work each time `poll` is called on our `fs::Search` future. To enable that, we maintain a stack within the future that tracks our current position within the tree. The stack keeps an `Arc` (atomic reference-counted) pointer to the entries of each directory, along with the current index into that list of entries. Since concurrent writers could insert entries that might invalidate these indices, we treat directory entries as clone-on-write if we detect they are referenced by more than one `Arc` pointer, via the `Arc::make_mut` method. Most of the time, writes should be able to freely mutate a directory's vector of entries, but if that write might interfere with an ongoing search, we clone the vector to avoid invalidating any active indices.
The work is still in progress, but we're hoping this design will enable a highly responsive user experience for file finding even in the presence of extremely large source directories. We'll report on our findings in the next update.
## Thoughts on key bindings and actions
We're optimistic that we can finish up a basic (but fast) file finding UX some time next week. After that, I think it's time to tackle key bindings. Atom's key binding implementation is insanely complex and jumps through some ridiculous hoops to support a long tail of different locales and features like overlapping multi-stroke bindings, binding to key-up events, etc. Eventually, we want Xray to support all of these features as well, but in the short term, we want to keep the implementation as simple as possible. We're going to start by targeting single-stroke bindings and avoid any gymnastics to work around browser limitations in various international locales. We'll revisit these concerns after getting some more traction in other areas of the system.
Our strategy with Atom was to "embrace the web", which led us to associate key bindings with CSS selectors. This was a neat idea and served Atom reasonably well, since CSS selectors are a powerful tool for describing a specific context in the DOM. However, in the end I don't think the power was worth the complexity of full-blown CSS selectors. Their flexibility makes it extremely difficult to build a user interface for configuring bindings, and the complex rules for evaluating selector specificity can lead to a frustrating experience.
With Xray, I want a system for making key bindings context-sensitive that is flexible enough to support most reasonable use cases, but not so flexible that it becomes hard to reason about. My thoughts are still evolving on this, but I'm thinking about representing the context in which we interpret a key binding as a set of simple tokens called an "action context". A custom component can be used to refine this context for a subset of the view hierarchy by adding or removing tokens.
Let's use an example to explain how the system would work. This is going to be a bit contrived, but it's not totally unrealistic. Imagine you wanted to write a spell-checking extension that allowed the user to display a list of suggestions next to a misspelled word that could be navigated from the keyboard. It might look something like this:
```js
class SpellingSuggestions extends React.Component {
render () {
...
}
}
```
In the example above, we declare a refinement to the action context via an `ActionContext` JSX tag at the root of the component, adding the `SpellingSuggestions` and `VerticalNav` tokens and removing `Insert`. We then declare three actions that this component handles via `Action` tags: `NavUp`, `NavDown`, and `Confirm`.
Normally in the editor, the up and down arrows would be bound to the `MoveCursorUp` and `MoveCursorDown` actions, which move the cursor. But when your menu is displaying, you want the arrow keys to select the next or previous item in the list instead. To enable that, the up and down arrow keys could be bound to `NavUp` and `NavDown` within the `VerticalNav` context. The left and right arrow keys would continue to move the cursor, and potentially dismiss the menu if you moved out of the misspelled word.
If you didn't like the menu hijacking your cursor movement, you could unbind the arrow keys in the `VerticalNav` context, or maybe leave the arrow keys bound but preserve the Emacs-style `ctrl-p` and `ctrl-n` bindings for cursor movement.
Users might also bind `j` and `k` to `NavUp` and `NavDown` in any context that is not `Insert`. The text editor would introduce `Insert` to the action context because it inserts text, but the spelling suggestions menu could temporarily override that by removing `Insert` from the context. So could a Vim extension in command mode.
This system is still pretty complex, but its semantics are much simpler than CSS selectors, and it seems like it could cover compositional scenarios like the one described above rather well. We could easily provide some kind of global registry of action context tokens that gives them a human-readable name and description, then use that in a user interface that makes it convenient for users to customize their bindings in specific contexts without opening a JSON file.
================================================
FILE: docs/updates/2018_04_02.md
================================================
# Update for April 2, 2018
## Contributions
[@chgibb](https://github.com/chgibb) helped us get an initial Travis build in [#48](https://github.com/atom/xray/pull/48). This partially addresses [our help-wanted issue](https://github.com/atom/xray/issues/22), but we're still going to leave it open since we want to run the minimal tests for a given change to mitigate one of the downsides of being a monorepo. Thanks to @chgibb for getting this started.
## Almost done with the file finder
Our main focus last week was finishing up the file finder feature that I also [discussed in the previous update](./2018_03_26.md#fast-file-finding). The last update was all about our approach to scanning the directory tree from the file system into an in-memory representation, and the approach we described remains pretty much unchanged. We plan to merge [the pull request](https://github.com/atom/xray/pull/55) early this week.
### Leveraging prior art
Last week was all about using that in-memory representation to return search results based on a "fuzzy" search query. After an initial attempt that yielded decent performance but poor ranking of the search results, we decided to investigate existing solutions. We tried two command-line fuzzy finders, [`fzy`](https://github.com/jhawthorn/fzy) (written in C) and [`fzf`](https://github.com/junegunn/fzf) (written in Go) on the Electron repository, which contains over 500,000 files when `.gitignore` is disabled.
Both tools yielded excellent performance and high quality results, and since the [core matching algorithm](https://github.com/jhawthorn/fzy/blob/47609dbf73789bc28289576a12177965c04ef49b/src/match.c#L70) behind `fzy` was reasonably straightforward to read and understand, we decided to port it to Rust. You can [read more about the algorithm](https://github.com/jhawthorn/fzy/blob/master/ALGORITHM.md) in the `fzy` repository, but at a high-level, their solution is based on dynamic programming and determines the optimal match positions for a given substring by populating a matrix with cascading values. We copied their basic approach almost exactly, but we also enhanced it a bit to make use of the existing tree structure to recycle computation for common path prefixes.
### Matching and scoring
Xray matches paths in two phases. First, [we scan the tree to determine which paths match the query](https://github.com/atom/xray/blob/3c25fc7a7328b0ce1f6746990689e0f80bca3009/xray_core/src/project.rs#L93), populating a hash map to mark which file system entries either match the query or contain matches to the query. Simply matching the query only requires us to perform linear character comparison and is fairly cheap to perform, and this allows us to constrain the search space for the next step. Once we determine matches, [we then walk the tree to associate each matching path with a score](https://github.com/atom/xray/blob/3c25fc7a7328b0ce1f6746990689e0f80bca3009/xray_core/src/project.rs#L154). Scoring is O(N*M), where N is the length of the query and M is the length of the path. Luckily, longer queries tend to match fewer paths, which means when it is most expensive to compute scores, we usually end up needing to compute fewer of them.
### Results
Overall, we're happy with the results. The quality of the matches is extremely high thanks to the work [@jhawthorn](https://github.com/jhawthorn) put into tuning the scoring criteria. Since ranking matches is somewhat subjective, basing our results on an existing, fairly mature solution gives us a lot more confidence in the quality of the results. The performance is also pretty decent. Searching for `init` in the 151,201 files of the [`blink`](https://chromium.googlesource.com/chromium/blink/+/master) repository yields results in ~120ms on my machine. Searching for `init.py`, which is a more selective query, drops that to ~16ms.
### Future improvements
These early results are good, but we think there's room for improvement. First, we're still matching on a single thread, and it seems like we might be able to use [Rayon](https://github.com/rayon-rs/rayon) to parallelize the matching over multiple CPU cores. We could also do a better job reporting progress. 20ms into the query we could check if we are more than 20% complete with ranking, and if we aren't we could display some sort of subtle progress indicator. That could help the search feel *responsive* even if it takes 100+ms to return results. That said, we're going to call this good for now and move on to other areas. The file finder *feels* fast and fluid now, even for big repositories, and we think we have a solid foundation in place for future improvements.
## Other improvements
Since we're still fairly early in development, we're allowing branches to get longer and heavier than we might in a more established project. Folded into the file finder branch are a few smaller improvements that made sense to add along the way.
### Window and view API refinements
We display the file finder as a modal in the workspace, and when the user selects a file or cancels the modal, we need to take action in the workspace. After pondering a couple of approaches, we ended up deciding to use a fairly traditional delegate pattern here, where the `WorkspaceView` implements the `FileFinderViewDelegate` trait and passes a weak reference of itself to the `FileFinderView`.
Trouble is, how does the `WorkspaceView` obtain a weak reference to itself? Since the `Window` wraps each view in an `Rc`, we ended up deciding that it would be convenient for the window to [pass each view a `WeakViewHandle` to itself](https://github.com/atom/xray/blob/3c25fc7a7328b0ce1f6746990689e0f80bca3009/xray_core/src/window.rs#L116) in the view's `will_mount` hook. Many views can simply ignore this parameter, but if views need to perform delegation they can safely store and clone it without worrying about leaking memory, enabling them to hand themselves to child views as a delegate. This is how [we connect](https://github.com/atom/xray/blob/3c25fc7a7328b0ce1f6746990689e0f80bca3009/xray_core/src/workspace.rs#L48) actions dispatched on the `FileFinderView` to state changes in the workspace.
### Focus API
We also needed a way to focus the file finder when it displays, then focus the newly opened editor after a file is selected. We decided to implement this on the server side via the new `ViewHandle::focus` method. Whenever this method is called, it assigns the `focused` field on the `Window` to the focused view's id. This gets relayed to the client, which calls the `focus` method on the corresponding React component.
For now, we aren't interested in replicating the focus state to the server. Server-side code can request that a view be focused, but it can't ask which view is currently focused. This is a decision we can revisit later, but focus is a very weird piece of global state that references individual DOM nodes, so it doesn't seem worth the complexity of attempting to represent it outside of the browser environment. This means that the modal panel will still need to have a bit of custom focus handling logic in order to restore focus to the previous element when cancelled, but so far this seems manageable.
### CLI improvements
We've also changed the structure of the CLI's relationship with the server and Electron slightly. Previously, when we spawned Electron, we could ask it to relay a message to the server via the `XRAY_INITIAL_MESSAGE` environment variable. Now, the CLI waits for the Electron app to emit `Listening\n` on `stdout`, then attempts to connect to the server itself to send the initial message.
We made this change to deal with error handling. The server may need to report an error message to the CLI over the socket, and this was going to be complicated to achieve with the previous approach of delegating the initial message send to Electron.
Waiting for Electron to tell us the server has started may introduce some latency, which is why we initially preferred the delegation approach, but we'll need to actually measure this before the additional complexity is warranted in light of the need to receive a response from the server.
## The week ahead
We hope to merge the file finder PR. All that's left is some basic styling and iteration on focus handling.
After that, we plan to start working on shared headless workspaces. The hardest part is enabling concurrent text editing, but that's pretty much solved by our use of a CRDT as Xray's core text-storage structure. However, there's still plenty of complexity remaining in terms of how we actually connect buffer replicas together and structure the client/server interaction.
We plan to explore [Cap'n Proto RPC](https://capnproto.org/rpc.html), which seems to have an actively-maintained [Rust implementation](https://github.com/capnproto/capnproto-rust). None of us has ever used it, so we'll need to see how the reality matches up to its promises, but on initial investigation it looks like it could be a good fit for Xray's needs.
Cap'n Proto offers a compact yet evolvable binary representation for messages, and the RPC system seems like it makes it easy to expose any object over the network in a [secure way](https://capnproto.org/rpc.html#security) and [efficiently call its methods](https://capnproto.org/rpc.html#time-travel-promise-pipelining). As long as they're well-implemented, these features seem sufficiently general to be a foundation for network interaction between Xray instances.
At this point, Xray is still too young to be usable. But we're trying to ruthlessly prioritize and zero in on the highest value and highest risk aspects of the system as soon as possible. It's unfortunate that Xray doesn't build on Windows right now, but there's honestly not that much to see or use anyway. If you're a Windows user and you're interested in helping out, getting a named-pipes- or TCP-based connectivity solution in place on Windows would be a great place to start.
================================================
FILE: docs/updates/2018_04_09.md
================================================
# Update for April 9, 2018
## Shared workspaces
We spent the entire week [laying down the foundations that will enable shared workspaces](https://github.com/atom/xray/pull/61). What are shared workspaces? The basic idea is that you'll be able to start a headless Xray instance on a remote machine, then have multiple developers connect and co-inhabit that workspace from their local machines.
The fact that our buffers are CRDTs makes concurrent buffer editing relatively straightforward to implement, but we still need a solution for synchronizing state between peers and performing requests and responses. After experimenting a bit with Cap'n Proto RPC and feeling a bit overwhelmed by the generated code, we decided to explore what a custom solution might look like.
We're not quite done with the implementation, but after a lot of thinking and a bit of wheel-spinning, we have a reasonably solid design for a capabilities-based RPC system that will be a good fit for our use case. I've written up [a much deeper description](https://github.com/atom/xray/blob/9a1a02b7b608225a4c60fa364a1d60c1ef5f59c2/docs/architecture/002_shared_workspaces.md) that will become part of Xray's permanent documentation. Here's a *huge* diagram to get you interested:

## The week ahead
We hope to finish an initial take on the RPC system next week, then start using it to build out a basic demo of shared workspaces. Our goal is to make it possible to find and open paths on the client and support concurrent editing by multiple clients. That may spill into the following week, when I'll be traveling to Amsterdam for some in-person full-throttle coding with [@as-cii](https://github.com/as-cii).
================================================
FILE: docs/updates/2018_04_16.md
================================================
# Update for April 16, 2018
## Contributions
[@rleungx](https://github.com/rleungx) [set up a basic benchmarking framework](https://github.com/atom/xray/pull/62) that uses [Criterion](https://github.com/japaric/criterion.rs).
## Progress on shared workspaces
By the middle of last week, we had a first iteration of the RPC system that we were happy with, and started using it to build out shared workspaces. To do that, we're adding replication to Xray's model objects. The goal is to be able to use model objects without worrying about whether or not they are remote or local.
We're converging on a design where most model objects are represented by a trait, with local and remote concrete implementations of this trait. For example, the project model has a `Project` trait along with `LocalProject` and `RemoteProject` implementations. We also have an `rpc::server::Service` implementation that has a shared reference to a `LocalProject` and exposes it to a remote client. On the client side, the `RemoteProject` owns a `rpc::client::Service` object. When you call a method like `open_buffer` on the client side, it's translated into a network request to a service on the remote peer, which translates the request to a method call on the corresponding `LocalProject`.
We have unit tests passing for replication of file system trees and projects, along with the initial state for buffers. We still need to replicate buffer edits. We also have some work to do to refine our treatment of ownership for services on the server side. We think the best approach might be to enable both the client and the server to retain services. So if the server wants to keep a service alive and return it across multiple requests or updates, it can store off a handle to the service. Or it can drop the handle, in which case the client can take ownership over the service. Once the client drops, we'll communicate this fact across the wire and decrement the service's reference count. It's essentially an `Rc` transmitted over the network. We'll see how it goes.
## Syntax awareness
This week, [@maxbrunsfeld](https://github.com/maxbrunsfeld) will be diving in on integrating the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) incremental parsing system into Xray. The first step involves some adjustments to the runtime to enable syntax trees to be fully persistent and sharable across threads. Xray's buffers already support this kind of usage, so including syntax trees will enable lots of interesting computations to be pushed into the background.
## Heads-down in Amsterdam
[@as-cii](https://github.com/as-cii) and I are meeting up in Amsterdam this week to write as much code as possible together in person. To that end, I'm going to keep this update short so we can get to work.
================================================
FILE: docs/updates/2018_04_23.md
================================================
# Update for April 23, 2018
## An initial implementation of shared workspaces is complete
Last week we [completed the initial milestone for shared workspaces](https://github.com/atom/xray/pull/61), which allows you to connect to a remote Xray instance over TCP and open one of its workspaces in a new window. You can then use the file-finder to locate and open any file in the remote project and collaboratively edit buffers.
There is obviously a ton more work to do until we can call our implementation of shared workspaces "done". Xray isn't even really usable right now for even basic text editing due to a long tail of missing features. Regardless, we think it's really important to have this infrastructure in place early. From here on out, every feature we build will be designed to support remote collaboration, and the foundation we've laid over the last two weeks will make that possible. We're pretty excited about the potential of the RPC system we've built. By combining remote procedure calls with eagerly replicated state and the judicious use of conflict-free replicated data types, we think we can abstract away the physical boundaries that separate individual machines and developers.
## Browser compatibility
The [four pillars of Xray](../../README.md#foundational-priorities) are performance, real-time collaboration, browser compatibility, and extensibility. 8 weeks into focused development, we're feeling confident that Xray's architecture can meet our desired performance goals, and we've validated an approach that will bake collaboration into the heart of the system. Before burning down the long list of features that make up a usable text editor, we want to take some time to put the last two pillars in place by getting Xray working in a browser and laying the foundation for extensibility. By taking care of all four of these high-level concerns early, we'll ensure that they're supported as we build out the remainder of the system.
To that end, we're now turning our attention to browser compatibility. We've actually been designing Xray with this goal in mind from the beginning. Today, Xray comprises two major components: `xray_server`, which contains the core application logic, and `xray_electron`, which presents the user interface and communicates with `xray_server` over a local socket. Now we need to create versions of these two components that run inside of a web browser.
As a browser-based counterpart of the `xray_server` executable, we're creating `xray_wasm`, which will be compiled to WebAssembly and run in a web worker. `xray_wasm` will share the majority of its implementation with `xray_server` via a dependency on the platform-agnostic `xray_core` crate. `xray_core` abstracts its communication with the outside world in terms of abstract traits defined by the Rust `futures` crate. Methods for connecting to remote peers and the user interface accept and return `Stream`s of binary buffers, and the application also expects to be passed `Executor` instances that can schedule futures to be executed in the foreground or background.
In the browser, we'll move data via message channels and web sockets rather than using domain sockets and TCP, but these are just transport layers and are easy to abstract in terms of `Stream`s and `Sink`s so they can be passed into the platform-agnostic code. Similarly, we'll integrate with the browser's event loop by writing a custom `Executor` that uses the `Promise` API or `requestIdleCallback` to defer computation.
We're using the `wasm-bindgen` crate to interoperate between Rust and JavaScript, and last Friday we managed to get asynchronous bi-directional communication working between Rust and JavaScript. This week, we plan to extract as much UI code as possible from `xray_electron` into a common library called `xray_web`. We'll then create `xray_browser`, which will package everything together into a browser-deployable bundle that runs the core application logic in a web worker and connects it to the UI running on a web page.
Since browsers strongly sandbox interaction with the underlying machine, we will only support interactions with remote shared workspaces when Xray is running in a browser. We plan to add WebSockets support to Xray server so that it can accept connections from browser-based clients. We'll also add an `--http` option that exposes a simple web server from `xray_server` that serves a browser-based UI to clients. This will obviously require a security scheme to be useful in a production setting, but it seems like a good way to develop the browser-based user experience. A simple password-list based security scheme would also be pretty quick to add.
================================================
FILE: docs/updates/2018_04_30.md
================================================
# Update for April 30, 2018
## Xray now runs in a browser
Last week, we merged [#67](https://github.com/atom/xray/pull/67), which allows Xray to be run inside of a web browser. The design is different in a couple of details from what I anticipated in last week's update, but the big picture is pretty much what we expected. The main difference is that for now, we decided not to bake HTTP and WebSockets support directly into `xray_server`, but instead place them in [a simple development server](https://github.com/atom/xray/blob/92f6c1959f843059738caff889df0843836cc006/xray_browser/script/server) which is written in Node and proxies WebSocket connections to `xray_server`'s normal TCP-based connection listener. This made it easy to integrate with middleware for WebPack that recompiles our JS bundle during development. Long-term, we'd still like to host web clients directly from `xray_server`, but we want to bundle the static web assets directly into the binary so that `xray_server` can continue to work as a standalone executable. This should definitely be possible, but it doesn't feel important to address it now.
## Demo this week
We plan to show off Xray's progress to some colleagues here at GitHub later this week, so to that end, we'll focus some of this week on smaller details that, while not fundamentally advancing architectural concerns, will end up making for a better demo.
By the end of this week, we should be rendering the cursors and selections of remote collaborators. We also plan to add a discussion panel to the Xray workspace where collaborators can have a text-based conversation that is linked to their code.
Once the demo is behind us, we plan to take a few days to burn down any technical debt we have accrued in the 10 weeks we've been actively developing the project. The biggest thing on our agenda is updating to [futures 0.2](http://aturon.github.io/2018/02/27/futures-0-2-RC/) and the [latest version of tokio](https://tokio.rs/blog/2018-03-tokio-runtime/). We also plan to take a look at our build and see if we can make our CI turnaround faster.
================================================
FILE: docs/updates/2018_05_07.md
================================================
# Update for May 7, 2018
## Contributions
[@yajamon](https://github.com/yajamon) contributed [a fix for an oversight in our build script](https://github.com/atom/xray/pull/78) where we were specifying `+nightly` even though our repository is associated with a `rust-toolchain` file. Thanks!
## First internal demo is complete
As I mentioned in the last update, we focused last week on preparing for an internal demo that presented at least a tiny slice of the Xray vision in a more tangible, interactive form. We spun up a headless Xray server as a digital ocean droplet and showed off remote shared workspaces, collaborative editing, and conversations tied to the code. We also put together a few slides demonstrating Xray's performance for various tasks such as fuzzy file-finding, moving large numbers of cursors, and scrolling. The response was really positive, and we've elected to continue the experiment into the next quarter. [@as-cii](https://github.com/as-cii) and [I](https://github.com/nathansobo) will continue to focus on Xray in the coming months, and we'll get a bit of support from [@maxbrunsfeld](https://github.com/maxbrunsfeld) in order to integrate [tree-sitter](https://github.com/tree-sitter/tree-sitter) as the basis of Xray's syntax awareness.
## Into the unknown with CRDTs
As [I discussed in the first update](./2018_03_05.md#anchors-and-selections), using CRDTs in Xray's native buffer implementation allows us to create *anchors*, which are stable references to positions within a text file that maintain their logical position even after the buffer is subsequently edited. For our discussions feature, we use anchors to link each message to the range of the buffer that was selected at the time the message was sent. This allows you to select a code fragment and make a comment, then allow other participants to click on the message at some later time to jump to the code you had selected when you sent the message. For now, Xray only maintains all of this state in memory. The discussion history is lost as soon as you kill the process, and we deliberately avoid dropping buffers once they are open in order to preserve the validity of anchors. This is obviously not going to work, and to fix it, we need to figure out how to persist each buffer's operation history.
If we assume that buffers are never renamed and that history only ever marches forward, this is pretty easy. But the possibility of renames and interactions with Git (or other version control systems) make it interesting. We want to track a file's identity across renames and ensure that we load the appropriate history when the user switches branches, and these concerns have a lot of overlap with some other ideas we've been pondering that can loosely be described as "real-time version control". With a proof-of-concept for shared workspaces behind us, we think it's time to explore them.
Currently, we represent buffers as CRDTs. We're interested in what happens if we take that further and treat the entire *repository* as a single unified CRDT data structure that is persisted to disk. Ideally, assuming Xray is used for every edit, we will be able to maintain a keystroke-level history of every edit to every file all the way back to the moment that each file was created, sort of like an infinite conflict-free undo history. But of course, there will be many cases where file changes occur outside of Xray, so we'll need to gracefully handle those situations as well. We've decided to spend the next couple weeks exploring this. We'll probably spend most of our time clarifying our thoughts in writing at first before transitioning to coding. It's unclear exactly how much gold is at the end of this particular rainbow, but it seems worth a look.
## Strike out with futures 0.2
On Friday, we spent an hour and a half upgrading `xray_core` to `futures` 0.2, only to discover that Tokio doesn't yet support that version 🙈. Luckily, it wasn't that much time wasted, but we did feel somewhat foolish for assuming that Tokio worked with it without checking first.
## Optimizations
[@as-cii](https://github.com/as-cii) has been picking some low-hanging optimization fruit related to selections and editing. The [first](https://github.com/atom/xray/pull/79) is related to adding selections above and below the cursor. He's also been looking at [batching tree manipulation](https://github.com/atom/xray/tree/optimize-edit) when editing with multiple cursors, which is still in progress and is not yet associated with a PR.
================================================
FILE: docs/updates/2018_05_14.md
================================================
# Update for May 14, 2018
## More optimizations
Last week we spent a couple of days speeding up multi-cursor editing. Specifically, we wanted to take advantage of the batched nature of this operation and edit the buffer's CRDT in a single pass, as opposed to performing a splice for each range. Please, take a look at [#82](https://github.com/atom/xray/pull/82) for all the details.
There is still some work to do in that area to deliver a smooth experience when editing with thousands of cursors, but we are planning to get back to it once we have fleshed out more features.
## Thoughts on further applications of CRDTs
After demoing Xray to our colleagues, we got a lot of interest in how Xray's CRDT-based approach to buffers might apply to the problem of versioning generally, so we took some time to explore it last week. We were intrigued by the idea of a CRDT-based analog to Git, a kind of operation-oriented version control system that allowed for real-time synchronization among several replicas of the same working tree and persistence of all operations. After spinning our wheels quite a bit, we've concluded that we really need to get clear on the specific problems we might like to solve. They are as follows:
* Replay: We'd like to allow developers to record a collaboration session and cross-reference their keystrokes to audio, so that it could be replayed later. Assuming people were willing to opt into this, it could provide deep insights into the thought processes behind a given piece of code to future developers. This use case is really all about persisting the operations, and has nothing to do with replicating the entire file tree.
* Permalinks: Today we have anchors, which automatically track a logical position in a buffer even in the presence of concurrent and subsequent edits, but these anchors are only valid for the lifetime of the buffer in memory. We'd like to be able to create an anchor that can always be mapped to a logical position at arbitrary points in the future, even thousands of commits later. Again, this has nothing to do with full replication. It's really about *indexing* the operations we persist and tracking the movement of files over time so that we can always efficiently retrieve a logical position for an anchor.
* Streaming persistence and code broadcast: Today, code lives on your local machine until you save it, commit it, and push it to the cloud. We want to persist your edit history as it is typed and optionally stream it into the cloud. If your computer spontaneously combusts, your up-to-the-minute edit history is still saved on the server. If you elect for your edits to be public, colleagues or community members could watch your edit stream in real time. This would require full replication if you wanted to allow another party to make *edits* to the working tree. If the server is just storing your operations, there's really no need to deal with concurrency. It *might* be cool if someone could come along and edit the server's replica of the work tree and have their edits automatically appear in your replica, but is that actually a good user experience? Real-time collaboration requires tight coordination, so it might be jarring to receive edits from someone you didn't actively invite to your workspace.
* File-system mirroring for third-party editors: We'd like to allow other editors to use Xray in headless mode as a collaboration engine. In this use case, we'd need to relay edit operations through Xray via specific APIs, but it might be helpful if Xray could mirror the state of a remote project to the local file system. That way, an existing editor could use its ordinary mechanisms for dealing with local files to interact with the remote workspace, and wouldn't need to perform file system interactions over RPC, which would simplify integration.
I wanted to think through the design implications of these various features early to determine whether any of them had an impact on Xray's core architecture, and after a lot of thinking, my conclusion is that it should be okay to defer these features for now. I had envisioned a single unified design that elegantly addressed all of these features in a single replicated structure, but now we think that the cost of building such a structure probably outweighs its benefits.
For now, we've decided to defer these concerns until the point that replay, permalinks, or streaming persistence are actually the next most important feature we want to add. Our instinct is that when that time comes, we'll be able to address these features in an additive fashion, and that it doesn't make sense to invest in adding support for them today.
In retrospect, last week was a bit of a distraction. I've done more up-front design thinking for Xray than I ever have for any other project, and it's worked out pretty well overall. But after last week, I think we're approaching diminishing returns for up-front architectural design. We've validated that the current design can be performant and collaborative, and it's seeming like we've struck a nice balance between simplicity and power. Now it's time to return to a more incremental strategy and continually focus on the next-most-important feature until we have a useful editor.
## The path forward
This week, we'll turn our focus to implementing save as well as a simple key bindings system, which [I wrote about in a previous update](2018_03_26.md#thoughts-on-key-bindings-and-actions). We also plan to clarify our short term roadmap, and we'll post an update about that next week.
================================================
FILE: docs/updates/2018_05_28.md
================================================
# Update for May 28, 2018
## Staying the course with CRDT-based version control
In the last update, I said that we were abandoning our efforts to apply CRDTs to the entire repository, citing lack of clarity on what we were actually trying to achieve. However, after more conversations with colleagues, we've decided to proceed with that effort after all. After a lot more thinking and writing, we finally got enough clarity on our direction to start writing code last week.
We still plan to continue developing Xray as a text editor, but we're adding a new top-level module to the repository called Memo, which is essentially a CRDT-based version control system that interoperates with Git. Xray will pull in Memo as a library and build directly on top of its primitives, but we also plan to make Memo available as a standalone executable in the future to support integration with other editors.
Our plan is for Memo to complement Git with real-time capabilities. Like Git, Memo will support branches to track parallel streams of development, but in Memo, all replicas of a given branch will be synchronized in real-time without conflicts. For example, if you and a collaborator check out the same Memo branch, you'll be able to move a file while someone else is editing that file, and the state of the file tree will cleanly converge.
Today, Git serves as a bridge between your local development environment and the cloud. When you push commits to GitHub, you're not only ensuring that your changes are safely persisted and shared with your teammates, but you're also potentially kicking off processes on one or more cloud-based services to run tests, perform analysis, or deploy to production. We want to make that feedback loop tighter, allowing you to share your changes with teammates and cloud-based services as you actively write code.
With Memo, as you're editing, a CI provider like Travis could run tests across a cluster of machines and give you feedback about your changes immediately. A source code analysis service like Code Climate could literally become an extension of your IDE, giving you feedback long before you commit.
Like Git, we also intend to persist each branch's history to a database, but your changes will be continuously persisted on every keystroke rather than only when you commit. After the fact, you'll be able to replay edits and identify specific points in a branch's evolution via a version vector. When we detect commits to the underlying Git repository, we'll automatically persist a snapshot of the current state of the Memo repository and map the commit SHA to a version vector. When a commit only contains a subset of the outstanding changes, we'll need a more complex representation than a pure version vector in order to account for the exact contents of the commit, since a version vector can only identify the state of the repository at a specific point in time.
Last week, after getting clear on our goals, we started on a new tree implementation that we'll use to index the history of changes to the file system and text files. It's based heavily on the tree that we already use within Xray to represent the buffer CRDT, but we're modifying it to support persistence of individual nodes in an external database. This will allow us to index the entire operational history of files without needing to load that entire history into memory during active editing. Once we complete the initial implementation of this B-tree, we'll use it to build out a CRDT representing the state of the file system.
## More progress on the editor
While I've been focused on getting clarity in terms of version control, [@as-cii](https://github.com/as-cii) has continued to make progress on Xray itself. Last week he merged [a PR that adds support for horizontal scrolling](https://github.com/atom/xray/pull/90) in the editor, which was a bit more challenging than it might sound.
To support horizontal scrolling, we need to know the width of the editor's content, which involves efficient tracking and measurement of the longest line. Previously, we maintained a vector of newline offsets as part of each chunk of inserted text to support efficient translation between 1-D and 2-D coordinates which we implemented by binary searching this vector. Antonio replaced this representation with a static binary tree, which is still stored inside a vector for efficiency. With the binary tree, we maintain the same offset information that was formerly available in the flat vector, but we also index maximal row lengths, which gives us the ability to request the longest row in an arbitrary region of the text in logarithmic time.
I'll be out next week on vacation, so Antonio plans to focus primarily on more editor features until I'm back on Monday, June 4th. He'll start with rendering a gutter and line numbers, which he already got started on last week. In light of my absence, there's a good chance we could go another 2 weeks before the next update. Thanks for your patience.
================================================
FILE: docs/updates/2018_07_10.md
================================================
# Update for July 10, 2018
It's been a while since the last update, and I apologize for that. Our strategic direction has felt less clear to me over the past few weeks, and that lack of clarity combined with some difficulty in my personal life overcame my motivation to post for a while. I just wanted to turn inward and write code in relative isolation. Things are clearer and I'm feeling better, and I'd like to resume posting updates on a weekly basis and ask your forgiveness for the gap in communication.
## The emergence of Eon
When we demonstrated Xray for GitHub leadership in May, there was definitely interest in Xray's potential as a high-performance collaborative text editor that runs on the desktop or in the browser, but there was way *more* excitement about CRDTs and their potential to impact version control. At first, this feedback caused some cognitive dissonance for me. After working so hard on Xray, it wasn't easy to hear that what I considered to be an implementation detail was the most exciting aspect of what we had built. But the more I thought about it, the more intrigued I became with the application of CRDTs to version control. The idea had been floating around in my mind since early in the development of Teletype, but now I felt encouraged to take the idea more seriously.
After a bit of indecision, we decided to dive in. We've now shifted our focus to a new project called Eon, which enables real-time, fine-grained version control. Long term, we see Eon and Xray as two components of the same overall project. Eon will be an editor-agnostic datastore for fine-grained edit history that enables real-time synchronization. It will be like Git, but it will persist and synchronize changes at the granularity of individual keystrokes. We envision Xray as Eon's native user interface and the best showcase of its capabilities. One example is the idea of "layers", which are like commits that can be freely edited at any time.
Git never would have taken off if it had been trapped inside a particular editor, and so if we really want to maximize the utility of what we're building, it makes sense to be editor-agnostic at the core. That's why we've decided to focus on delivering Eon as a standalone project. It may look like we have stopped working on Xray, but since Xray will ultimately build on top of Eon, the spirit of the overall project continues.
Since I was presenting Eon at Qcon NYC, we briefly decided to pull out Eon into a separate repository, but then we decided that this was actually a bad idea. For now, we will [continue to develop Eon within the Xray mono-repo](https://github.com/atom/xray/tree/eon/eon) in order to keep the community and development focused in a single location.
## Progress on Eon
Previously, Xray allowed you to invite guests into your workspace, but it was a centralized design. The workspace host owned all the files and serialized all guest requests to manipulate the file system. If the host dropped offline, the collaboration was over. With Eon, we're shooting for full decentralization. Multiple people can maintain a first-class replica of a given repository, just like Git.
To achieve that, over the past few weeks, we've been working on replicating the contents of the file system in addition to individual buffers. That means that if one person moves a directory while a collaborator adds a file inside of it, both parties will eventually converge to the same view of the world. It's proven to be a surprisingly complex problem.
We maintain a CRDT that represents the state of all the files and directories within the repository, but the only cross-platform way to detect file system changes is to scan the underlying directory structure and compare it to our in-memory representation. So far, we've focused only on directories, and we're caching inodes so we can detect when a directory is moved. We have yet to deal with files, which add the possibility of multiple hard links to the same file, but we're planning for them in our design. We also still need to deal with the fact that the file system might change in the middle of a scan, which might cause us to encounter a file or directory multiple times.
Once we detect a local change, we update the local index and create an operation to broadcast to other replicas. We've settled on a design in which each file or directory is assigned a unique identifier and associated with one or more *parent references*, which describe where that file is located in the tree. Directories can only have one parent reference since they cannot be hard linked, but files can have multiple. Additionally, directories are associated with *child references*, each of which has a name and corresponds to a parent reference elsewhere in the tree.
Each parent and child reference is a simple CRDT called a *last-writer wins register*. If a file is moved, we update its parent reference. If the same file is moved concurrently on another replica, we break the tie in a consistent way such that the file ends up in the same location in all replicas. Similarly, if two child references with the same name are created concurrently within a directory, only one of them will win across all replicas.
Inspired by the [Btrfs file system](https://en.wikipedia.org/wiki/Btrfs), we're storing the state of the file system in the same copy-on-write B-tree that we use to represent the contents of buffers. Our tree is implemented generically, enabling us to reuse the same code for different kinds of items. In the case of our file system representation, each item is a member of an enumeration, which allows us to store file metadata, parent references, and child references all within the same tree. Each parent and child reference is actually represented by multiple tree items that share a *reference id*. We enforce a total order between all items in the tree, honoring the leftmost item for any register as the current value of that register.
We've also enhanced Xray's original B-tree to allow nodes to be persisted in an external key-value store. This will allow us to maintain a history of how the file system has evolved, and we plan to allow interactions with our tree to filter out certain nodes based on a summary of their contents. This will enable us to avoid loading portions of the tree that contain items that aren't visible in a specific version of the tree, which will keep the memory footprint small for any single version while still allowing us to load past versions of the tree if desired.
In many ways arriving at our current approach was more challenging than coming up with the CRDT for text. We spent many days doing almost nothing but thinking and not writing much code, but now we're feeling pretty good about the design. It seems simple and almost obvious, which is probably a good sign that we're on the right track.
================================================
FILE: docs/updates/2018_07_16.md
================================================
# Update for July 16, 2018
## Breaking cycles
This week, we continued our focus on a fully replicated model of the file system. We're still focusing on directories only, driving our work with an integration test that randomly mutates multiple in-memory replicas of a file system tree and tests for convergence.
Mid-week, we hit a pretty major snag that we hadn't anticipated, but that seems obvious in retrospect. Say you have two replicas of a tree that contains two subdirectories, `a` and `b`. At one replica, `a` is moved into `b`. Concurrently, on the other replica, `b` is moved into `a`. When we exchange operations, we end up with both directories in an orphaned cycle, with `a` referring to `b` as its parent and `b` referring to `a` as its parent, a state which we can't mirror to the underlying file system of either replica.
| Time | Replica 1 State | Replica 2 State |
|:-----| :-------------- | :------------------ |
| 0 | `a/` `b/` | `a/` `b/` |
| 1 | `a/b/` | `b/a/` |
| 2 | ??? | ??? |
For any set of concurrent moves, it's possible to create a cycle, and you could potentially create *multiple* different cycles that share directories in certain diabolical cases. Left untreated, these cycles end up disconnecting both directories from the root of the tree. We still have the data in the CRDT, but it can't be accessed via the file system. We need to break them.
We spent the second half of this week thinking about every possible approach to breaking the cycles while also preserving convergence, and we ended up arriving at two major alternatives.
The first approach is to preserve the operations that create the cycle, but find a way to break the cycle when we interpret the operations. The trouble is that cycles are always created by concurrent operations, but because this is a CRDT, it's possible for concurrent operations to arrive in different orders at different replicas. This means a decision to break a cycle is order-dependent, and may need to be reevaluated upon the arrival of a new operation. Our best idea is to create an arbitrary ordering of all operations based on Lamport timestamps and replica ids. When a new operation is inserted anywhere other than the end of this sequence, we integrate it and then reinterpret all subsequent operations based on a state of the tree that accounts for the new operation. It's definitely doable and preserves the purity of the CRDT, but it also seems complex and potentially slow. It also means that we could end up synthetically breaking a cycle only to determine later that we don't need to break the cycle due to the arrival of a concurrent operation. This could cause seemingly unrelated directories to appear out of nowhere upon the arrival of a concurrent operation, which could be pretty confusing depending on the integration delay. We'd like Eon to generalize to async use cases in addition to real-time, and these "phantom directories" seemed like a real negative for usability.
The second approach, which we've decided to go with, is sort of a principled hack. Whenever we interpret a move at a given replica that introduces a cycle, we look at every move operation that contributed to the cycle and synthesize a new operation that reverts the operation with the highest Lamport timestamp. We then broadcast this new operation to other participants. Depending on the order that various concurrent operations arrive at different replicas, we may end up reverting the same move redundantly or reverting multiple moves that participate in different variations of the same cycle. We considered this approach within the first hour of our discovery of the issue, but initially discarded it because it seemed to violate the spirit of CRDTs. It seems weird that integrating an operation should require us to generate a new operation in order to put the tree in a valid state. But after fully envisioning the complexity of the pure alternative, synthesizing operations seemed a lot more appealing. Breaking cycles via operations means that once a replica observes the effects of a given cycle being broken, they'll never see it "unbroken" due to the arrival of a concurrent operation. It also completely avoids the issue of totally ordering operations and reevaluating subsequent operations every time an operation arrives.
One consequence of either approach is that there could be certain combinations of operations that lead to a cycle that we never detect and break. That means that certain version vectors might yield tree states containing cycles and constrains the set of version vectors we should consider valid. This isn't a huge deal, because even without cycles, the constraints of causality already limit us to a subset of all possible version vectors if we want a valid interpretation of the tree. For example: If replica 0 creates a directory at sequence number 50 and replica 1 adds a subdirectory to it at sequence number 10, the state vector `{0: 20, 1: 10}` would contain a directory whose parent doesn't exist. If we limit ourselves to version vectors corresponding to actual states observed on a replica, we will have no problems.
## Homogenous trees
As I discussed in the previous update, we currently represent the state of the file tree inside a B-tree with heterogenous elements. Each tree item is either metadata, a child reference, or a parent reference. Now I'm realizing this is probably wrong. If we separated metadata, parent references, and child references into their own homogenous trees, we could probably simplify our code, reduce memory usage, and perform way less pattern matching on the various enumeration variants. We plan to try separating the trees this week.
## Conclusion
For whoever is reading these updates, thanks for your interest. We're always interested in thoughts and feedback. Feel free to comment on this update's PR if there's anything you'd like to communicate.
================================================
FILE: docs/updates/2018_07_23.md
================================================
# Update for July 23, 2018
## Contributions
[@MoritzKn](https://github.com/MoritzKn) [fixed a bug](https://github.com/atom/xray/pull/115) where we were incorrectly calculating the position to place the cursor when inserting strings containing multibyte characters. Thanks!
## Convergence for replicated directory trees
Late last week we were able to achieve convergence in our randomized tests of replicated directory trees. As I mentioned in the last update, the biggest challenge was the possibility of concurrent moves introducing cycles. Our proposed solution of breaking cycles via synthetic "fixup" operations worked out well, but determining exactly *which* fixup operations to generate was still a challenging problem.
In certain diabolical scenarios, reverting a move to break one cycle could end up introducing a second cycle. By reverting *multiple* moves, however, it should always be possible to end up with a directory tree that is free of cycles, and so that's what we do. Whenever a cycle is detected, we continually revert the most recent move that contributes to that cycle, ignoring any moves that have already been reverted. Eventually, we're guaranteed to end up with a tree that's free of cycles. Though we don't have a formal proof to back up our intuition, [we've been unable to find a failing scenario over a million randomized trials](https://github.com/atom/xray/blob/6c49587aad45d7880449668e4b882267435ff763/eon/src/fs2.rs#L1523), and we're ready to move forward.
We applied the same "fixup" strategy to recover from directory name conflicts as well. When we attempt to insert a directory entry whose name conflicts with an existing entry, we compare the entries' Lamport timestamps and replica ids to select an entry that gets to keep the existing name. For the other entry, we append `~` characters until we find a name that does not conflict and synthesize a rename operation. In a real-time scenario, this situation should almost never occur, but if it does, renaming one of the directories means we can mirror the state of the CRDT to the file system without losing data. The users can then decide how to deal with the situation by deleting one of the directories or merging their contents.
## Interacting with the file system
For our convergence results to be useful outside the realm of automated tests, we need to communicate changes to and from the file system. That presents its own set of challenges, since we can't rely on our internal representation always being perfectly synchronized with the state of the disk. After confusing ourselves a bit too much trying to devise a strategy for file system synchronization that could cover every possible scenario, we've decided to focus on a few narrowly-defined situations on the critical path to a working demo.
* Read a tree into a new index: When the Eon daemon starts, we will need to read the current state of the tree into our internal representation.
* Write an index to a file system tree: When you want to clone a remote replica, we need to write its initial state to your local disk.
* Update an index from a tree: Once the daemon is started, we want to watch the file system for changes. When we detect a change, we will scan the directory tree to determine which directories have been inserted, removed, or moved.
* Write incoming operations to the disk: As operations come in, we interpret them relative to our internal index and translate them into writes to the file system.
For now, we've decided to rely on the fact that files and directories get associated with unique inode numbers in order to detect moves. In our previous attempt, we were hoping to not fully rely on inodes in hopes of covering cases such as the entire repository being recursively copied or another system like Git manipulating the file tree. Now we've decided we will deal with those scenarios in a separate pass once we get the basic scenario working. Tracking the mapping between our internal file identifiers and inodes makes everything much simpler.
One thing that makes it challenging (if not impossible) to mirror changes to the file system perfectly is the inability to perform file system operations atomically. When we receive a move operation from the network, we'll resolve abstract identifiers in the operation to actual paths on the local disk. If the disk's contents have changed in the meantime and we haven't heard about it, there's a potential for these paths to be wrong. To mitigate this issue, we will always confirm that the relevant paths exist and have the expected inode numbers before applying a remote operation. If we detect that our index has fallen behind the contents of the disk, we will defer handling the operation until the next update.
However, even if we determine that our index is consistent with the disk, this determination isn't atomic. In the microseconds between checking for consistency and performing the write, another change might invalidate our conclusion and cause the operation to fail. Worse, a change might cause the same paths to point to different inodes, meaning the operation would succeed but apply to different paths. Luckily, we anticipate this sort of situation to be extremely rare. It could only happen if a file at a given path was replaced with another in the moment between our consistency check and actually writing the operation. It might lead to surprising results, but we don't think the consequences are catastrophic.
Dealing with all of these problems and getting changes to and from the file system will be our focus for this week.
================================================
FILE: docs/updates/2018_07_31.md
================================================
# Update for July 31, 2018
## Contributions
[@Aerijo](https://github.com/Aerijo) encountered some confusion and took it upon himself to [update our contributing guide](https://github.com/atom/xray/pull/118) to ensure others wouldn't suffer the same fate. We really appreciate these kinds of improvements.
## Batched conflict resolution
As I mentioned in last week's update, having achieved convergence for replicated directory trees, last week we started down the path of mirroring changes from our internal CRDT-based representation to the underlying file system. After implementing file system reads and starting on randomized tests, we quickly realized that our previous mental model was incomplete.
In our previous tests of convergence, we applied operations to our in-memory representation one at a time, moving, inserting, and deleting each directory in serial. However, when scanning changes from the disk, this serial approach is impossible. We only see a snapshot of the file system's latest state, which could have been produced by a variety of different sequences of individual operations.
Consider the following directory structure, with two different directories that are both named "b". We'll label them `b(1)` and `b(2)` in our example to clearly identify them:
```
a/
a/b(1)
b(2)/
```
The next time we scan the file system, we observe that the directory structure has changed to the following:
```
a/
a/b(2)
b(1)/
```
The two directories named `b` have swapped their positions. If we naively apply the operations derived from this swap one at a time on a remote replica, we'll end up creating name conflicts. As I've discussed previously, we resolve name conflicts created by concurrent operations by appending a tilde character to one of the conflicting names. But in this case, appending a tilde would be incorrect, because the final state of the tree that we are trying to produce contains no actual conflicts.
To avoid spurious conflict resolutions, we moved from resolving conflicts after each operation to resolving conflicts after applying arbitrary batches of operations. It took a couple days to iron out all of the new issues and edge cases with this new approach in randomized testing, which took us until last Thursday. Finally, we managed to achieve convergence with the new approach to conflict resolution in a million randomized trials of 5 different peers applying 20 operations.
## Batched writes to the file system
The batched nature of operation application presented a puzzle for file system writes as well. Previously, we had planned on applying the effects of each operation as it arrived, but now we realized that wouldn't work. We needed to apply a batch of operations to the tree, resolve conflicts, and *only then* write changes in the new state of the tree to the file system. Unlike our internal representation, which can temporarily tolerate intermediate states containing conflicts and cycles, each operation applied to the file system must ensure that the tree remains acyclic and free of name conflicts.
We ended up converging on the following approach: We maintain a set of the internal identifiers of all files we have inserted, moved, or removed in the course of applying a batch of operations. We then sort insertions and moves by the depth of the inserted path in the new tree and sort deletions last. By performing shallower insertions first, we ensure that the parent of any directory we are trying to insert always exists.
By performing shallower moves first, we ensure that we don't accidentally create cycles while rearranging directories. We don't have a formal proof, and we may need more empirical verification to be completely confident, but the intuition is as follows: A cycle can only be created by moving a directory downward to become one of its own descendants. Because we've broken cycles in the new tree, we should never encounter a situation in which a directory has been moved to become its own descendant in the final state of the new tree. A combination of moves could end up creating a cycle momentarily, but this cycle could only be created by moving a directory deeper in the tree. If we perform upward moves first, by the time we would be attempting to move a directory into one of its own descendants, we should have already moved that descendant to an equal or shallower depth. At least that's our intuition, and evidence so far is that it works.
Finally, we need to deal with temporary name conflicts that can occur when directories are shuffled around. We've opted to take an extremely simple approach. When performing a move on the file system would create a name conflict, we append tildes until we find a free name and record the fact that we have done so. When all operations have been applied, we go back and clean up, renaming directories with appended tildes back to their desired names. At this point, all of the conflicts should be resolved, and so we can do this without risk of conflict.
## Dealing with concurrency
The above approach worked in randomized trials at the end of last week, but we knew we were only solving part of the problem. Our initial implementation assumed that we were the only process writing to the file system. In reality, the file system can change out from under us at any time, meaning that we could be attempting to update the file system based on an outdated understanding of the file system's state.
To deal with this, before we integrate a batch of operations into our tree, we clone the tree's current state and store it as the `old_tree`. This represents our best guess as to the current state of the underlying file system. We then update the new tree, resolve conflicts, and start writing. For each file we need to update, we use the `old_tree` to determine the current location of the relevant directories on disk. Assuming a directory still exists at the path in question, we compare inode numbers to ensure it has the proper identity. Assuming our understanding of all the relevant paths is up to date, we can proceed with the file system write and update the `old_tree` accordingly.
If anything goes wrong, such as the path not existing, the path's inode not matching, or the write operation returning some kind of error, we need to pause the entire process and update our understanding of the old tree via a file system scan. As we integrate changes to the old tree, we produce operations which need to be applied to the new tree. Moves, deletions or conflict resolutions could end up changing the nature of operations we have still yet to write, requiring us to refresh and re-sort our pending writes after the old tree is updated.
At the time of writing, we have yet to achieve convergence in the presence of full concurrency with the underlying file system, but it seems like we are getting close. Hopefully we'll get there by the end of this week.
## Q3 Demo
Our focus during Q2 has been figuring out how to achieve optimistic replication on the entire file system as well as persistence of all operations, and we've nearly done it. Once we achieve this abstraction, we plan to shift our focus to showcasing its capabilities in a new demo.
We're still not clear on the details, but the basic idea is that you should be able to open a repository in Xray, then open a "streams" panel to view the latest state of all other working copies of that repository from other developers working in Xray, whether or not they are currently online. If a stream is being actively edited, you'll be able to collaborate. If that stream's author is offline, you'll be able to pick up where they left off. You'll also be able to fork a stream, though we probably won't finish merging before the end of the quarter.
We feel confident we can achieve that basic experience, but if we have time, we'd like to restore the conversation panel now that we will be able to persist anchors over the lifetime of a repository. We'd also like to find other ways to show off our operation-level history, such as the ability to play back operations.
We still plan for Eon (or whatever we end up calling it; I'm not sure if I like the name) to be a standalone tool that can integrate with other editors. But we need to drive its development with a real product experience, and the best way to do that is by producing a working demo.
## Vacation
Antonio is on vacation this week and next week, and I'll also be out next week to spend some quality time with my family. Due to this, expect a 2-week communication gap. We'll come back recharged to slash through randomized test failures and produce a demo of a whole new approach to collaboration. Thanks for reading!
================================================
FILE: docs/updates/2018_08_21.md
================================================
# Update for August 21, 2018
## *Eon* is now *Memo*
I chose the name *Eon* fairly hastily and ended up kind of disliking it. I wanted to change it almost immediately, but decided to hold off until I felt sure about its replacement. *Memo* is one character longer but just sounds better to me and reflects the system's ability to record every keystroke. It's kinda silly to worry this much about a name, but I just needed to change it. Now it's done. Moving on.
## Convergence for directory trees
The bigger news is that we've finally achieved convergence in our randomized tests of replicated directory trees. The problem ended up being way harder than we imagined. The final challenge was to fully simulate the possibility for the file system to change at any time, including during a directory scan.
We are cautiously optimistic that the worst of the algorithmic challenges could be behind us. Weeks of wading through randomized test failures has been a bit monotonous, but hopefully we can pick up some momentum building on top of this abstraction.
## Supporting text files and evolving the high-level structure
The next step is to add support for files to the directory tree, which we think should be easier. Much of what we learned dealing with pure directories can be applied to files, and since files are always leaf nodes we shouldn't need to deal with cycles. We *do* need to deal with hard links, however, which should add some complexity.
Supporting files also means we need to figure out the relationship between the CRDT that maintains the state of the directory tree and the CRDTs that contain the contents of individual text files.
This week seems like the right time to zoom out and get a bit more clarity on the system's higher level design. Until we had a working CRDT for directory trees, that felt premature, but now it seems like understanding the big picture a bit better might inform the relationship between the directory tree and individual text files.
We've gone back and forth on whether we should try to decouple them, but for now we think we're going to try a more integrated approach where the directory tree CRDT has explicit knowledge of the file CRDTs. For now, we've decided to wrap both concerns in a single type called a `Timeline`, which will represent the full state of a working tree as it evolves forward in time. A `Repository` will contain multiple timelines which can evolve in parallel, fork, and eventually merge.
There's still quite a bit to figure out though. How will we route operations to and from buffers? What will the ownership structure look like? How can we ensure that performing I/O doesn't interfere with the responsiveness of the system? We'll hopefully have some conclusions about those questions and more to share in the next update.
================================================
FILE: docs/updates/2018_08_28.md
================================================
# Update for August 28, 2018
## Convergence for files and hard links
As predicted in the [last update](./2018_08_21.md), adding support for files and hard links to our directory tree CRDT went smoothly, and we achieved convergence in our randomized tests on Monday. Because hard links make it possible for the same file to appear in multiple locations, many code paths needed to be updated to work in terms of *references* rather than files. Happily, we had already anticipated hard links by allowing a file to be associated with multiple parent refs, so the path was mostly paved. Once we add support for file contents and confirm that everything works in an end-to-end test, we plan to post an in-depth write-up on the directory tree CRDT and do a documentation pass on the [timeline module](../../memo/src/timeline.rs).
## Next up, buffers
The file support added last week assumes that all files are empty. To allow files to be associated with editable content, we're adapting the [`buffer`](../../xray_core/src/buffer.rs) module from `xray_core` to work with Memo's [new B-tree](../../memo/src/btree.rs). The primary difference between the previous B-tree implementation and the new one is support for storing the tree's nodes in a database. This will allow us to store a file's entire history without loading old fragments into memory, but it also means that many methods now have the potential to perform I/O with the database and encounter I/O errors.
We'll need to adjust the `Buffer` APIs slightly to account for this potential. For example, we can no longer return an iterator that implements the `Iterator` trait, since `next` would need to return a `Result` type. We're also dropping some of the previous buffer's support for Xray's RPC system because we anticipate dealing with network interactions differently in Memo. We don't have complete clarity on our plans for dealing with networking just yet, but it makes sense to keep our assumptions minimal at this stage.
Once we get buffers implemented against our new B-tree, we'll need to integrate them into our timeline. We plan to maintain a mapping between file ids and the buffers that contain their contents, but the details will become clearer once we get into it. Buffers will need to be integrated with up to three distinct sources of I/O: the file system for reading/saving contents, the network for collaboration, and the database for history persistence. It should be a fun design problem to give them a convenient API while addressing all of those concerns.
================================================
FILE: docs/updates/2018_09_14.md
================================================
# Update for September 14, 2018
It's been an intense couple of weeks, but we're coming out of it with more clarity than ever on the future direction of Memo. We're entering a new phase of the project where we distill the research of the last few months into a usable form. Thanks for your patience with the radio silence the last couple of weeks.
## Embracing Git
Our previous vision for Memo was to store the full operational history for the repository in a global database, so that each file's full history would be available in a single structure dating back to its creation. This would essentially duplicate the role of Git as a content tracker for the repository, but with a much more fine-grained resolution. It may eventually make sense to build a global operation index to enable advanced features and analysis, but I don't think it makes sense to conceive of such an index as an independent version control system. For async collaboration, CRDTs probably won't offer enough advantages to induce people to switch away from Git. Even if we managed to build such a system, it would always need to interoperate with Git. So we may as well embrace that reality and build on top of Git. We can then focus on the area where CRDTs have their greatest strength: real-time collaboration and recording the fine-grained edits that occur *between* Git commits.
Augmenting Git is definitely something I've considered in the past, but it's finally becoming clearer how we can achieve it. We will start by packaging the previous months' work into a library that is similar to `teletype-crdt`. With Teletype, you work with individual buffers. Each local edit returns one or more operations to apply on remote replicas, and applying remote operations returns a diff that describes the impact of those operations on the local replica. Memo will expand the scope of this abstraction from individual buffers to the working tree, but it won't represent the full state of this tree in the form of operations. Instead, we'll exploit the fact that Git commits provide a common synchronization point. The library will expect any data that's committed to the Git repository to be supplied separately from the operations.
By making the CRDT representation sparse and leaning on Git to synchronize the bulk of the working tree, we reduce the memory footprint of the CRDT to the point where it can reasonably be kept resident in memory. This also bounds the bandwidth required to replicate the full structure, which obviates the need for complex partial data fetching schemes that we were considering previously. This in turn greatly simplifies backend infrastructure. Because a sparse representation should always be small enough to fully reconstruct from raw operations on the client, server side infrastructure shouldn't need to process operations in any way other than simply storing them based on the identifier of the patch to which they apply.
## The challenge of undo
One big obstacle to making this patch-based representation work is undo. In `teletype-crdt`, we implement undo by associating every operation with a counter. If an operation's undo counter is odd, we consider the operation to be undone and therefore invisible. If the operation's undo counter is even, we consider the operation to be visible. If two users undo or redo the same operation concurrently, they'll both assign its undo count to the same number, which preserves both users' intentions of undoing the operation and avoids their actions doubling up or cancelling each other out, which could occur in some other schemes. However, implementing undo in this way comes with a cost, which is that in order for me to undo an operation that is present in my local history, I need to rely on that operation being present in the history of all of my collaborators. This approach to undo combines poorly with resetting to an empty CRDT on each commit, because it forces everyone to clear their undo stack after committing since there won't be any way to refer to prior operations in order to update their undo counters.
This felt like a show-stopper to me until I had a conversation with [@jeffrafter](https://github.com/jeffrafter) about his team's experience using `teletype-crdt` in [Tiny](https://tttiny.com/). I don't have a perfect understanding of the details of their approach, but they essentially bypass Teletype's built-in undo system and maintain their own history on each client independently of the CRDT. When a user performs an undo, they simply apply its effects to the current CRDT and broadcast it as a new operation. When I asked about some of the more diabolical concurrency scenarios that the counters were designed to eliminate, Jeff simply replied that it's working for them in practice.
Inspired by their experience, I have a hunch that we can implement undo similarly in our library. For each buffer, we can maintain two CRDTs. One will serve as a local non-linear history that allows us to understand the effects of undoing operations that aren't the most recent change to the buffer. We'll perform undos against this local history first, then apply their effects to a CRDT that starts at the most recent commit. This will generate remote operations we know can be cleanly applied by all participants. The local history can be retained across several commits and even be stored locally. By fetching operations from previous commits, we could even construct such a history for clients that are new to the collaboration.
## Stable file references
We need to be able to refer to files in a universal way, but with this hybrid approach, only *new* files are assigned identifiers by operations. This stumped us for a bit, until an obvious solution occurred to us. The set of paths in the base Git commit is the same for every replica, so we can sort these paths lexicographically and assign each an identifier based on its position in this fixed order. Internally, file identifiers are a Rust enum with two possible variants, `Base`, which wraps a `u64`, and `New`, which wraps a local timestamp generated on the replica that created the file.
## The big picture
By being agnostic to plumbing and building a library that operates purely in terms of operations and data, this software should be useful in a broader array of applications. We plan to distribute a WebAssembly version to enable collaboration in browser-based environments, along with a native executable that can talk to editors and synchronize our CRDT with an underlying file system like we originally envisioned. The operations can serve as a kind of common real-time collaboration protocol. As long as an application can send and receive operations and feed them into this library, it should be capable of real-time collaboration with other applications.
In light of these shifts in our thinking, I've updated the [Memo README](../../README.md) to reflect the current state of the world. Some details about the implementation have been dropped, but I plan to reintroduce them over time as our implementation stabilizes. At some point soon, it may make sense to again pull Memo out into its own repository that is separate from Xray. If that happens, I'll keep everyone posted here.
================================================
FILE: docs/updates/2018_10_02.md
================================================
# Update for October 2, 2018
## Shipped an initial light client
Last week, we [shipped](https://github.com/atom/xray/pull/135) an initial version of Memo JS, a light-client implementation of Memo that can be used as a library in web-based applications. To start with, we're assuming that the file system is completely virtual and that all changes are routed directly through the library. This meant that we ended up temporarily shelving a lot of the work we did to synchronize our tree CRDT with an external file system, but we still plan to take advantage of that research in order to build the full client that's capable of observing an external repository. Shipping the light client first will hopefully let us get some feedback and iterate on other aspects of the protocol's design before introducing the complexity of interoperating with an external file system.
## Next, Git operations
Currently, a Memo `WorkTree` always starts at a base commit and builds forward indefinitely with operations. We assume that application code will be responsible for tearing the work tree down and rebuilding it following a commit. The next step is to pull this concern into Memo itself and to allow the base commit of a replicated work tree to *change* over time due to operations on the underlying repository such as committing, resetting, and checking out different branches.
We're still in the middle of figuring this out. It's murky and our thinking is still in flux. We're focused on the light client currently, which simplifies our API and reduces complexity, but we still want a design that will work when we do eventually synchronize to the file system. It's somewhat unclear whether we should just start focusing on integrating with the file system now, or alternatively completely ignore the concerns of the file system and hope we can make adjustments later. For now though, here's what is emerging.
### Epochs
We divide the evolution of the work tree into *epochs*. Each epoch begins with a specific commit from the underlying repository that gives all replicas a common frame of reference, then applies additional operations on top to represent uncommitted changes in that epoch. There is one and only one *active epoch* at any time on a given replica. All operations are tagged with an epoch id, and the local counters used to identify operations are reset to zero at the start of each epoch. Someone joining the collaboration should only need to fetch operations associated with the most recent epoch.
When a user performs a local Git operation such as a commit or a reset, they broadcast the creation of a new epoch. Because users can create new epochs concurrently, we always honor the epoch creation with the most recent Lamport timestamp at every replica, which will provide an arbitrary but consistent behavior for concurrent epoch creations while also respecting causality in the sequential case.
### Resets
Collaborators can reset the HEAD of the working copy to an arbitrary ref. In that case, we need to create a new epoch. Depending on the nature of the reset and the state of the file system, there may be uncommitted changes on disk. We'd also like to incorporate the concept of unsaved changes when we integrate with the file system. Both uncommitted changes and unsaved changes will need to be translated into synthetic operations that build upon the new epoch's base commit.
When the epoch creation arrives at remote replicas, it seems like they will have no choice but to perform I/O in order to scan the epoch's base entries into the tree. The base state of open buffers may also need to be re-read, and some of these open buffers may be for files that no longer exist in the new epoch's base commit.
This is where things start to feel pretty messy and confusing. What happens to these "untethered" buffers? Do we empty out the tree and build it back up as we perform I/O on the base entries, or do we preserve the old state until the new state is ready? How do races with the file system complicate all of this?
### Commits
Commits create a new epoch whose state is derived from a previous epoch, although due to the potential for concurrent commits and resets, a commit doesn't always derive from the active epoch on a given replica. Ignoring the potential for partial staging for the moment, when a user creates a commit, we can characterize what they committed via a version vector that includes all observed operations in the current epoch.
If a replica receives a commit based on the active epoch (which should be the most common case), we should be able to determine their base entries without performing I/O. This is because the state that was committed should already be available as a subset of operations they have already seen, as characterized by the version vector. This would allow us to update the tree to its new state synchronously in a very common case.
On the other hand, there's no guarantee that a commit is going to be based on the active epoch thanks to diabolical concurrency scenarios, and this seems to mean that we may end up needing to do I/O anyways in some scenarios. That makes us wonder whether we should focus first on the ability to reset the base commit in arbitrary ways and treat commits as a special case of that.
## Conclusion
This is a hard problem. We've made it through one wave of complexity to encounter another, and presumably that will continue. Every decision seems to be entangled with everything else, and even this summary just scratches the surface of the thought process behind this problem. But despite the daunting complexity, I'm still excited by the idea of a fully-replicated Git working copy. Git operations are the next summit to climb, and I imagine there will be more wilderness before we can settle in the fertile valley of conflict free replicated paradise.
================================================
FILE: memo_core/Cargo.toml
================================================
[package]
name = "memo_core"
version = "0.1.0"
authors = ["Antonio Scandurra ", "Nathan Sobo "]
edition = "2018"
[dependencies]
diffs = "0.3"
lazy_static = "1.0"
flatbuffers = "0.5"
futures = "0.1"
serde = "1.0"
serde_derive = "1.0"
smallvec = "0.6.1"
uuid = { version = "0.7", features = ["serde"] }
[dev-dependencies]
futures-cpupool = "0.1"
rand = "0.3"
uuid = { version = "0.7", features = ["serde", "u128"] }
================================================
FILE: memo_core/README.md
================================================
# Memo – Real-time collaboration for Git
**This project is a work in progress. This README defines the vision, but it isn't fully implemented yet.**
On its own, Git can only synchronize changes between clones of a repository after the changes are committed, which forces an asynchronous collaboration workflow. A repository may be replicated across several machines, but the working copies on each of these machines are completely independent of one another.
Memo's goal is to extend Git to allow a single working copy to be replicated across multiple machines. Memo uses conflict-free replicated data types (CRDTs) to record all uncommitted changes for a working copy, allowing changes to be synchronized in real time across multiple replicas as they are actively edited. Memo also maintains an operation-based record of all changes, augmenting Git's commit graph with the fine-grained edit history behind each commit.
Memo is divided into the following major components:
* Protocol: Memo can be thought of as a protocol for real-time change synchronization between working copies that will eventually be open for anyone to implement.
* Library: Memo provides a reference library implementation written in Rust that produces and consumes the Memo protocol messages to synchronize working trees. We plan to ship a "light client" version of the library that compiles to WebAssembly and exposes a virtual file system API, as well as a full version based on Libgit2 that synchronizes with a full replica on the local file system. The libraries could be used in web- or desktop-based editors to enable real-time collaboration on a shared working copy of a Git repository.
* Executable daemon: Memo will provide an executable (also written in Rust) that runs as a daemon process on the local machine. It will synchronize with an underlying file system and expose an RPC interface to support integrations with a variety of editors for collaborative buffer editing.
* Xray: Memo spun out of Xray, which was an experiment to build a collaborative text editor. After the library stabilizes, we may decide to resume development of Xray as a first-class collaborative editor that is designed with Memo in mind. For now, we view the development of the more generalized technology as more important than building a new editor.
Interesting features / design priorities are as follows:
* Based on Git: When it comes to async collaboration and coarse-grained change synchronization, it's hard to beat Git. Memo doesn't try. Our goal is to enable Git users to share a single working copy and relay changes in real time. We may implement the ability to "fork" the state of a working copy, but we don't plan to implement asynchronous features such as branching and merging in terms of conflict-free replicated data types. For that you will continue to use Git. We will strive not to send or store any data that can already be derived from the state of the Git repository.
* Distributed: Like Git, Memo is fully distributed. This means that no replica is privileged over any other. No specific network topology will be enforced by our core algorithms and it will be possible to disseminate operations in arbitrary ways.
* Covers the whole working tree: Memo will merge concurrent edits to files along with modifications of the file system tree. One person can edit a file while another person moves it to a new directory, etc.
* Open and general purpose: We want Memo to feel similar to Git, a tool that can be integrated in a variety of workflows and environments. We may build more centralized experiences on top of it, but the core protocol should remain open and decentralized.
* More than just the source code: One of Memo's primary use cases is real-time collaboration, but effectively collaborating on source code often requires support from the environment to compile, run tests, statically analyze, etc. We intend to extend Memo's protocol to support primitives such as streams and shared buffers, which could support log output or a shared terminal, and annotations, which could support static analysis. An ideal scenario might see two developers with full replicas collaborating with a third developer in a browser, all viewing diagnostics generated by a language server running against a replica in the cloud and viewing test output from another machine.
A fundamental goal is to make the distinction between physical machines less relevant during the actual process of writing code. Today, most code is developed locally, while some code may be developed in cloud-based IDEs. It shouldn't actually matter *where* the working tree is located, and it might be replicated to multiple machines simultaneously which are all contributing something to the overall experience of the participating developers.
================================================
FILE: memo_core/rustfmt.toml
================================================
edition = "2018"
================================================
FILE: memo_core/script/compile_flatbuffers
================================================
#!/bin/bash
# Regenerates src/serialization/schema_generated.rs from the FlatBuffers schema.
# All paths are relative, so run this from the memo_core directory.

# Abort on the first failure so that the workaround below is never appended to
# a stale or missing file when flatc fails, and so the failure is reported to
# the caller through the exit status.
set -euo pipefail

flatc --rust -o src/serialization src/serialization/schema.fbs
# Workaround for incorrect code generation by flatc
echo "use flatbuffers::EndianScalar;" >> src/serialization/schema_generated.rs
================================================
FILE: memo_core/src/btree.rs
================================================
use smallvec::SmallVec;
use std::cmp::Ordering;
use std::fmt;
use std::ops::{Add, AddAssign};
use std::sync::Arc;
// Branching parameter of the tree. Node storage is declared with inline
// capacity for `2 * TREE_BASE` entries, and a node is split once it would hold
// more than `2 * TREE_BASE` entries.
// NOTE(review): test builds use a much smaller value, presumably so that small
// test trees still exercise node splits — confirm.
#[cfg(test)]
const TREE_BASE: usize = 2;
#[cfg(not(test))]
const TREE_BASE: usize = 16;
/// An element that can be stored in a `Tree`.
///
/// Every item can be condensed into a `Summary`. Summaries start from
/// `Default::default()` and are accumulated with `+=` on a borrowed summary.
pub trait Item: Clone + Eq + fmt::Debug {
/// Aggregate type stored next to items and nodes of the tree.
type Summary: for<'a> AddAssign<&'a Self::Summary> + Default + Clone + fmt::Debug;
/// Returns the summary describing this single item.
fn summarize(&self) -> Self::Summary;
}
/// An `Item` that additionally exposes a key.
pub trait KeyedItem: Item {
/// Type of the key; it must implement `Dimension`.
type Key: Dimension;
/// Returns this item's key.
fn key(&self) -> Self::Key;
}
/// A value that can be derived from a summary and accumulated by addition.
///
/// Implementors must be totally ordered (`Ord`), cloneable, and support both
/// `+` and `+=` with a borrowed right-hand side.
pub trait Dimension:
for<'a> Add<&'a Self, Output = Self> + for<'a> AddAssign<&'a Self> + Ord + Clone + fmt::Debug
{
    /// Builds the dimension value that corresponds to `summary`.
    fn from_summary(summary: &Summary) -> Self;
    /// Returns the dimension value of a default summary.
    fn default() -> Self {
        // `from_summary` already returns an owned `Self`, so cloning the
        // temporary would only create and immediately drop an extra copy.
        Self::from_summary(&Summary::default())
    }
}
/// Handle to a tree, wrapping a reference-counted root `Node`.
///
/// Cloning a `Tree` only clones the inner `Arc`, so clones share their nodes.
#[derive(Debug, Clone)]
pub struct Tree(Arc>);
/// A single node of the tree: either an internal node owning child subtrees
/// or a leaf storing items directly.
#[derive(Debug)]
pub enum Node {
/// A non-leaf node.
Internal {
/// Height of this node; `Tree::from_child_trees` sets it to the height of
/// the first child plus one.
height: u8,
/// Sum of `child_summaries`.
summary: T::Summary,
/// Summary of each child, kept in the same order as `child_trees`.
child_summaries: SmallVec<[T::Summary; 2 * TREE_BASE]>,
/// The child subtrees.
child_trees: SmallVec<[Tree; 2 * TREE_BASE]>,
},
/// A leaf node.
Leaf {
/// Sum of the summaries of `items`.
summary: T::Summary,
/// The items held by this leaf.
items: SmallVec<[T; 2 * TREE_BASE]>,
},
}
/// A position within a `Tree`, created by `Tree::cursor`.
// NOTE(review): the field notes below are inferred from names and types only;
// confirm them against the `impl Cursor` block.
#[derive(Clone)]
pub struct Cursor {
// The tree handed to `Cursor::new` (`Tree::cursor` passes a clone of itself).
tree: Tree,
// Presumably the path from the root to the current position, as
// (subtree, index within it, summary accumulated so far) — TODO confirm.
stack: SmallVec<[(Tree, usize, T::Summary); 16]>,
// Presumably the summary of everything before the current position — TODO confirm.
summary: T::Summary,
// Presumably set once the cursor has been positioned — TODO confirm.
did_seek: bool,
// Presumably set once the cursor has moved past the last item — TODO confirm.
at_end: bool,
}
/// A `Cursor` paired with a predicate over summaries, created by `Tree::filter`.
pub struct FilterCursor bool, T: Item> {
// The underlying cursor.
cursor: Cursor,
// Predicate supplied to `Tree::filter`; it receives a `&T::Summary` and returns a `bool`.
// NOTE(review): presumably subtrees whose summary is rejected are skipped — confirm in `impl FilterCursor`.
filter_node: F,
}
/// Direction preference passed to seek operations.
// NOTE(review): presumably selects which side of an exact match a seek lands
// on — confirm against the cursor's seek implementation.
#[derive(Eq, PartialEq)]
pub enum SeekBias {
Left,
Right,
}
/// A single change carrying an item of type `T`.
// NOTE(review): presumably consumed by a batch-edit method on `Tree` that is
// outside this view — confirm.
#[derive(Debug)]
pub enum Edit {
/// Carries an item to insert.
Insert(T),
/// Carries an item to remove.
Remove(T),
}
impl Tree {
/// Creates an empty tree: a single leaf with no items and a default summary.
pub fn new() -> Self {
    let empty_leaf = Node::Leaf {
        summary: T::Summary::default(),
        items: SmallVec::new(),
    };
    Tree(Arc::new(empty_leaf))
}
/// Creates a tree that contains exactly `item`.
pub fn from_item(item: T) -> Self {
    // Start from the empty tree and append the single item to it.
    let mut single = Self::new();
    single.push(item);
    single
}
/// Collects the items yielded by a cursor walking the whole tree into a `Vec`.
#[allow(dead_code)]
pub fn items(&self) -> Vec {
    let mut collected = Vec::new();
    let mut cursor = self.cursor();
    cursor.descend_to_first_item(self.clone(), |_| true);
    // Keep advancing until the cursor no longer yields an item.
    while let Some(item) = cursor.item() {
        collected.push(item);
        cursor.next();
    }
    collected
}
/// Returns a new cursor over this tree.
pub fn cursor(&self) -> Cursor {
    // The cursor receives its own handle to the tree.
    let root = self.clone();
    Cursor::new(root)
}
/// Returns a `FilterCursor` over this tree that uses `filter_node` as its
/// predicate. The predicate receives a `&T::Summary` and returns a `bool`.
pub fn filter(&self, filter_node: F) -> FilterCursor
where
F: Fn(&T::Summary) -> bool,
{
FilterCursor::new(self, filter_node)
}
/// Returns a clone of the first item of the leftmost leaf, or `None` if that
/// leaf holds no items.
#[allow(dead_code)]
pub fn first(&self) -> Option {
    let leaf = self.leftmost_leaf();
    leaf.0.items().first().cloned()
}
/// Returns a clone of the last item of the rightmost leaf, or `None` if that
/// leaf holds no items.
pub fn last(&self) -> Option {
    let leaf = self.rightmost_leaf();
    leaf.0.items().last().cloned()
}
/// Returns the root summary converted to dimension `D`.
pub fn extent>(&self) -> D {
    match self.0.as_ref() {
        // Both node kinds carry a `summary` field. `from_summary` already
        // returns an owned `D`, so no additional clone is required.
        Node::Internal { summary, .. } | Node::Leaf { summary, .. } => D::from_summary(summary),
    }
}
/// Returns a clone of the root node's summary.
pub fn summary(&self) -> T::Summary {
    // Both node kinds carry a `summary` field.
    let root_summary = match self.0.as_ref() {
        Node::Internal { summary, .. } | Node::Leaf { summary, .. } => summary,
    };
    root_summary.clone()
}
/// Returns `true` only when the root is a leaf without items. A root that is
/// an internal node is always reported as non-empty.
#[cfg(test)]
pub fn is_empty(&self) -> bool {
    if let Node::Leaf { items, .. } = self.0.as_ref() {
        items.is_empty()
    } else {
        false
    }
}
/// Appends every item produced by `iter` to the end of the tree.
///
/// Items are first gathered into a leaf of up to `2 * TREE_BASE` items; each
/// leaf is then appended with `push_tree`, so the tree is updated once per
/// leaf rather than once per item.
pub fn extend(&mut self, iter: I)
where
I: IntoIterator- ,
{
// Leaf currently being filled, if any.
let mut leaf: Option> = None;
for item in iter {
// The pending leaf is full: flush it before accepting another item.
if leaf.is_some() && leaf.as_ref().unwrap().items().len() == 2 * TREE_BASE {
self.push_tree(Tree(Arc::new(leaf.take().unwrap())));
}
// Start a fresh empty leaf on the first item and after every flush.
if leaf.is_none() {
leaf = Some(Node::Leaf:: {
summary: T::Summary::default(),
items: SmallVec::new(),
});
}
let leaf = leaf.as_mut().unwrap();
// Keep the leaf summary in sync with its items.
*leaf.summary_mut() += &item.summarize();
leaf.items_mut().push(item);
}
// Flush the last, possibly partially filled, leaf.
if leaf.is_some() {
self.push_tree(Tree(Arc::new(leaf.take().unwrap())));
}
}
/// Appends a single item to the end of the tree.
pub fn push(&mut self, item: T) {
    // Summarize before `item` is moved into the leaf.
    let summary = item.summarize();
    let leaf = Tree(Arc::new(Node::Leaf {
        summary,
        items: SmallVec::from_vec(vec![item]),
    }));
    // Wrap the one-item leaf in an internal node and append that subtree.
    let wrapped = Tree::from_child_trees(vec![leaf]);
    self.push_tree(wrapped);
}
/// Appends the contents of `other` to the end of this tree.
pub fn push_tree(&mut self, other: Self) {
    let other_node = other.0.clone();
    // An empty leaf contributes nothing.
    if other_node.is_leaf() && other_node.items().is_empty() {
        return;
    }
    if self.0.height() >= other_node.height() {
        // `other` fits somewhere along our right edge. If the root itself had
        // to split, grow the tree by one level above both halves.
        if let Some(split_tree) = self.push_tree_recursive(other) {
            *self = Self::from_child_trees(vec![self.clone(), split_tree]);
        }
    } else {
        // `other` is taller than we are: append its children one at a time.
        for tree in other_node.child_trees() {
            self.push_tree(tree.clone());
        }
    }
}
// Appends `other` at the right edge of this subtree, mutating this node in
// place through `Arc::make_mut`.
//
// Returns `None` when everything fit into this node. Returns `Some(tree)` when
// this node would have held more than `2 * TREE_BASE` entries and was split:
// `self` then keeps the left half and the returned tree (a node of the same
// kind, and for internal nodes the same height) holds the right half. The
// caller is responsible for attaching the returned tree.
//
// `other` must not be taller than `self`: `*height - other_node.height()` is
// an unsigned subtraction. When `self` is a leaf, `other` is read through
// `items()`. `push_tree` only calls this when `self` is at least as tall as
// `other`.
fn push_tree_recursive(&mut self, other: Tree) -> Option> {
match Arc::make_mut(&mut self.0) {
Node::Internal {
height,
summary,
child_summaries,
child_trees,
..
} => {
let other_node = other.0.clone();
// Fold `other` into our summary up front; it is recomputed below if
// this node ends up being split.
*summary += other_node.summary();
let height_delta = *height - other_node.height();
let mut summaries_to_append = SmallVec::<[T::Summary; 2 * TREE_BASE]>::new();
let mut trees_to_append = SmallVec::<[Tree; 2 * TREE_BASE]>::new();
if height_delta == 0 {
// Same height: adopt `other`'s children as our own.
summaries_to_append.extend(other_node.child_summaries().iter().cloned());
trees_to_append.extend(other_node.child_trees().iter().cloned());
} else if height_delta == 1 && !other_node.is_underflowing() {
// `other` is exactly one level shorter and `is_underflowing()` is
// false for it: attach it directly as a new last child.
summaries_to_append.push(other_node.summary().clone());
trees_to_append.push(other)
} else {
// Otherwise push `other` into our last child, refresh that child's
// cached summary, and adopt the right half if the child split.
let tree_to_append = child_trees.last_mut().unwrap().push_tree_recursive(other);
*child_summaries.last_mut().unwrap() =
child_trees.last().unwrap().0.summary().clone();
if let Some(split_tree) = tree_to_append {
summaries_to_append.push(split_tree.0.summary().clone());
trees_to_append.push(split_tree);
}
}
let child_count = child_trees.len() + trees_to_append.len();
if child_count > 2 * TREE_BASE {
// Too many children for one node: split into two halves.
let left_summaries: SmallVec<_>;
let right_summaries: SmallVec<_>;
let left_trees;
let right_trees;
// Half of `child_count`, rounded up: the left half receives the extra
// entry when the count is odd.
let midpoint = (child_count + child_count % 2) / 2;
{
let mut all_summaries = child_summaries
.iter()
.chain(summaries_to_append.iter())
.cloned();
left_summaries = all_summaries.by_ref().take(midpoint).collect();
right_summaries = all_summaries.collect();
let mut all_trees =
child_trees.iter().chain(trees_to_append.iter()).cloned();
left_trees = all_trees.by_ref().take(midpoint).collect();
right_trees = all_trees.collect();
}
// This node keeps the left half; its summary is recomputed from the
// children it retains.
*summary = sum(left_summaries.iter());
*child_summaries = left_summaries;
*child_trees = left_trees;
// The right half becomes a new sibling of the same height.
Some(Tree(Arc::new(Node::Internal {
height: *height,
summary: sum(right_summaries.iter()),
child_summaries: right_summaries,
child_trees: right_trees,
})))
} else {
// Everything fits: append in place.
child_summaries.extend(summaries_to_append);
child_trees.extend(trees_to_append);
None
}
}
Node::Leaf { summary, items, .. } => {
let other_node = other.0;
let child_count = items.len() + other_node.items().len();
if child_count > 2 * TREE_BASE {
// The combined items do not fit into one leaf: split them, with the
// left half receiving the extra item when the count is odd.
let left_items;
let right_items: SmallVec<[T; 2 * TREE_BASE]>;
let midpoint = (child_count + child_count % 2) / 2;
{
let mut all_items = items.iter().chain(other_node.items().iter()).cloned();
left_items = all_items.by_ref().take(midpoint).collect();
right_items = all_items.collect();
}
// Both halves get their summaries recomputed from their items.
*items = left_items;
*summary = sum_owned(items.iter().map(|item| item.summarize()));
Some(Tree(Arc::new(Node::Leaf {
summary: sum_owned(right_items.iter().map(|item| item.summarize())),
items: right_items,
})))
} else {
// Everything fits: extend this leaf and its summary in place.
*summary += other_node.summary();
items.extend(other_node.items().iter().cloned());
None
}
}
}
}
fn from_child_trees(child_trees: Vec>) -> Self {
let height = child_trees[0].0.height() + 1;
let mut child_summaries = SmallVec::new();
for child in &child_trees {
child_summaries.push(child.0.summary().clone());
}
let summary = sum(child_summaries.iter());
Tree(Arc::new(Node::Internal {
height,
summary,
child_summaries,
child_trees: SmallVec::from_vec(child_trees),
}))
}
/// Returns the leaf reached by following the first child at every level.
///
/// Panics if an internal node along the way has no children.
fn leftmost_leaf(&self) -> Tree {
    let mut current = self;
    while let Node::Internal { child_trees, .. } = current.0.as_ref() {
        current = child_trees.first().unwrap();
    }
    current.clone()
}
/// Returns the leaf reached by following the last child at every level.
///
/// Panics if an internal node along the way has no children.
fn rightmost_leaf(&self) -> Tree {
    let mut current = self;
    while let Node::Internal { child_trees, .. } = current.0.as_ref() {
        current = child_trees.last().unwrap();
    }
    current.clone()
}
}
impl Tree {
/// Inserts `item` at the position given by its key.
///
/// The tree is rebuilt as: everything the cursor slices off before `item.key()` (left bias),
/// then `item`, then the remainder returned by the cursor's `suffix`.
pub fn insert(&mut self, item: T) {
    let key = item.key();
    let mut cursor = self.cursor();

    let mut rebuilt = cursor.slice(&key, SeekBias::Left);
    rebuilt.push(item);

    let remainder = cursor.suffix::();
    rebuilt.push_tree(remainder);

    *self = rebuilt;
}
/// Applies a batch of insertions and removals and replaces this tree with the result.
///
/// `edits` is sorted in place by key (unstable sort) before being applied. For every edit, an
/// existing item with the same key is skipped, so `Edit::Insert` replaces it and `Edit::Remove`
/// drops it. An empty `edits` slice leaves the tree untouched.
pub fn edit(&mut self, edits: &mut [Edit]) {
if edits.is_empty() {
return;
}
edits.sort_unstable_by_key(|item| item.key());
let mut cursor = self.cursor();
let mut new_tree = Tree::new();
// Inserted items are collected here and flushed before the next slice of old items.
let mut buffered_items = Vec::new();
// Park the cursor at the start of the tree.
cursor.seek(&T::Key::default(), SeekBias::Left);
for edit in edits {
let new_key = edit.key();
let mut old_item = cursor.item();
// The cursor is behind this edit: flush pending inserts, then copy the untouched
// items that precede `new_key` from the old tree.
if old_item
.as_ref()
.map_or(false, |old_item| old_item.key() < new_key)
{
new_tree.extend(buffered_items.drain(..));
let slice = cursor.slice(&new_key, SeekBias::Left);
new_tree.push_tree(slice);
old_item = cursor.item();
}
// An old item with the same key is stepped over, so it does not reach the new tree.
if old_item.map_or(false, |old_item| old_item.key() == new_key) {
cursor.next();
}
match edit {
Edit::Insert(item) => {
buffered_items.push(item.clone());
}
Edit::Remove(_) => {}
}
}
// Flush the remaining inserts and append whatever is left of the old tree.
new_tree.extend(buffered_items);
new_tree.push_tree(cursor.suffix::());
*self = new_tree;
}
}
impl Node {
    /// Reports whether this node is a `Node::Leaf`.
    fn is_leaf(&self) -> bool {
        if let Node::Leaf { .. } = self {
            true
        } else {
            false
        }
    }

    /// Height stored in an internal node; leaves are at height 0.
    fn height(&self) -> u8 {
        match self {
            Node::Leaf { .. } => 0,
            Node::Internal { height, .. } => *height,
        }
    }

    /// Summary stored in this node, whichever variant it is.
    fn summary(&self) -> &T::Summary {
        match self {
            Node::Internal { summary, .. } | Node::Leaf { summary, .. } => summary,
        }
    }

    /// Summaries of the children of an internal node. Panics on a leaf.
    fn child_summaries(&self) -> &[T::Summary] {
        match self {
            Node::Leaf { .. } => panic!("Leaf nodes have no child summaries"),
            Node::Internal {
                child_summaries, ..
            } => &child_summaries[..],
        }
    }

    /// Children of an internal node. Panics on a leaf.
    fn child_trees(&self) -> &SmallVec<[Tree; 2 * TREE_BASE]> {
        match self {
            Node::Leaf { .. } => panic!("Leaf nodes have no child trees"),
            Node::Internal { child_trees, .. } => child_trees,
        }
    }

    /// Items of a leaf. Panics on an internal node.
    fn items(&self) -> &SmallVec<[T; 2 * TREE_BASE]> {
        match self {
            Node::Internal { .. } => panic!("Internal nodes have no items"),
            Node::Leaf { items, .. } => items,
        }
    }

    /// Mutable access to the items of a leaf. Panics on an internal node.
    fn items_mut(&mut self) -> &mut SmallVec<[T; 2 * TREE_BASE]> {
        match self {
            Node::Internal { .. } => panic!("Internal nodes have no items"),
            Node::Leaf { items, .. } => items,
        }
    }

    /// Mutable access to the summary stored in this node.
    fn summary_mut(&mut self) -> &mut T::Summary {
        match self {
            Node::Internal { summary, .. } | Node::Leaf { summary, .. } => summary,
        }
    }

    /// True when the node holds fewer than `TREE_BASE` children (internal) or items (leaf).
    fn is_underflowing(&self) -> bool {
        let occupancy = match self {
            Node::Internal { child_trees, .. } => child_trees.len(),
            Node::Leaf { items, .. } => items.len(),
        };
        occupancy < TREE_BASE
    }
}
impl Clone for Node {
    /// Clones the node field by field, preserving its variant.
    fn clone(&self) -> Self {
        match *self {
            Node::Leaf {
                ref summary,
                ref items,
                ..
            } => Node::Leaf {
                summary: summary.clone(),
                items: items.clone(),
            },
            Node::Internal {
                height,
                ref summary,
                ref child_summaries,
                ref child_trees,
                ..
            } => Node::Internal {
                height,
                summary: summary.clone(),
                child_summaries: child_summaries.clone(),
                child_trees: child_trees.clone(),
            },
        }
    }
}
impl Cursor {
/// Creates a cursor over `tree` that has not been positioned yet: empty stack, default
/// summary, and both `did_seek` and `at_end` cleared.
fn new(tree: Tree) -> Self {
    let stack = SmallVec::new();
    let summary = T::Summary::default();
    Self {
        tree,
        stack,
        summary,
        did_seek: false,
        at_end: false,
    }
}
/// Returns the cursor to its freshly constructed state; the tree itself is kept.
fn reset(&mut self) {
    self.stack.clear();
    self.summary = T::Summary::default();
    self.at_end = false;
    self.did_seek = false;
}
/// Returns the dimension `D` derived from the summary accumulated by the cursor so far.
pub fn start>(&self) -> D {
    let dimension = D::from_summary(&self.summary);
    dimension.clone()
}
/// Returns `start` plus the dimension of the current item, or just `start` when the cursor has
/// no current item.
pub fn end>(&self) -> D {
    match self.item() {
        Some(item) => self.start::() + &D::from_summary(&item.summarize()),
        None => self.start::(),
    }
}
/// Returns a clone of the item the cursor is parked on, if any.
///
/// Panics when called before the cursor has been positioned. Yields `None` when the stack is
/// empty or when the index on top of the stack equals the leaf's item count.
pub fn item(&self) -> Option {
    assert!(self.did_seek, "Must seek before calling this method");
    self.stack
        .last()
        .and_then(|(subtree, index, _)| match *subtree.0 {
            Node::Leaf { ref items, .. } => {
                if *index != items.len() {
                    Some(items[*index].clone())
                } else {
                    None
                }
            }
            _ => unreachable!(),
        })
}
/// Returns a clone of the item immediately before the cursor position, if any.
///
/// Panics when called before the cursor has been positioned. At the first slot of a leaf the
/// last item of the previous leaf is used; with an empty stack the tree's last item is returned
/// only when the cursor is at the end.
pub fn prev_item(&self) -> Option {
    assert!(self.did_seek, "Must seek before calling this method");
    match self.stack.last() {
        Some((_, index, _)) if *index == 0 => self.prev_leaf().map(|prev_leaf| {
            let prev_leaf = prev_leaf.0;
            let item = prev_leaf.items().last().unwrap().clone();
            item
        }),
        Some((cur_leaf, index, _)) => match *cur_leaf.0 {
            Node::Leaf { ref items, .. } => Some(items[*index - 1].clone()),
            _ => unreachable!(),
        },
        None if self.at_end => self.tree.last(),
        None => None,
    }
}
/// Finds the leaf that precedes the leaf on top of the stack.
///
/// Walks the ancestors from the innermost outwards (skipping the top entry) and, at the first
/// one whose index is non-zero, returns the rightmost leaf of the sibling just before that
/// index. Returns `None` when every ancestor is at index 0.
fn prev_leaf(&self) -> Option> {
    self.stack
        .iter()
        .rev()
        .skip(1)
        .find(|entry| entry.1 != 0)
        .map(|(ancestor, index, _)| match *ancestor.0 {
            Node::Internal {
                ref child_trees, ..
            } => child_trees[*index - 1].rightmost_leaf(),
            Node::Leaf { .. } => unreachable!(),
        })
}
/// Moves the cursor one item backwards.
///
/// Panics when called before the cursor has been positioned. From the end position the cursor
/// descends to the last item of the tree. Otherwise stack entries are popped until one with a
/// non-zero index is found; that index is decremented and the cursor descends to the last item
/// below it. If every entry has index 0 the stack ends up empty.
pub fn prev(&mut self) {
assert!(self.did_seek, "Must seek before calling this method");
if self.at_end {
// Restart the summary from scratch and walk down the right edge of the tree.
self.summary = T::Summary::default();
let root = self.tree.clone();
self.descend_to_last_item(root);
self.at_end = false;
} else {
while let Some((subtree, index, _)) = self.stack.pop() {
if index > 0 {
let new_index = index - 1;
// Start from the summary recorded for the parent entry (or the default at the
// root), then add everything that precedes `new_index` inside this node.
self.summary = self
.stack
.last()
.map_or(T::Summary::default(), |(_, _, summary)| summary.clone());
match subtree.0.as_ref() {
Node::Internal {
child_trees,
child_summaries,
..
} => {
for summary in &child_summaries[0..new_index] {
self.summary += summary;
}
self.stack
.push((subtree.clone(), new_index, self.summary.clone()));
self.descend_to_last_item(child_trees[new_index].clone());
}
Node::Leaf { items, .. } => {
for item in &items[0..new_index] {
self.summary += &item.summarize();
}
self.stack
.push((subtree.clone(), new_index, self.summary.clone()));
}
}
break;
}
}
}
}
pub fn next(&mut self) {
self.next_internal(|_| true)
}
/// Advances the cursor to the next item whose summary is accepted by `filter_node`.
///
/// Panics when called before the cursor has been positioned. `filter_node` is consulted for
/// child summaries of internal nodes and for the summaries of individual items; rejected
/// children are skipped and their summaries added to the cursor's running summary. `at_end` is
/// set when the stack is empty afterwards.
fn next_internal(&mut self, filter_node: F)
where
F: Fn(&T::Summary) -> bool,
{
assert!(self.did_seek, "Must seek before calling this method");
if self.stack.is_empty() {
// Empty stack and not at the end: start again from the first matching item of the tree.
if !self.at_end {
let root = self.tree.clone();
self.descend_to_first_item(root, filter_node);
}
} else {
while self.stack.len() > 0 {
let new_subtree = {
let (subtree, index, summary) = self.stack.last_mut().unwrap();
match subtree.0.as_ref() {
Node::Internal {
child_trees,
child_summaries,
..
} => {
// Step past the current child, then keep stepping while the next child is
// rejected by the filter.
while *index < child_summaries.len() {
*summary += &child_summaries[*index];
*index += 1;
if let Some(next_summary) = child_summaries.get(*index) {
if filter_node(next_summary) {
break;
} else {
self.summary += next_summary;
}
}
}
// `None` here means this node is exhausted and gets popped below.
child_trees.get(*index).cloned()
}
Node::Leaf { items, .. } => loop {
// NOTE(review): `items[*index]` panics if the index already equals
// `items.len()`; confirm no caller reaches this with such an entry.
let item_summary = items[*index].summarize();
self.summary += &item_summary;
*summary += &item_summary;
*index += 1;
if let Some(next_item) = items.get(*index) {
// Found an accepted item in the same leaf: the cursor is positioned.
// This early return leaves `at_end` unchanged.
if filter_node(&next_item.summarize()) {
return;
}
} else {
break None;
}
},
}
};
if let Some(subtree) = new_subtree {
self.descend_to_first_item(subtree, filter_node);
break;
} else {
self.stack.pop();
}
}
}
self.at_end = self.stack.is_empty();
}
/// Walks down from `subtree` to the first item accepted by `filter_node`, pushing one stack
/// entry per visited node and marking the cursor as positioned.
///
/// Summaries of rejected children and items are added to the cursor's running summary. When no
/// child of a visited node (or no item of the reached leaf) is accepted, the descent stops
/// without pushing an entry for that node.
fn descend_to_first_item(&mut self, mut subtree: Tree, filter_node: F)
where
F: Fn(&T::Summary) -> bool,
{
self.did_seek = true;
loop {
subtree = match *subtree.0 {
Node::Internal {
ref child_trees,
ref child_summaries,
..
} => {
// Find the first child accepted by the filter, accumulating the rejected ones.
let mut new_index = None;
for (index, summary) in child_summaries.iter().enumerate() {
if filter_node(summary) {
new_index = Some(index);
break;
}
self.summary += summary;
}
if let Some(new_index) = new_index {
self.stack
.push((subtree.clone(), new_index, self.summary.clone()));
child_trees[new_index].clone()
} else {
break;
}
}
Node::Leaf { ref items, .. } => {
// Same search at item granularity.
let mut new_index = None;
for (index, item) in items.iter().enumerate() {
let summary = item.summarize();
if filter_node(&summary) {
new_index = Some(index);
break;
}
self.summary += &summary;
}
if let Some(new_index) = new_index {
self.stack
.push((subtree.clone(), new_index, self.summary.clone()));
}
break;
}
}
}
}
/// Walks down the right edge of `subtree` to its last item, pushing one stack entry per
/// visited node and marking the cursor as positioned.
///
/// The running summary is advanced past everything that precedes the last child / last item at
/// each level, so it ends up describing the content before the last item.
fn descend_to_last_item(&mut self, mut subtree: Tree) {
self.did_seek = true;
loop {
match subtree.0.clone().as_ref() {
Node::Internal {
child_trees,
child_summaries,
..
} => {
// Add every child summary except the last; `len() - 1` underflows if the node has
// no children.
for summary in &child_summaries[0..child_summaries.len() - 1] {
self.summary += summary;
}
self.stack
.push((subtree.clone(), child_trees.len() - 1, self.summary.clone()));
subtree = child_trees.last().unwrap().clone();
}
Node::Leaf { items, .. } => {
// `saturating_sub` makes an empty leaf yield index 0 instead of underflowing.
let last_index = items.len().saturating_sub(1);
for item in &items[0..last_index] {
self.summary += &item.summarize();
}
self.stack
.push((subtree.clone(), last_index, self.summary.clone()));
break;
}
}
}
}
/// Resets the cursor and seeks from the start of the tree to `pos` with the given bias.
///
/// Returns the result of `seek_internal`.
pub fn seek(&mut self, pos: &D, bias: SeekBias) -> bool
where
    D: Dimension,
{
    self.reset();
    let found = self.seek_internal(pos, bias, None);
    found
}
/// Seeks to `pos` starting from the cursor's current position (no reset is performed).
///
/// Returns the result of `seek_internal`.
pub fn seek_forward(&mut self, pos: &D, bias: SeekBias) -> bool
where
    D: Dimension,
{
    let found = self.seek_internal(pos, bias, None);
    found
}
/// Seeks forward to `end` and returns a tree containing the items that were passed on the way.
///
/// The boolean result of the underlying seek is discarded.
pub fn slice(&mut self, end: &D, bias: SeekBias) -> Tree
where
    D: Dimension,
{
    let mut collected = Tree::new();
    let _ = self.seek_internal(end, bias, Some(&mut collected));
    collected
}
/// Returns a tree with everything from the cursor's position to the end of the tree.
///
/// Implemented as a right-biased seek to the tree's full extent that collects the passed items.
pub fn suffix(&mut self) -> Tree
where
    D: Dimension,
{
    let extent = self.tree.extent::();
    let mut collected = Tree::new();
    let _ = self.seek_internal(&extent, SeekBias::Right, Some(&mut collected));
    collected
}
/// Moves the cursor forward until `target` is reached in dimension `D`, optionally collecting
/// everything it passes into `slice`.
///
/// A child or item is passed when `target` lies beyond its end, or exactly at its end with
/// `SeekBias::Right`. The search runs in two phases: if the cursor was already positioned, it
/// first climbs the existing stack looking for the subtree that contains `target`; it then
/// descends into that subtree (or the root, for a fresh cursor).
///
/// Returns whether `target` equals the cursor's `end` (left bias) or `start` (right bias).
/// In debug builds, asserts that `target` is not before the current position.
fn seek_internal(
&mut self,
target: &D,
bias: SeekBias,
mut slice: Option<&mut Tree>,
) -> bool
where
D: Dimension,
{
let mut pos = D::from_summary(&self.summary).clone();
debug_assert!(target >= &pos);
let mut containing_subtree = None;
if self.did_seek {
// Phase 1: continue from the current position, popping stack entries that are exhausted.
'outer: while self.stack.len() > 0 {
{
let (parent_subtree, index, _) = self.stack.last_mut().unwrap();
match *parent_subtree.0 {
Node::Internal {
ref child_summaries,
ref child_trees,
..
} => {
// The child at `index` was already handled at a deeper level; move to its sibling.
*index += 1;
while *index < child_summaries.len() {
let child_tree = &child_trees[*index];
let child_summary = &child_summaries[*index];
let mut child_end = pos;
child_end += &D::from_summary(&child_summary);
let comparison = target.cmp(&child_end);
if comparison == Ordering::Greater
|| (comparison == Ordering::Equal && bias == SeekBias::Right)
{
// The whole child lies before the target: pass over it.
self.summary += child_summary;
pos = child_end;
if let Some(slice) = slice.as_mut() {
slice.push_tree(child_tree.clone());
}
*index += 1;
} else {
// The target falls inside this child: descend into it in phase 2.
pos = D::from_summary(&self.summary).clone();
containing_subtree = Some(child_tree.clone());
break 'outer;
}
}
}
Node::Leaf { ref items, .. } => {
// Items passed in this leaf are gathered and pushed to `slice` as one leaf.
let mut slice_items = SmallVec::<[T; 2 * TREE_BASE]>::new();
let mut slice_items_summary = T::Summary::default();
while *index < items.len() {
let item = &items[*index];
let item_summary = item.summarize();
let mut item_end = pos;
item_end += &D::from_summary(&item_summary);
let comparison = target.cmp(&item_end);
if comparison == Ordering::Greater
|| (comparison == Ordering::Equal && bias == SeekBias::Right)
{
self.summary += &item_summary;
pos = item_end;
if slice.is_some() {
slice_items.push(item.clone());
slice_items_summary += &item_summary;
}
*index += 1;
} else {
// The cursor stops on this item; the stack entry already points at it.
pos = D::from_summary(&self.summary).clone();
if let Some(slice) = slice.as_mut() {
slice.push_tree(Tree(Arc::new(Node::Leaf {
summary: slice_items_summary,
items: slice_items,
})));
}
break 'outer;
}
}
if let Some(slice) = slice.as_mut() {
if slice_items.len() > 0 {
slice.push_tree(Tree(Arc::new(Node::Leaf {
summary: slice_items_summary,
items: slice_items,
})));
}
}
}
}
}
// This node is exhausted; continue with its parent.
self.stack.pop();
}
} else {
// Fresh cursor: search the whole tree from the root.
self.did_seek = true;
containing_subtree = Some(self.tree.clone());
}
if let Some(mut subtree) = containing_subtree {
// Phase 2: descend, pushing a stack entry for each node in which the cursor stops.
loop {
let mut next_subtree = None;
match *subtree.0 {
Node::Internal {
ref child_summaries,
ref child_trees,
..
} => {
for (index, child_summary) in child_summaries.iter().enumerate() {
let mut child_end = pos;
child_end += &D::from_summary(child_summary);
let comparison = target.cmp(&child_end);
if comparison == Ordering::Greater
|| (comparison == Ordering::Equal && bias == SeekBias::Right)
{
self.summary += child_summary;
pos = child_end;
if let Some(slice) = slice.as_mut() {
slice.push_tree(child_trees[index].clone());
}
} else {
pos = D::from_summary(&self.summary).clone();
self.stack
.push((subtree.clone(), index, self.summary.clone()));
next_subtree = Some(child_trees[index].clone());
break;
}
}
}
Node::Leaf { ref items, .. } => {
let mut slice_items = SmallVec::<[T; 2 * TREE_BASE]>::new();
let mut slice_items_summary = T::Summary::default();
for (index, item) in items.iter().enumerate() {
let item_summary = item.summarize();
let mut child_end = pos;
child_end += &D::from_summary(&item_summary);
let comparison = target.cmp(&child_end);
if comparison == Ordering::Greater
|| (comparison == Ordering::Equal && bias == SeekBias::Right)
{
if slice.is_some() {
slice_items.push(item.clone());
slice_items_summary += &item_summary;
}
self.summary += &item_summary;
pos = child_end;
} else {
pos = D::from_summary(&self.summary).clone();
self.stack
.push((subtree.clone(), index, self.summary.clone()));
break;
}
}
if let Some(slice) = slice.as_mut() {
if slice_items.len() > 0 {
slice.push_tree(Tree(Arc::new(Node::Leaf {
summary: slice_items_summary,
items: slice_items,
})));
}
}
}
};
if let Some(next_subtree) = next_subtree {
subtree = next_subtree;
} else {
break;
}
}
}
// An empty stack means the seek ran off the end of the tree.
self.at_end = self.stack.is_empty();
if bias == SeekBias::Left {
*target == self.end::()
} else {
*target == self.start::()
}
}
}
impl Iterator for Cursor {
    type Item = T;

    /// Yields the item under the cursor and advances past it.
    ///
    /// A cursor that has not been positioned yet is first moved to the first item of the tree.
    fn next(&mut self) -> Option {
        if !self.did_seek {
            let root = self.tree.clone();
            self.descend_to_first_item(root, |_| true);
        }

        let current = self.item();
        if current.is_some() {
            self.next();
        }
        current
    }
}
impl bool, T: Item> FilterCursor {
    /// Creates a cursor over `tree` that only stops on items accepted by `filter_node`.
    ///
    /// If the tree's overall summary is rejected the cursor starts out at the end; otherwise it
    /// is positioned on the first accepted item.
    fn new(tree: &Tree, filter_node: F) -> Self {
        let mut cursor = tree.cursor();
        let root_matches = filter_node(&tree.summary());
        if !root_matches {
            cursor.did_seek = true;
            cursor.at_end = true;
        } else {
            cursor.descend_to_first_item(tree.clone(), &filter_node);
        }
        Self {
            cursor,
            filter_node,
        }
    }

    /// Dimension `D` of everything before the cursor, as reported by the inner cursor.
    pub fn start>(&self) -> D {
        self.cursor.start()
    }

    /// Item under the inner cursor, if any.
    pub fn item(&self) -> Option {
        self.cursor.item()
    }

    /// Advances to the next item accepted by the filter.
    pub fn next(&mut self) {
        let filter = &self.filter_node;
        self.cursor.next_internal(filter);
    }
}
impl bool, T: Item> Iterator for FilterCursor {
    type Item = T;

    /// Yields the current item and moves on to the next one accepted by the filter.
    fn next(&mut self) -> Option {
        let current = self.item();
        if current.is_some() {
            self.cursor.next_internal(&self.filter_node);
        }
        current
    }
}
impl Edit {
    /// Key of the item carried by this edit, whether it is an insertion or a removal.
    fn key(&self) -> T::Key {
        match self {
            Edit::Insert(item) => item.key(),
            Edit::Remove(item) => item.key(),
        }
    }
}
/// Adds up the referenced values yielded by `iter`, starting from `T::default()`.
///
/// Returns `T::default()` for an empty iterator.
fn sum<'a, T, I>(iter: I) -> T
where
    T: 'a + Default + AddAssign<&'a T>,
    // The associated-type bound is required: the loop below adds each `&'a T` to the total.
    I: Iterator<Item = &'a T>,
{
    let mut sum = T::default();
    for value in iter {
        sum += value;
    }
    sum
}
/// Adds up the owned values yielded by `iter`, starting from `T::default()`.
///
/// Each value is added by reference and then dropped. Returns `T::default()` for an empty
/// iterator.
fn sum_owned<T, I>(iter: I) -> T
where
    T: Default + for<'a> AddAssign<&'a T>,
    // The iterator hands out owned `T`s; the loop below borrows each one for the addition.
    I: Iterator<Item = T>,
{
    let mut sum = T::default();
    for value in iter {
        sum += &value;
    }
    sum
}
// Unit tests for the tree and its cursors, using `u8` items summarised by `IntegersSummary`.
#[cfg(test)]
mod tests {
use super::*;
// Pushing one tree onto another keeps the items of both, in order.
#[test]
fn test_extend_and_push_tree() {
let mut tree1 = Tree::new();
tree1.extend(0..20);
let mut tree2 = Tree::new();
tree2.extend(50..100);
tree1.push_tree(tree2);
assert_eq!(tree1.items(), (0..20).chain(50..100).collect::>());
}
// Randomised test: splices, filter cursors and cursor movement are checked against a plain
// vector of the same items. Each seed in 0..100 produces a deterministic sequence.
#[test]
fn test_random() {
for seed in 0..100 {
use rand::{Rng, SeedableRng, StdRng};
let mut rng = StdRng::from_seed(&[seed]);
let mut tree = Tree::::new();
let count = rng.gen_range(0, 10);
tree.extend(rng.gen_iter().take(count));
for _ in 0..5 {
// Replace a random range of the tree with a few random items, mirroring the change
// with `splice` on the reference vector.
let splice_end = rng.gen_range(0, tree.extent::().0 + 1);
let splice_start = rng.gen_range(0, splice_end + 1);
let count = rng.gen_range(0, 3);
let tree_end = tree.extent::();
let new_items = rng.gen_iter().take(count).collect::>();
let mut reference_items = tree.items();
reference_items.splice(splice_start..splice_end, new_items.clone());
let mut cursor = tree.cursor();
tree = cursor.slice(&Count(splice_start), SeekBias::Right);
tree.extend(new_items);
cursor.seek(&Count(splice_end), SeekBias::Right);
tree.push_tree(cursor.slice(&tree_end, SeekBias::Right));
assert_eq!(tree.items(), reference_items);
// The filter cursor must visit exactly the even items, reporting their indices.
let mut filter_cursor = tree.filter(|summary| summary.contains_even);
let mut reference_filter = tree
.items()
.into_iter()
.enumerate()
.filter(|(_, item)| (item & 1) == 0);
while let Some(actual_item) = filter_cursor.item() {
let (reference_index, reference_item) = reference_filter.next().unwrap();
assert_eq!(actual_item, reference_item);
assert_eq!(filter_cursor.start::().0, reference_index);
filter_cursor.next();
}
assert!(reference_filter.next().is_none());
// Seek to a random position, then step forwards five times and backwards five times,
// checking `start`, `item` and `prev_item` after every step.
let mut pos = rng.gen_range(0, tree.extent::().0 + 1);
let mut before_start = false;
let mut cursor = tree.cursor();
cursor.seek(&Count(pos), SeekBias::Right);
for i in 0..10 {
assert_eq!(cursor.start::().0, pos);
if pos > 0 {
assert_eq!(cursor.prev_item().unwrap(), reference_items[pos - 1]);
} else {
assert_eq!(cursor.prev_item(), None);
}
if pos < reference_items.len() && !before_start {
assert_eq!(cursor.item().unwrap(), reference_items[pos]);
} else {
assert_eq!(cursor.item(), None);
}
if i < 5 {
cursor.next();
if pos < reference_items.len() {
pos += 1;
before_start = false;
}
} else {
cursor.prev();
// Stepping back from position 0 leaves the cursor without a current item.
if pos == 0 {
before_start = true;
}
pos = pos.saturating_sub(1);
}
}
}
}
}
// Deterministic walk-through of cursor behaviour on empty, single-item and six-item trees.
#[test]
fn test_cursor() {
// Empty tree
let tree = Tree::::new();
let mut cursor = tree.cursor();
assert_eq!(cursor.slice(&Sum(0), SeekBias::Right).items(), vec![]);
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
// Single-element tree
let mut tree = Tree::::new();
tree.extend(vec![1]);
let mut cursor = tree.cursor();
assert_eq!(cursor.slice(&Sum(0), SeekBias::Right).items(), vec![]);
assert_eq!(cursor.item(), Some(1));
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
cursor.next();
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(1));
assert_eq!(cursor.start::(), Count(1));
assert_eq!(cursor.start::(), Sum(1));
cursor.prev();
assert_eq!(cursor.item(), Some(1));
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
cursor.reset();
assert_eq!(cursor.slice(&Sum(1), SeekBias::Right).items(), [1]);
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(1));
assert_eq!(cursor.start::(), Count(1));
assert_eq!(cursor.start::(), Sum(1));
cursor.seek(&Sum(0), SeekBias::Right);
assert_eq!(
cursor
.slice(&tree.extent::(), SeekBias::Right)
.items(),
[1]
);
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(1));
assert_eq!(cursor.start::(), Count(1));
assert_eq!(cursor.start::(), Sum(1));
// Multiple-element tree
let mut tree = Tree::new();
tree.extend(vec![1, 2, 3, 4, 5, 6]);
let mut cursor = tree.cursor();
assert_eq!(cursor.slice(&Sum(4), SeekBias::Right).items(), [1, 2]);
assert_eq!(cursor.item(), Some(3));
assert_eq!(cursor.prev_item(), Some(2));
assert_eq!(cursor.start::(), Count(2));
assert_eq!(cursor.start::(), Sum(3));
// Step forwards through the remaining items.
cursor.next();
assert_eq!(cursor.item(), Some(4));
assert_eq!(cursor.prev_item(), Some(3));
assert_eq!(cursor.start::(), Count(3));
assert_eq!(cursor.start::(), Sum(6));
cursor.next();
assert_eq!(cursor.item(), Some(5));
assert_eq!(cursor.prev_item(), Some(4));
assert_eq!(cursor.start::(), Count(4));
assert_eq!(cursor.start::(), Sum(10));
cursor.next();
assert_eq!(cursor.item(), Some(6));
assert_eq!(cursor.prev_item(), Some(5));
assert_eq!(cursor.start::(), Count(5));
assert_eq!(cursor.start::(), Sum(15));
// Advancing past the last item, and then once more, leaves the cursor at the end.
cursor.next();
cursor.next();
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(6));
assert_eq!(cursor.start::(), Count(6));
assert_eq!(cursor.start::(), Sum(21));
// Step backwards all the way to the first item.
cursor.prev();
assert_eq!(cursor.item(), Some(6));
assert_eq!(cursor.prev_item(), Some(5));
assert_eq!(cursor.start::(), Count(5));
assert_eq!(cursor.start::(), Sum(15));
cursor.prev();
assert_eq!(cursor.item(), Some(5));
assert_eq!(cursor.prev_item(), Some(4));
assert_eq!(cursor.start::(), Count(4));
assert_eq!(cursor.start::(), Sum(10));
cursor.prev();
assert_eq!(cursor.item(), Some(4));
assert_eq!(cursor.prev_item(), Some(3));
assert_eq!(cursor.start::(), Count(3));
assert_eq!(cursor.start::(), Sum(6));
cursor.prev();
assert_eq!(cursor.item(), Some(3));
assert_eq!(cursor.prev_item(), Some(2));
assert_eq!(cursor.start::(), Count(2));
assert_eq!(cursor.start::(), Sum(3));
cursor.prev();
assert_eq!(cursor.item(), Some(2));
assert_eq!(cursor.prev_item(), Some(1));
assert_eq!(cursor.start::(), Count(1));
assert_eq!(cursor.start::(), Sum(1));
cursor.prev();
assert_eq!(cursor.item(), Some(1));
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
// Stepping back from the first item leaves no current item; `next` returns to the first item.
cursor.prev();
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
cursor.next();
assert_eq!(cursor.item(), Some(1));
assert_eq!(cursor.prev_item(), None);
assert_eq!(cursor.start::(), Count(0));
assert_eq!(cursor.start::(), Sum(0));
// Slicing to the full extent from a reset cursor returns every item.
cursor.reset();
assert_eq!(
cursor
.slice(&tree.extent::(), SeekBias::Right)
.items(),
tree.items()
);
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(6));
assert_eq!(cursor.start::(), Count(6));
assert_eq!(cursor.start::(), Sum(21));
// Slicing to the full extent after a seek returns only the items after the seek position.
cursor.seek(&Count(3), SeekBias::Right);
assert_eq!(
cursor
.slice(&tree.extent::(), SeekBias::Right)
.items(),
[4, 5, 6]
);
assert_eq!(cursor.item(), None);
assert_eq!(cursor.prev_item(), Some(6));
assert_eq!(cursor.start::(), Count(6));
assert_eq!(cursor.start::(), Sum(21));
// Seeking can bias left or right
cursor.seek(&Sum(1), SeekBias::Left);
assert_eq!(cursor.item(), Some(1));
cursor.seek(&Sum(1), SeekBias::Right);
assert_eq!(cursor.item(), Some(2));
// Slicing without resetting starts from where the cursor is parked.
cursor.seek(&Sum(1), SeekBias::Right);
assert_eq!(cursor.slice(&Sum(6), SeekBias::Right).items(), vec![2, 3]);
assert_eq!(cursor.slice(&Sum(21), SeekBias::Left).items(), vec![4, 5]);
assert_eq!(cursor.slice(&Sum(21), SeekBias::Right).items(), vec![6]);
}
// Summary used by the tests: number of items, their sum, and whether any of them is even.
#[derive(Clone, Default, Debug)]
pub struct IntegersSummary {
count: Count,
sum: Sum,
contains_even: bool,
}
// Dimension counting items.
#[derive(Ord, PartialOrd, Default, Eq, PartialEq, Clone, Debug)]
struct Count(usize);
// Dimension adding up item values.
#[derive(Ord, PartialOrd, Default, Eq, PartialEq, Clone, Debug)]
struct Sum(usize);
// A `u8` item counts as one element, contributes its value to the sum, and sets
// `contains_even` when its lowest bit is clear.
impl Item for u8 {
type Summary = IntegersSummary;
fn summarize(&self) -> Self::Summary {
IntegersSummary {
count: Count(1),
sum: Sum(*self as usize),
contains_even: (*self & 1) == 0,
}
}
}
// Combining summaries adds counts and sums and ORs the `contains_even` flags.
impl<'a> AddAssign<&'a Self> for IntegersSummary {
fn add_assign(&mut self, other: &Self) {
self.count += &other.count;
self.sum += &other.sum;
self.contains_even |= other.contains_even;
}
}
impl Dimension for Count {
fn from_summary(summary: &IntegersSummary) -> Self {
summary.count.clone()
}
}
impl<'a> AddAssign<&'a Self> for Count {
fn add_assign(&mut self, other: &Self) {
self.0 += other.0;
}
}
impl<'a> Add<&'a Self> for Count {
type Output = Self;
fn add(mut self, other: &Self) -> Self {
self.0 += other.0;
self
}
}
impl Dimension for Sum {
fn from_summary(summary: &IntegersSummary) -> Self {
summary.sum.clone()
}
}
impl<'a> AddAssign<&'a Self> for Sum {
fn add_assign(&mut self, other: &Self) {
self.0 += other.0;
}
}
impl<'a> Add<&'a Self> for Sum {
type Output = Self;
fn add(mut self, other: &Self) -> Self {
self.0 += other.0;
self
}
}
}
================================================
FILE: memo_core/src/buffer.rs
================================================
use crate::btree::{self, SeekBias};
use crate::operation_queue::{self, OperationQueue};
use crate::serialization;
use crate::time;
use crate::{Error, ReplicaId};
use flatbuffers::{FlatBufferBuilder, WIPOffset};
use lazy_static::lazy_static;
use serde_derive::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::cell::RefCell;
use std::cmp::{self, Ordering};
use std::collections::{HashMap, HashSet};
use std::iter;
use std::mem;
use std::ops::{Add, AddAssign, Range, Sub};
use std::sync::Arc;
use std::vec;
/// Identifier of a selection set; `add_selection_set` uses the Lamport timestamp taken when
/// the set is created.
pub type SelectionSetId = time::Lamport;
/// Counter type for `Buffer::selections_last_update`, which is incremented whenever a selection
/// set is added or replaced.
pub type SelectionsVersion = usize;
/// State of a text buffer.
///
/// `Buffer::new` seeds `fragments` and `insertion_splits` from the base text and starts every
/// other field empty or at its default.
#[derive(Clone)]
pub struct Buffer {
// Tree of fragments; its summary supplies the buffer's length, extent and longest row.
fragments: btree::Tree,
// Per-insertion trees of splits, keyed by insertion id.
insertion_splits: HashMap>,
// Both caches are cleared at the start of every `edit`.
anchor_cache: RefCell>,
offset_cache: RefCell>,
// Compared against a fresh `time::Global` by `is_modified`; observes each local edit.
pub version: time::Global,
// Local timestamp of the most recent operation produced by `edit`.
last_edit: time::Local,
selections: HashMap>,
pub selections_last_update: SelectionsVersion,
// NOTE(review): presumably operations that could not be applied yet — confirm in the code
// that fills these two fields.
deferred_ops: OperationQueue,
deferred_replicas: HashSet,
}
/// Two-dimensional position in the buffer, expressed as a row and a column.
#[derive(Clone, Copy, Deserialize, Eq, PartialEq, Debug, Hash, Serialize)]
pub struct Point {
pub row: u32,
pub column: u32,
}
/// A position expressed either as the buffer's start or end, or as an offset inside a specific
/// insertion together with a bias.
// NOTE(review): presumably anchors keep pointing at the same text across edits — confirm in
// the anchor resolution code.
#[derive(Clone, Eq, PartialEq, Debug, Hash)]
pub enum Anchor {
Start,
End,
Middle {
insertion_id: time::Local,
offset: usize,
bias: AnchorBias,
},
}
/// Bias stored in `Anchor::Middle`.
// NOTE(review): the effect of each variant is defined where anchors are resolved, which is not
// shown here.
#[derive(Clone, Eq, PartialEq, Debug, Hash)]
pub enum AnchorBias {
Left,
Right,
}
/// A selection delimited by two anchors, plus a flag recording whether it is reversed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Selection {
pub start: Anchor,
pub end: Anchor,
pub reversed: bool,
}
/// Iterator over the buffer's code units, returned by `Buffer::iter` and
/// `Buffer::iter_at_point`.
// NOTE(review): the roles of `fragment_offset` and `reversed` are defined by the `Iterator`
// impl, which is not shown here.
pub struct Iter {
fragment_cursor: btree::Cursor,
fragment_offset: usize,
reversed: bool,
}
/// Iterator state built by `Buffer::changes_since`: a filter cursor over fragments whose
/// summary reports changes after `since`, plus a copy of `since` itself.
struct ChangesIter bool> {
cursor: btree::FilterCursor,
since: time::Global,
}
/// A single change: a range, the code units associated with it, and a resulting extent.
// NOTE(review): presumably `range` is the replaced range and `code_units` the replacement
// text — confirm in the `ChangesIter` iterator implementation.
#[derive(Debug, Eq, PartialEq)]
pub struct Change {
pub range: Range,
pub code_units: Vec,
new_extent: Point,
}
/// A piece of inserted text together with its identity and its position relative to a parent
/// insertion. `Buffer::new` creates the base insertion with default ids and offset 0.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Insertion {
id: time::Local,
parent_id: time::Local,
offset_in_parent: usize,
text: Arc,
lamport_timestamp: time::Lamport,
}
/// Text content stored as code units alongside a vector of nodes.
// NOTE(review): `nodes` presumably holds `LineNode` line-index entries — confirm where `Text`
// is constructed.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Text {
code_units: Vec,
nodes: Vec,
}
/// Per-node line statistics: a length, the longest row and its length, an offset and a row
/// count.
// NOTE(review): the exact meaning of each field is defined by the code that builds these
// nodes, which is not shown here.
#[derive(Clone, Eq, PartialEq, Debug)]
struct LineNode {
len: u32,
longest_row: u32,
longest_row_len: u32,
offset: usize,
rows: u32,
}
/// Borrowed view of a `LineNode`, its optional children and surrounding offsets.
// NOTE(review): presumably passed to callbacks while searching the line nodes of a `Text` —
// confirm at the call sites.
struct LineNodeProbe<'a> {
offset_range: &'a Range,
row: u32,
left_ancestor_end_offset: usize,
right_ancestor_start_offset: usize,
node: &'a LineNode,
left_child: Option<&'a LineNode>,
right_child: Option<&'a LineNode>,
}
/// Ordered identifier of a fragment. `Buffer::new` uses `min_value`, `max_value` and `between`
/// to obtain ids for its initial fragments.
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug)]
struct FragmentId(Arc>);
/// A range (`start_offset..end_offset`) of an insertion's text, identified by a `FragmentId`,
/// together with a set of deletions.
#[derive(Eq, PartialEq, Clone, Debug)]
struct Fragment {
id: FragmentId,
insertion: Insertion,
start_offset: usize,
end_offset: usize,
deletions: HashSet,
}
/// Summary stored in the fragment tree.
///
/// `Buffer::longest_row` reads `longest_row` from the root summary, and `changes_since` filters
/// on `max_version`.
#[derive(Eq, PartialEq, Clone, Debug)]
pub struct FragmentSummary {
extent: usize,
extent_2d: Point,
max_fragment_id: FragmentId,
first_row_len: u32,
longest_row: u32,
longest_row_len: u32,
max_version: time::Global,
}
/// Entry of an insertion's split tree: an extent paired with the id of a fragment.
/// `Buffer::new` records one split of extent 0 and, for non-empty text, one covering the whole
/// text.
#[derive(Eq, PartialEq, Clone, Debug)]
struct InsertionSplit {
extent: usize,
fragment_id: FragmentId,
}
/// Summary carrying a single `extent` value.
// NOTE(review): presumably the summary type of `InsertionSplit` — confirm in its `Item` impl.
#[derive(Eq, PartialEq, Clone, Debug)]
struct InsertionSplitSummary {
extent: usize,
}
/// Operations produced by buffer mutations.
///
/// `Buffer::edit` returns `Edit` operations; `add_selection_set` and `replace_selection_set`
/// return `UpdateSelections` operations.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Operation {
Edit {
start_id: time::Local,
start_offset: usize,
end_id: time::Local,
end_offset: usize,
version_in_range: time::Global,
new_text: Option>,
// Recorded by `Buffer::edit` as `last_edit` and observed into `version`.
local_timestamp: time::Local,
lamport_timestamp: time::Lamport,
},
UpdateSelections {
set_id: SelectionSetId,
// `Some` when a set is added or replaced by the methods shown in this file.
selections: Option>,
lamport_timestamp: time::Lamport,
},
}
impl Buffer {
/// Creates a buffer whose initial content is `base_text`.
///
/// The text is stored as a base insertion with default ids. A zero-length fragment with
/// `FragmentId::min_value()` is always created; when the text is non-empty, a second fragment
/// covering the whole text is added with an id between the minimum and maximum fragment ids.
pub fn new(base_text: T) -> Self
where
T: Into,
{
let mut insertion_splits = HashMap::new();
let mut fragments = btree::Tree::new();
let base_insertion = Insertion {
id: time::Local::default(),
parent_id: time::Local::default(),
offset_in_parent: 0,
text: Arc::new(base_text.into()),
lamport_timestamp: time::Lamport::default(),
};
// Zero-extent split and fragment that are present even for an empty buffer.
insertion_splits.insert(
base_insertion.id,
btree::Tree::from_item(InsertionSplit {
fragment_id: FragmentId::min_value(),
extent: 0,
}),
);
fragments.push(Fragment {
id: FragmentId::min_value(),
insertion: base_insertion.clone(),
start_offset: 0,
end_offset: 0,
deletions: HashSet::new(),
});
if base_insertion.text.len() > 0 {
// Non-empty text: one split and one fragment spanning `0..text.len()`.
let base_fragment_id =
FragmentId::between(&FragmentId::min_value(), &FragmentId::max_value());
insertion_splits
.get_mut(&base_insertion.id)
.unwrap()
.push(InsertionSplit {
fragment_id: base_fragment_id.clone(),
extent: base_insertion.text.len(),
});
fragments.push(Fragment {
id: base_fragment_id,
start_offset: 0,
end_offset: base_insertion.text.len(),
insertion: base_insertion,
deletions: HashSet::new(),
});
}
Self {
fragments,
insertion_splits,
anchor_cache: RefCell::new(HashMap::default()),
offset_cache: RefCell::new(HashMap::default()),
version: time::Global::new(),
last_edit: time::Local::default(),
selections: HashMap::default(),
selections_last_update: 0,
deferred_ops: OperationQueue::new(),
deferred_replicas: HashSet::new(),
}
}
/// Reports whether the buffer's version differs from a freshly created `time::Global`.
pub fn is_modified(&self) -> bool {
    let pristine_version = time::Global::new();
    self.version != pristine_version
}
/// Returns the buffer's length, taken from the extent of the fragment tree.
pub fn len(&self) -> usize {
self.fragments.extent::()
}
/// Returns the length of `row`, excluding the separator before the next row.
///
/// The row ends at the buffer's end when it is the last row (or beyond it); otherwise it ends
/// one position before the start of the following row. Errors from `offset_for_point` are
/// propagated.
pub fn len_for_row(&self, row: u32) -> Result {
    let row_start_offset = self.offset_for_point(Point::new(row, 0))?;
    let is_last_row = row >= self.max_point().row;
    let row_end_offset = if is_last_row {
        self.len()
    } else {
        let next_row_start = self.offset_for_point(Point::new(row + 1, 0))?;
        next_row_start - 1
    };
    let length = row_end_offset - row_start_offset;
    Ok(length as u32)
}
/// Returns the `longest_row` value recorded in the fragment tree's summary.
pub fn longest_row(&self) -> u32 {
    let summary = self.fragments.summary();
    summary.longest_row
}
/// Returns the extent of the fragment tree as a `Point`.
pub fn max_point(&self) -> Point {
    let extent: Point = self.fragments.extent();
    extent
}
/// Collects the code units of `row`, stopping before the first `\n`.
///
/// Returns `Error::OffsetOutOfRange` when iteration from the start of the row yields nothing.
pub fn line(&self, row: u32) -> Result, Error> {
    let row_start = Point::new(row, 0);
    let mut code_units = self.iter_at_point(row_start).peekable();
    if code_units.peek().is_some() {
        let newline = u16::from(b'\n');
        Ok(code_units.take_while(|c| *c != newline).collect())
    } else {
        Err(Error::OffsetOutOfRange)
    }
}
/// Collects everything yielded by `iter()` into a vector.
pub fn to_u16_chars(&self) -> Vec {
self.iter().collect::>()
}
/// Decodes the buffer's UTF-16 code units into a `String`, lossily.
pub fn to_string(&self) -> String {
    let code_units = self.to_u16_chars();
    String::from_utf16_lossy(&code_units)
}
/// Returns an iterator over this buffer created by `Iter::new`.
pub fn iter(&self) -> Iter {
Iter::new(self)
}
/// Returns an iterator over this buffer created by `Iter::at_point` for `point`.
pub fn iter_at_point(&self, point: Point) -> Iter {
Iter::at_point(self, point)
}
/// Reports whether the selections counter differs from the version `since`.
pub fn selections_changed_since(&self, since: SelectionsVersion) -> bool {
    let current = self.selections_last_update;
    current != since
}
/// Returns an iterator over the changes made after the version `since`.
///
/// Only fragments whose summary reports a `max_version` changed since `since` are visited.
pub fn changes_since(&self, since: &time::Global) -> impl Iterator
- {
    // The filter closure owns its own copy of the version; the iterator keeps another.
    let threshold = since.clone();
    let cursor = self
        .fragments
        .filter(move |summary| summary.max_version.changed_since(&threshold));
    let since = since.clone();
    ChangesIter { cursor, since }
}
/// Returns the number of operations currently held in `deferred_ops`.
pub fn deferred_ops_len(&self) -> usize {
self.deferred_ops.len()
}
/// Replaces each of `old_ranges` with `new_text` and returns the resulting operations.
///
/// Empty text is treated as a pure deletion, in which case empty ranges are skipped. Both
/// position caches are cleared before the fragments are spliced. The local timestamp of the
/// last returned operation becomes `last_edit` and is observed into `version`.
pub fn edit(
    &mut self,
    old_ranges: I,
    new_text: T,
    local_clock: &mut time::Local,
    lamport_clock: &mut time::Lamport,
) -> Vec
where
    I: IntoIterator
- >,
    T: Into,
{
    let text = new_text.into();
    let new_text = if text.len() > 0 {
        Some(Arc::new(text))
    } else {
        None
    };
    let inserting = new_text.is_some();

    self.anchor_cache.borrow_mut().clear();
    self.offset_cache.borrow_mut().clear();

    // Without replacement text, a range only matters if it actually deletes something.
    let effective_ranges = old_ranges
        .into_iter()
        .filter(move |old_range| inserting || old_range.end > old_range.start);
    let ops = self.splice_fragments(
        effective_ranges,
        new_text.clone(),
        local_clock,
        lamport_clock,
    );

    match ops.last() {
        Some(Operation::Edit {
            local_timestamp, ..
        }) => {
            self.last_edit = *local_timestamp;
            self.version.observe(*local_timestamp);
        }
        Some(_) => unreachable!(),
        None => {}
    }
    ops
}
pub fn edit_2d(
&mut self,
old_2d_ranges: I,
new_text: T,
local_clock: &mut time::Local,
lamport_clock: &mut time::Lamport,
) -> Vec
where
I: IntoIterator
- >,
T: Into,
{
let mut old_1d_ranges = SmallVec::<[_; 1]>::new();
for old_2d_range in old_2d_ranges {
let start = self.offset_for_point(old_2d_range.start);
let end = self.offset_for_point(old_2d_range.end);
if start.is_ok() && end.is_ok() {
old_1d_ranges.push(start.unwrap()..end.unwrap());
}
}
self.edit(old_1d_ranges, new_text, local_clock, lamport_clock)
}
/// Creates a new selection set from `ranges` and returns its id together with
/// the `Operation::UpdateSelections` describing it.
///
/// The id of the new set is the Lamport timestamp obtained by ticking
/// `lamport_clock`. The selections counter is incremented.
///
/// Errors from `selections_from_ranges` are propagated before any state is
/// modified.
pub fn add_selection_set(
&mut self,
ranges: I,
lamport_clock: &mut time::Lamport,
) -> Result<(SelectionSetId, Operation), Error>
where
I: IntoIterator
- >,
{
let selections = self.selections_from_ranges(ranges)?;
let lamport_timestamp = lamport_clock.tick();
self.selections
.insert(lamport_timestamp, selections.clone());
self.selections_last_update += 1;
Ok((
lamport_timestamp,
Operation::UpdateSelections {
// The same timestamp serves as set id and as operation timestamp.
set_id: lamport_timestamp,
selections: Some(selections),
lamport_timestamp,
},
))
}
pub fn replace_selection_set(
&mut self,
set_id: SelectionSetId,
ranges: I,
lamport_clock: &mut time::Lamport,
) -> Result
where
I: IntoIterator
- >,
{
self.selections
.remove(&set_id)
.ok_or(Error::InvalidSelectionSet(set_id))?;
let mut selections = self.selections_from_ranges(ranges)?;
self.merge_selections(&mut selections);
self.selections.insert(set_id, selections.clone());
let lamport_timestamp = lamport_clock.tick();
self.selections_last_update += 1;
Ok(Operation::UpdateSelections {
set_id,
selections: Some(selections),
lamport_timestamp,
})
}
/// Removes the selection set `set_id` and returns an
/// `Operation::UpdateSelections` whose `selections` field is `None`.
///
/// Returns `Error::InvalidSelectionSet` when no such set exists. On success
/// the Lamport clock is ticked and the selections counter is incremented.
pub fn remove_selection_set(
&mut self,
set_id: SelectionSetId,
lamport_clock: &mut time::Lamport,
) -> Result {
self.selections
.remove(&set_id)
.ok_or(Error::InvalidSelectionSet(set_id))?;
let lamport_timestamp = lamport_clock.tick();
self.selections_last_update += 1;
Ok(Operation::UpdateSelections {
set_id,
selections: None,
lamport_timestamp,
})
}
/// Returns an iterator over the point ranges of the selections in `set_id`.
///
/// A selection flagged `reversed` is yielded as `end..start`, any other as
/// `start..end`. Returns `Error::InvalidSelectionSet` when the set does not
/// exist.
///
/// The anchors are resolved while iterating, and a failure to resolve one
/// panics because the results are unwrapped.
pub fn selection_ranges<'a>(
&'a self,
set_id: SelectionSetId,
) -> Result> + 'a, Error> {
let selections = self
.selections
.get(&set_id)
.ok_or(Error::InvalidSelectionSet(set_id))?;
Ok(selections.iter().map(move |selection| {
let start = self.point_for_anchor(&selection.start).unwrap();
let end = self.point_for_anchor(&selection.end).unwrap();
if selection.reversed {
end..start
} else {
start..end
}
}))
}
/// Returns an iterator over every entry of the selections map.
pub fn all_selections(&self) -> impl Iterator
- )> {
self.selections.iter()
}
/// Returns an iterator that pairs every selection set id with the collected
/// result of `selection_ranges` for that id.
///
/// The `unwrap` cannot hit a missing set because the ids come from the map's
/// own keys.
pub fn all_selection_ranges<'a>(
&'a self,
) -> impl 'a + Iterator
- >)> {
self.selections
.keys()
.map(move |set_id| (*set_id, self.selection_ranges(*set_id).unwrap().collect()))
}
/// Collapses consecutive selections that touch or overlap, in place.
///
/// Walks `selections` in order, keeping a "previous" selection. When the
/// previous end is at or after the next start (per `cmp_anchors`), the two are
/// merged: the previous end is extended if the next one ends later. Otherwise
/// the previous selection is emitted and the next one takes its place.
///
/// Presumably the input is expected to be ordered by start — confirm against
/// callers. Panics if `cmp_anchors` fails, since its result is unwrapped.
fn merge_selections(&mut self, selections: &mut Vec) {
let mut new_selections = Vec::with_capacity(selections.len());
{
// The drain borrows `selections`; the inner scope ends that borrow before
// the vector is reassigned below.
let mut old_selections = selections.drain(..);
if let Some(mut prev_selection) = old_selections.next() {
for selection in old_selections {
if self
.cmp_anchors(&prev_selection.end, &selection.start)
.unwrap()
>= Ordering::Equal
{
// Touching or overlapping: grow the previous selection if needed.
if self
.cmp_anchors(&selection.end, &prev_selection.end)
.unwrap()
> Ordering::Equal
{
prev_selection.end = selection.end;
}
} else {
// Disjoint: emit the previous selection and continue with this one.
new_selections.push(mem::replace(&mut prev_selection, selection));
}
}
new_selections.push(prev_selection);
}
}
*selections = new_selections;
}
fn selections_from_ranges(&self, ranges: I) -> Result, Error>
where
I: IntoIterator
- >,
{
let mut ranges = ranges.into_iter().collect::>();
ranges.sort_unstable_by_key(|range| range.start);
let mut selections = Vec::with_capacity(ranges.len());
for range in ranges {
if range.start > range.end {
selections.push(Selection {
start: self.anchor_before_point(range.end)?,
end: self.anchor_before_point(range.start)?,
reversed: true,
});
} else {
selections.push(Selection {
start: self.anchor_before_point(range.start)?,
end: self.anchor_before_point(range.end)?,
reversed: false,
});
}
}
Ok(selections)
}
/// Applies a batch of operations to the buffer.
///
/// Each operation that `can_apply_op` accepts is applied immediately. The
/// others are collected, their replica is recorded in `deferred_replicas`,
/// and they are added to the deferred queue. Afterwards the deferred queue is
/// flushed once via `flush_deferred_ops`.
///
/// The first error from `apply_op` or `flush_deferred_ops` is returned.
pub fn apply_ops>(
&mut self,
ops: I,
local_clock: &mut time::Local,
lamport_clock: &mut time::Lamport,
) -> Result<(), Error> {
let mut deferred_ops = Vec::new();
for op in ops {
if self.can_apply_op(&op) {
self.apply_op(op, local_clock, lamport_clock)?;
} else {
// `can_apply_op` rejects operations from replicas recorded here.
self.deferred_replicas.insert(op.replica_id());
deferred_ops.push(op);
}
}
self.deferred_ops.insert(deferred_ops);
self.flush_deferred_ops(local_clock, lamport_clock)?;
Ok(())
}
/// Applies one operation to the buffer.
///
/// * `Operation::Edit` — ignored when its local timestamp has already been
///   observed; otherwise integrated through `apply_edit`, after which both
///   position caches are cleared and the timestamp is observed by `version`.
/// * `Operation::UpdateSelections` — stores or removes the selection set,
///   lets the Lamport clock observe the timestamp and bumps the selections
///   counter.
///
/// Errors from `apply_edit` are propagated.
fn apply_op(
    &mut self,
    op: Operation,
    local_clock: &mut time::Local,
    lamport_clock: &mut time::Lamport,
) -> Result<(), Error> {
    match op {
        Operation::Edit {
            start_id,
            start_offset,
            end_id,
            end_offset,
            new_text,
            version_in_range,
            local_timestamp,
            lamport_timestamp,
        } => {
            // An edit that was already observed has nothing left to do.
            if self.version.observed(local_timestamp) {
                return Ok(());
            }
            self.apply_edit(
                start_id,
                start_offset,
                end_id,
                end_offset,
                new_text,
                &version_in_range,
                local_timestamp,
                lamport_timestamp,
                local_clock,
                lamport_clock,
            )?;
            self.anchor_cache.borrow_mut().clear();
            self.offset_cache.borrow_mut().clear();
            self.version.observe(local_timestamp);
        }
        Operation::UpdateSelections {
            set_id,
            selections,
            lamport_timestamp,
        } => {
            match selections {
                Some(selections) => {
                    self.selections.insert(set_id, selections);
                }
                None => {
                    self.selections.remove(&set_id);
                }
            }
            lamport_clock.observe(lamport_timestamp);
            self.selections_last_update += 1;
        }
    }
    Ok(())
}
/// Integrates one edit, described in insertion-relative coordinates, into the
/// fragment tree.
///
/// `start_id`/`start_offset` and `end_id`/`end_offset` are resolved to
/// fragment ids with `resolve_fragment_id`. While walking the fragments from
/// the start position, fragments inside the range whose insertion is observed
/// by `version_in_range` get `local_timestamp` added to their deletions, and
/// `new_text` (if any) is placed as a new fragment. On success `self.fragments`
/// is replaced and both clocks observe the edit's timestamps.
///
/// Errors from `resolve_fragment_id` are propagated.
fn apply_edit(
&mut self,
start_id: time::Local,
start_offset: usize,
end_id: time::Local,
end_offset: usize,
new_text: Option>,
version_in_range: &time::Global,
local_timestamp: time::Local,
lamport_timestamp: time::Lamport,
local_clock: &mut time::Local,
lamport_clock: &mut time::Lamport,
) -> Result<(), Error> {
// `new_text` is taken (set to `None`) once the text has been placed.
let mut new_text = new_text.as_ref().cloned();
let start_fragment_id = self.resolve_fragment_id(start_id, start_offset)?;
let end_fragment_id = self.resolve_fragment_id(end_id, end_offset)?;
// The new tree is assembled while a cursor walks a copy of the old one.
let old_fragments = self.fragments.clone();
let mut cursor = old_fragments.cursor();
let mut new_fragments = cursor.slice(&start_fragment_id, SeekBias::Left);
// The edit starts exactly at the end of the start fragment: keep that
// fragment as is and continue with the one after it.
if start_offset == cursor.item().unwrap().end_offset {
new_fragments.push(cursor.item().unwrap());
cursor.next();
}
while let Some(mut fragment) = cursor.item() {
// Stop once the text has been placed and the end fragment is behind us.
if new_text.is_none() && fragment.id > end_fragment_id {
break;
}
if fragment.id == start_fragment_id || fragment.id == end_fragment_id {
// Boundary fragment: split it around the part covered by the edit.
let split_start = if start_fragment_id == fragment.id {
start_offset
} else {
fragment.start_offset
};
let split_end = if end_fragment_id == fragment.id {
end_offset
} else {
fragment.end_offset
};
let (before_range, within_range, after_range) = self.split_fragment(
cursor.prev_item().as_ref().unwrap(),
&fragment,
split_start..split_end,
);
// The inserted fragment goes between the untouched prefix (or the
// previous fragment) and whatever follows the split point.
let insertion = if let Some(new_text) = new_text.take() {
Some(
self.build_fragment_to_insert(
before_range
.as_ref()
.or(cursor.prev_item().as_ref())
.unwrap(),
within_range.as_ref().or(after_range.as_ref()),
new_text,
local_timestamp,
lamport_timestamp,
),
)
} else {
None
};
if let Some(fragment) = before_range {
new_fragments.push(fragment);
}
if let Some(fragment) = insertion {
new_fragments.push(fragment);
}
if let Some(mut fragment) = within_range {
// Only text the edit's author had observed is marked deleted.
if version_in_range.observed(fragment.insertion.id) {
fragment.deletions.insert(local_timestamp);
}
new_fragments.push(fragment);
}
if let Some(fragment) = after_range {
new_fragments.push(fragment);
}
} else {
// Non-boundary fragment: pending text is placed before the first
// fragment whose insertion has a smaller Lamport timestamp
// (presumably to order concurrent insertions — confirm).
if new_text.is_some() && lamport_timestamp > fragment.insertion.lamport_timestamp {
new_fragments.push(self.build_fragment_to_insert(
cursor.prev_item().as_ref().unwrap(),
Some(&fragment),
new_text.take().unwrap(),
local_timestamp,
lamport_timestamp,
));
}
if fragment.id < end_fragment_id && version_in_range.observed(fragment.insertion.id)
{
fragment.deletions.insert(local_timestamp);
}
new_fragments.push(fragment);
}
cursor.next();
}
// The text was never placed during the walk: append it after the last
// fragment that was visited.
if let Some(new_text) = new_text {
new_fragments.push(self.build_fragment_to_insert(
cursor.prev_item().as_ref().unwrap(),
None,
new_text,
local_timestamp,
lamport_timestamp,
));
}
// Carry over the remainder of the old tree.
new_fragments.push_tree(cursor.slice(&old_fragments.extent::(), SeekBias::Right));
self.fragments = new_fragments;
local_clock.observe(local_timestamp);
lamport_clock.observe(lamport_timestamp);
Ok(())
}
/// Retries every operation in the deferred queue.
///
/// The set of deferred replicas is reset first. Each drained operation is
/// applied if `can_apply_op` now accepts it; otherwise its replica is recorded
/// again and the operation is put back into the queue.
///
/// The first error from `apply_op` is returned immediately.
fn flush_deferred_ops(
    &mut self,
    local_clock: &mut time::Local,
    lamport_clock: &mut time::Lamport,
) -> Result<(), Error> {
    self.deferred_replicas.clear();
    let mut still_deferred = Vec::new();
    for op in self.deferred_ops.drain() {
        if !self.can_apply_op(&op) {
            self.deferred_replicas.insert(op.replica_id());
            still_deferred.push(op);
            continue;
        }
        self.apply_op(op, local_clock, lamport_clock)?;
    }
    self.deferred_ops.insert(still_deferred);
    Ok(())
}
fn can_apply_op(&self, op: &Operation) -> bool {
if self.deferred_replicas.contains(&op.replica_id()) {
false
} else {
match op {
Operation::Edit {
start_id,
end_id,
version_in_range,
..
} => {
self.version.observed(*start_id)
&& self.version.observed(*end_id)
&& *version_in_range <= self.version
}
Operation::UpdateSelections { selections, .. } => {
if let Some(selections) = selections {
selections.iter().all(|selection| {
let contains_start = match selection.start {
Anchor::Middle { insertion_id, .. } => {
self.version.observed(insertion_id)
}
_ => true,
};
let contains_end = match selection.end {
Anchor::Middle { insertion_id, .. } => {
self.version.observed(insertion_id)
}
_ => true,
};
contains_start && contains_end
})
} else {
true
}
}
}
}
}
/// Finds the id of the fragment recorded for `offset` within the insertion
/// `edit_id`.
///
/// Looks up the insertion's split tree, seeks to `offset` with left bias and
/// returns a clone of the `fragment_id` stored in the split found there.
///
/// Returns `Error::InvalidOperation` when the insertion has no split tree or
/// the cursor has no item after seeking.
fn resolve_fragment_id(
&self,
edit_id: time::Local,
offset: usize,
) -> Result {
let split_tree = self
.insertion_splits
.get(&edit_id)
.ok_or(Error::InvalidOperation)?;
let mut cursor = split_tree.cursor();
cursor.seek(&offset, SeekBias::Left);
Ok(cursor
.item()
.ok_or(Error::InvalidOperation)?
.fragment_id
.clone())
}
/// Applies a sequence of local splices to the fragment tree and returns one
/// `Operation::Edit` per range.
///
/// Every range in `old_ranges` is replaced by `new_text` (when it is `Some`).
/// Fragments touched by a range are split as needed, the covered parts get the
/// range's local timestamp added to their deletions if they are visible, and
/// the split trees in `insertion_splits` are rebuilt to match. Each range
/// receives its own local and Lamport timestamps, obtained by ticking the
/// clocks.
///
/// Returns an empty vector without ticking the clocks when `old_ranges` is
/// empty. The ranges must be disjoint (see the comment near the end);
/// presumably they must also be ordered by position — confirm with callers.
fn splice_fragments(
&mut self,
mut old_ranges: I,
new_text: Option>,
local_clock: &mut time::Local,
lamport_clock: &mut time::Lamport,
) -> Vec
where
I: Iterator
- >,
{
let mut cur_range = old_ranges.next();
if cur_range.is_none() {
return Vec::new();
}
let mut ops = Vec::with_capacity(old_ranges.size_hint().0);
let old_fragments = self.fragments.clone();
let mut cursor = old_fragments.cursor();
let mut new_fragments = btree::Tree::new();
// Everything before the first range is carried over unchanged.
new_fragments.push_tree(cursor.slice(&cur_range.as_ref().unwrap().start, SeekBias::Right));
// Coordinates of the operation being assembled for the current range.
let mut start_id = None;
let mut start_offset = None;
let mut end_id = None;
let mut end_offset = None;
let mut version_in_range = time::Global::new();
// Timestamps for the first range; later ranges tick the clocks again.
let mut local_timestamp = local_clock.tick();
let mut lamport_timestamp = lamport_clock.tick();
while cur_range.is_some() && cursor.item().is_some() {
let mut fragment = cursor.item().unwrap();
let mut fragment_start = cursor.start::();
let mut fragment_end = fragment_start + fragment.len();
// The split tree of this fragment's insertion is rebuilt together with
// the fragment and re-inserted once the fragment has been processed.
let old_split_tree = self
.insertion_splits
.remove(&fragment.insertion.id)
.unwrap();
let mut splits_cursor = old_split_tree.cursor();
let mut new_split_tree = splits_cursor.slice(&fragment.start_offset, SeekBias::Right);
// Find all splices that start or end within the current fragment. Then, split the
// fragment and reassemble it in both trees accounting for the deleted and the newly
// inserted text.
while cur_range.as_ref().map_or(false, |r| r.start < fragment_end) {
let range = cur_range.clone().unwrap();
// The range starts inside the fragment: the part before it becomes a
// fragment of its own and `fragment` shrinks to the remainder.
if range.start > fragment_start {
let mut prefix = fragment.clone();
prefix.end_offset = prefix.start_offset + (range.start - fragment_start);
prefix.id =
FragmentId::between(&new_fragments.last().unwrap().id, &fragment.id);
fragment.start_offset = prefix.end_offset;
new_fragments.push(prefix.clone());
new_split_tree.push(InsertionSplit {
extent: prefix.end_offset - prefix.start_offset,
fragment_id: prefix.id,
});
fragment_start = range.start;
}
// Record the end coordinates when the range ends on a fragment boundary.
if range.end == fragment_start {
end_id = Some(new_fragments.last().unwrap().insertion.id);
end_offset = Some(new_fragments.last().unwrap().end_offset);
} else if range.end == fragment_end {
end_id = Some(fragment.insertion.id);
end_offset = Some(fragment.end_offset);
}
// The range starts here: its start is expressed relative to the last
// fragment already pushed, and the new text is inserted at this point.
if range.start == fragment_start {
start_id = Some(new_fragments.last().unwrap().insertion.id);
start_offset = Some(new_fragments.last().unwrap().end_offset);
if let Some(new_text) = new_text.clone() {
let new_fragment = self.build_fragment_to_insert(
&new_fragments.last().unwrap(),
Some(&fragment),
new_text,
local_timestamp,
lamport_timestamp,
);
new_fragments.push(new_fragment);
}
}
if range.end < fragment_end {
// The range ends inside the fragment: the covered part is split off
// and, if visible, gets this edit added to its deletions.
if range.end > fragment_start {
let mut prefix = fragment.clone();
prefix.end_offset = prefix.start_offset + (range.end - fragment_start);
prefix.id =
FragmentId::between(&new_fragments.last().unwrap().id, &fragment.id);
if fragment.is_visible() {
prefix.deletions.insert(local_timestamp);
}
fragment.start_offset = prefix.end_offset;
new_fragments.push(prefix.clone());
new_split_tree.push(InsertionSplit {
extent: prefix.end_offset - prefix.start_offset,
fragment_id: prefix.id,
});
fragment_start = range.end;
end_id = Some(fragment.insertion.id);
end_offset = Some(fragment.start_offset);
version_in_range.observe(fragment.insertion.id);
}
} else {
// The range covers the rest of the fragment.
version_in_range.observe(fragment.insertion.id);
if fragment.is_visible() {
fragment.deletions.insert(local_timestamp);
}
}
// If the splice ends inside this fragment, we can advance to the next splice and
// check if it also intersects the current fragment. Otherwise we break out of the
// loop and find the first fragment that the splice does not contain fully.
if range.end <= fragment_end {
ops.push(Operation::Edit {
start_id: start_id.unwrap(),
start_offset: start_offset.unwrap(),
end_id: end_id.unwrap(),
end_offset: end_offset.unwrap(),
version_in_range,
new_text: new_text.clone(),
local_timestamp,
lamport_timestamp,
});
// Reset the per-range state and move on to the next range.
start_id = None;
start_offset = None;
end_id = None;
end_offset = None;
version_in_range = time::Global::new();
cur_range = old_ranges.next();
if cur_range.is_some() {
local_timestamp = local_clock.tick();
lamport_timestamp = lamport_clock.tick();
}
} else {
break;
}
}
// What is left of the fragment gets its split entry, followed by the
// untouched splits that come after it in the old split tree.
new_split_tree.push(InsertionSplit {
extent: fragment.end_offset - fragment.start_offset,
fragment_id: fragment.id.clone(),
});
splits_cursor.next();
new_split_tree
.push_tree(splits_cursor.slice(&old_split_tree.extent::(), SeekBias::Right));
self.insertion_splits
.insert(fragment.insertion.id, new_split_tree);
new_fragments.push(fragment);
// Scan forward until we find a fragment that is not fully contained by the current splice.
cursor.next();
if let Some(range) = cur_range.clone() {
while let Some(mut fragment) = cursor.item() {
fragment_start = cursor.start::();
fragment_end = fragment_start + fragment.len();
if range.start < fragment_start && range.end >= fragment_end {
if fragment.is_visible() {
fragment.deletions.insert(local_timestamp);
}
version_in_range.observe(fragment.insertion.id);
new_fragments.push(fragment.clone());
cursor.next();
// The range ends exactly at the end of this fragment: emit its
// operation and advance to the next range.
if range.end == fragment_end {
end_id = Some(fragment.insertion.id);
end_offset = Some(fragment.end_offset);
ops.push(Operation::Edit {
start_id: start_id.unwrap(),
start_offset: start_offset.unwrap(),
end_id: end_id.unwrap(),
end_offset: end_offset.unwrap(),
version_in_range,
new_text: new_text.clone(),
local_timestamp,
lamport_timestamp,
});
start_id = None;
start_offset = None;
end_id = None;
end_offset = None;
version_in_range = time::Global::new();
cur_range = old_ranges.next();
if cur_range.is_some() {
local_timestamp = local_clock.tick();
lamport_timestamp = lamport_clock.tick();
}
break;
}
} else {
break;
}
}
// If the splice we are currently evaluating starts after the end of the fragment
// that the cursor is parked at, we should seek to the next splice's start range
// and push all the fragments in between into the new tree.
if cur_range.as_ref().map_or(false, |r| r.start > fragment_end) {
new_fragments.push_tree(
cursor.slice(&cur_range.as_ref().unwrap().start, SeekBias::Right),
);
}
}
}
// Handle range that is at the end of the buffer if it exists. There should never be
// multiple because ranges must be disjoint.
if cur_range.is_some() {
debug_assert_eq!(old_ranges.next(), None);
let last_fragment = new_fragments.last().unwrap();
// Start and end both refer to the end of the last fragment.
ops.push(Operation::Edit {
start_id: last_fragment.insertion.id,
start_offset: last_fragment.end_offset,
end_id: last_fragment.insertion.id,
end_offset: last_fragment.end_offset,
version_in_range: time::Global::new(),
new_text: new_text.clone(),
local_timestamp,
lamport_timestamp,
});
if let Some(new_text) = new_text {
new_fragments.push(self.build_fragment_to_insert(
&last_fragment,
None,
new_text,
local_timestamp,
lamport_timestamp,
));
}
} else {
// No range left: carry over the remainder of the old tree.
new_fragments
.push_tree(cursor.slice(&old_fragments.extent::(), SeekBias::Right));
}
self.fragments = new_fragments;
ops
}
/// Splits `fragment` around `range` (given in the fragment's insertion
/// offsets) and returns the parts `(before_range, within_range, after_range)`.
///
/// When `range` lies entirely at the fragment's start or end, or covers the
/// whole fragment, the fragment is returned unchanged in the matching slot
/// and `insertion_splits` is not touched. Otherwise new fragment ids are
/// generated between `prev_fragment` and the following part, and the
/// insertion's split tree is rebuilt with one entry per resulting part.
///
/// The debug assertions require `range` to lie within the fragment's
/// `start_offset..=end_offset`.
fn split_fragment(
&mut self,
prev_fragment: &Fragment,
fragment: &Fragment,
range: Range,
) -> (Option, Option, Option) {
debug_assert!(range.start >= fragment.start_offset);
debug_assert!(range.start <= fragment.end_offset);
debug_assert!(range.end <= fragment.end_offset);
debug_assert!(range.end >= fragment.start_offset);
if range.end == fragment.start_offset {
// The range ends where the fragment begins: everything is "after".
(None, None, Some(fragment.clone()))
} else if range.start == fragment.end_offset {
// The range starts where the fragment ends: everything is "before".
(Some(fragment.clone()), None, None)
} else if range.start == fragment.start_offset && range.end == fragment.end_offset {
// The range covers the fragment exactly.
(None, Some(fragment.clone()), None)
} else {
// `prefix` is carved from the back: first the part after the range is
// cut off, then the part within it; what remains is the part before.
let mut prefix = fragment.clone();
let after_range = if range.end < fragment.end_offset {
let mut suffix = prefix.clone();
suffix.start_offset = range.end;
prefix.end_offset = range.end;
prefix.id = FragmentId::between(&prev_fragment.id, &suffix.id);
Some(suffix)
} else {
None
};
let within_range = if range.start != range.end {
let mut suffix = prefix.clone();
suffix.start_offset = range.start;
prefix.end_offset = range.start;
prefix.id = FragmentId::between(&prev_fragment.id, &suffix.id);
Some(suffix)
} else {
None
};
let before_range = if range.start > fragment.start_offset {
Some(prefix)
} else {
None
};
// Rebuild the insertion's split tree: splits before this fragment, one
// split per resulting part, then the splits after this fragment.
let old_split_tree = self
.insertion_splits
.remove(&fragment.insertion.id)
.unwrap();
let mut cursor = old_split_tree.cursor();
let mut new_split_tree = cursor.slice(&fragment.start_offset, SeekBias::Right);
if let Some(ref fragment) = before_range {
new_split_tree.push(InsertionSplit {
extent: range.start - fragment.start_offset,
fragment_id: fragment.id.clone(),
});
}
if let Some(ref fragment) = within_range {
new_split_tree.push(InsertionSplit {
extent: range.end - range.start,
fragment_id: fragment.id.clone(),
});
}
if let Some(ref fragment) = after_range {
new_split_tree.push(InsertionSplit {
extent: fragment.end_offset - range.end,
fragment_id: fragment.id.clone(),
});
}
// Skip the split that described the unsplit fragment.
cursor.next();
new_split_tree
.push_tree(cursor.slice(&old_split_tree.extent::(), SeekBias::Right));
self.insertion_splits
.insert(fragment.insertion.id, new_split_tree);
(before_range, within_range, after_range)
}
}
/// Creates the fragment for a new insertion of `text` and registers its split
/// tree.
///
/// The fragment id is generated between `prev_fragment`'s id and
/// `next_fragment`'s id, or `FragmentId::max_value()` when there is no next
/// fragment. A split tree with a single split spanning the whole text is
/// stored in `insertion_splits` under `local_timestamp`, which also serves as
/// the insertion id. The insertion records `prev_fragment`'s insertion id and
/// end offset as its parent position.
fn build_fragment_to_insert(
&mut self,
prev_fragment: &Fragment,
next_fragment: Option<&Fragment>,
text: Arc,
local_timestamp: time::Local,
lamport_timestamp: time::Lamport,
) -> Fragment {
let new_fragment_id = FragmentId::between(
&prev_fragment.id,
next_fragment
.map(|f| &f.id)
.unwrap_or(&FragmentId::max_value()),
);
let mut split_tree = btree::Tree::new();
split_tree.push(InsertionSplit {
extent: text.len(),
fragment_id: new_fragment_id.clone(),
});
self.insertion_splits.insert(local_timestamp, split_tree);
Fragment::new(
new_fragment_id,
Insertion {
id: local_timestamp,
parent_id: prev_fragment.insertion.id,
offset_in_parent: prev_fragment.end_offset,
text,
lamport_timestamp,
},
)
}
/// Creates an anchor for `offset` with `AnchorBias::Left`.
pub fn anchor_before_offset(&self, offset: usize) -> Result {
self.anchor_for_offset(offset, AnchorBias::Left)
}
/// Creates an anchor for `offset` with `AnchorBias::Right`.
pub fn anchor_after_offset(&self, offset: usize) -> Result {
self.anchor_for_offset(offset, AnchorBias::Right)
}
/// Creates an anchor for `offset` using the given `bias`.
///
/// Returns `Error::OffsetOutOfRange` when `offset` exceeds the buffer length.
/// A left-biased anchor at offset 0 is `Anchor::Start` and a right-biased
/// anchor at the buffer length is `Anchor::End`. Every other case yields an
/// `Anchor::Middle` that refers to the insertion of the fragment found at
/// `offset` and the offset within that insertion.
///
/// The computed position is stored in the position caches.
fn anchor_for_offset(&self, offset: usize, bias: AnchorBias) -> Result {
let max_offset = self.len();
if offset > max_offset {
return Err(Error::OffsetOutOfRange);
}
// The anchor bias determines the bias used when seeking the fragment.
let seek_bias;
match bias {
AnchorBias::Left => {
if offset == 0 {
return Ok(Anchor::Start);
} else {
seek_bias = SeekBias::Left;
}
}
AnchorBias::Right => {
if offset == max_offset {
return Ok(Anchor::End);
} else {
seek_bias = SeekBias::Right;
}
}
};
let mut cursor = self.fragments.cursor();
cursor.seek(&offset, seek_bias);
let fragment = cursor.item().unwrap();
let offset_in_fragment = offset - cursor.start::();
// Anchors store offsets relative to the insertion, not the fragment.
let offset_in_insertion = fragment.start_offset + offset_in_fragment;
let point = cursor.start::() + &fragment.point_for_offset(offset_in_fragment)?;
let anchor = Anchor::Middle {
insertion_id: fragment.insertion.id,
offset: offset_in_insertion,
bias,
};
self.cache_position(Some(anchor.clone()), offset, point);
Ok(anchor)
}
/// Creates an anchor for `point` with `AnchorBias::Left`.
pub fn anchor_before_point(&self, point: Point) -> Result {
self.anchor_for_point(point, AnchorBias::Left)
}
/// Creates an anchor for `point` with `AnchorBias::Right`.
pub fn anchor_after_point(&self, point: Point) -> Result {
self.anchor_for_point(point, AnchorBias::Right)
}
/// Creates an anchor for `point` using the given `bias`.
///
/// Returns `Error::OffsetOutOfRange` when `point` lies past `max_point()`.
/// A left-biased anchor at the zero point is `Anchor::Start` and a
/// right-biased anchor at `max_point()` is `Anchor::End`. Every other case
/// yields an `Anchor::Middle` that refers to the insertion of the fragment
/// found at `point` and the offset within that insertion.
///
/// The computed position is stored in the position caches.
fn anchor_for_point(&self, point: Point, bias: AnchorBias) -> Result {
let max_point = self.max_point();
if point > max_point {
return Err(Error::OffsetOutOfRange);
}
// The anchor bias determines the bias used when seeking the fragment.
let seek_bias;
match bias {
AnchorBias::Left => {
if point.is_zero() {
return Ok(Anchor::Start);
} else {
seek_bias = SeekBias::Left;
}
}
AnchorBias::Right => {
if point == max_point {
return Ok(Anchor::End);
} else {
seek_bias = SeekBias::Right;
}
}
};
let mut cursor = self.fragments.cursor();
cursor.seek(&point, seek_bias);
let fragment = cursor.item().unwrap();
let offset_in_fragment = fragment.offset_for_point(point - &cursor.start::())?;
// Anchors store offsets relative to the insertion, not the fragment.
let offset_in_insertion = fragment.start_offset + offset_in_fragment;
let anchor = Anchor::Middle {
insertion_id: fragment.insertion.id,
offset: offset_in_insertion,
bias,
};
let offset = cursor.start::() + offset_in_fragment;
self.cache_position(Some(anchor.clone()), offset, point);
Ok(anchor)
}
/// Returns the offset component of `position_for_anchor(anchor)`.
pub fn offset_for_anchor(&self, anchor: &Anchor) -> Result {
Ok(self.position_for_anchor(anchor)?.0)
}
/// Returns the point component of `position_for_anchor(anchor)`.
pub fn point_for_anchor(&self, anchor: &Anchor) -> Result {
Ok(self.position_for_anchor(anchor)?.1)
}
fn position_for_anchor(&self, anchor: &Anchor) -> Result<(usize, Point), Error> {
match anchor {
Anchor::Start => Ok((0, Point { row: 0, column: 0 })),
Anchor::End => Ok((self.len(), self.fragments.extent())),
Anchor::Middle {
ref insertion_id,
offset,
ref bias,
} => {
let cached_position = {
let anchor_cache = self.anchor_cache.try_borrow().ok();
anchor_cache
.as_ref()
.and_then(|cache| cache.get(anchor).cloned())
};
if let Some(cached_position) = cached_position {
Ok(cached_position)
} else {
let seek_bias = match bias {
AnchorBias::Left => SeekBias::Left,
AnchorBias::Right => SeekBias::Right,
};
let splits =
self.insertion_splits
.get(&insertion_id)
.ok_or(Error::InvalidAnchor(
"split does not exist for insertion id".into(),
))?;
let mut splits_cursor = splits.cursor();
splits_cursor.seek(offset, seek_bias);
splits_cursor
.item()
.ok_or(Error::InvalidAnchor("split offset is out of range".into()))
.and_then(|split| {
let mut fragments_cursor = self.fragments.cursor();
fragments_cursor.seek(&split.fragment_id, SeekBias::Left);
fragments_cursor
.item()
.ok_or(Error::InvalidAnchor("fragment id does not exist".into()))
.and_then(|fragment| {
let overshoot = if fragment.is_visible() {
offset - fragment.start_offset
} else {
0
};
let offset = fragments_cursor.start::() + overshoot;
let point = fragments_cursor.start::()
+ &fragment.point_for_offset(overshoot)?;
self.cache_position(Some(anchor.clone()), offset, point);
Ok((offset, point))
})
})
}
}
}
}
fn offset_for_point(&self, point: Point) -> Result {
let cached_offset = {
let offset_cache = self.offset_cache.try_borrow().ok();
offset_cache
.as_ref()
.and_then(|cache| cache.get(&point).cloned())
};
if let Some(cached_offset) = cached_offset {
Ok(cached_offset)
} else {
let mut fragments_cursor = self.fragments.cursor();
fragments_cursor.seek(&point, SeekBias::Left);
fragments_cursor
.item()
.ok_or(Error::OffsetOutOfRange)
.map(|fragment| {
let overshoot = fragment
.offset_for_point(point - &fragments_cursor.start::())
.unwrap();
let offset = &fragments_cursor.start::() + &overshoot;
self.cache_position(None, offset, point);
offset
})
}
}
/// Compares two anchors by the offsets they currently resolve to.
///
/// Errors from `offset_for_anchor` are propagated.
pub fn cmp_anchors(&self, a: &Anchor, b: &Anchor) -> Result {
let a_offset = self.offset_for_anchor(a)?;
let b_offset = self.offset_for_anchor(b)?;
Ok(a_offset.cmp(&b_offset))
}
fn cache_position(&self, anchor: Option, offset: usize, point: Point) {
anchor.map(|anchor| {
if let Ok(mut anchor_cache) = self.anchor_cache.try_borrow_mut() {
anchor_cache.insert(anchor, (offset, point));
}
});
if let Ok(mut offset_cache) = self.offset_cache.try_borrow_mut() {
offset_cache.insert(point, offset);
}
}
}
impl Point {
pub fn new(row: u32, column: u32) -> Self {
Point { row, column }
}
pub fn zero() -> Self {
Point::new(0, 0)
}
pub fn is_zero(&self) -> bool {
self.row == 0 && self.column == 0
}
}
// Lets `Point` be used as a dimension of the fragment tree: the value for a
// summary is its two-dimensional extent.
impl btree::Dimension for Point {
fn from_summary(summary: &FragmentSummary) -> Self {
summary.extent_2d
}
}
/// Advances a point by the extent `other`.
///
/// An extent that stays on the same row (`other.row == 0`) only moves the
/// column; otherwise the rows are added and the column is taken from `other`.
impl<'a> Add<&'a Self> for Point {
    type Output = Point;

    fn add(self, other: &'a Self) -> Self::Output {
        match other.row {
            0 => Point::new(self.row, self.column + other.column),
            rows => Point::new(self.row + rows, other.column),
        }
    }
}
/// Computes the extent from `other` to `self`.
///
/// When both points are on the same row the result is a column difference on
/// row 0; otherwise it is the row difference together with `self`'s column.
/// In debug builds `other` must not be greater than `self`.
impl<'a> Sub<&'a Self> for Point {
    type Output = Point;

    fn sub(self, other: &'a Self) -> Self::Output {
        debug_assert!(*other <= self);
        let row = self.row - other.row;
        let column = if row == 0 {
            self.column - other.column
        } else {
            self.column
        };
        Point::new(row, column)
    }
}
/// Advances a point in place by the extent `other`.
///
/// An extent on row 0 only moves the column; any other extent adds its rows
/// and replaces the column with its own.
impl<'a> AddAssign<&'a Self> for Point {
    fn add_assign(&mut self, other: &'a Self) {
        if other.row != 0 {
            self.row += other.row;
            self.column = other.column;
            return;
        }
        self.column += other.column;
    }
}
impl PartialOrd for Point {
fn partial_cmp(&self, other: &Point) -> Option {
Some(self.cmp(other))
}
}
/// Orders points by row first and by column second.
///
/// Row and column are packed into a single `u64` key (row in the upper 32
/// bits) so one integer comparison decides the order. Using `u64` rather than
/// `usize` makes the same implementation valid on every target, so no
/// pointer-width specific variants are needed.
impl Ord for Point {
    fn cmp(&self, other: &Point) -> Ordering {
        let key = |point: &Point| (u64::from(point.row) << 32) | u64::from(point.column);
        key(self).cmp(&key(other))
    }
}
impl Anchor {
/// Serializes this anchor into `builder` and returns the offset of the
/// written flatbuffer table.
///
/// `Start` and `End` only set the variant and leave every other field at its
/// default. `Middle` additionally writes the insertion id, the offset (widened
/// to `u64`) and the bias.
fn to_flatbuf<'fbb>(
&self,
builder: &mut FlatBufferBuilder<'fbb>,
) -> WIPOffset> {
match self {
Anchor::Start => serialization::buffer::Anchor::create(
builder,
&serialization::buffer::AnchorArgs {
variant: serialization::buffer::AnchorVariant::Start,
..serialization::buffer::AnchorArgs::default()
},
),
Anchor::End => serialization::buffer::Anchor::create(
builder,
&serialization::buffer::AnchorArgs {
variant: serialization::buffer::AnchorVariant::End,
..serialization::buffer::AnchorArgs::default()
},
),
Anchor::Middle {
insertion_id,
offset,
bias,
} => serialization::buffer::Anchor::create(
builder,
&serialization::buffer::AnchorArgs {
variant: serialization::buffer::AnchorVariant::Middle,
insertion_id: Some(&insertion_id.to_flatbuf()),
offset: *offset as u64,
bias: bias.to_flatbuf(),
},
),
}
}
/// Rebuilds an anchor from its flatbuffer representation.
///
/// Returns `crate::Error::DeserializeError` when a `Middle` anchor carries no
/// insertion id. The stored offset is converted to `usize` with an `as` cast.
fn from_flatbuf<'fbb>(
message: &serialization::buffer::Anchor<'fbb>,
) -> Result {
match message.variant() {
serialization::buffer::AnchorVariant::Start => Ok(Anchor::Start),
serialization::buffer::AnchorVariant::End => Ok(Anchor::End),
serialization::buffer::AnchorVariant::Middle => Ok(Anchor::Middle {
insertion_id: time::Local::from_flatbuf(
message
.insertion_id()
.ok_or(crate::Error::DeserializeError)?,
),
offset: message.offset() as usize,
bias: AnchorBias::from_flatbuf(message.bias()),
}),
}
}
}
impl AnchorBias {
    /// Maps this bias onto the flatbuffer enum value of the same name.
    fn to_flatbuf(&self) -> serialization::buffer::AnchorBias {
        match *self {
            AnchorBias::Left => serialization::buffer::AnchorBias::Left,
            AnchorBias::Right => serialization::buffer::AnchorBias::Right,
        }
    }

    /// Maps a flatbuffer bias value back onto the `AnchorBias` of the same
    /// name.
    fn from_flatbuf(message: serialization::buffer::AnchorBias) -> Self {
        let bias = match message {
            serialization::buffer::AnchorBias::Left => AnchorBias::Left,
            serialization::buffer::AnchorBias::Right => AnchorBias::Right,
        };
        bias
    }
}
impl Iter {
/// Creates a forward iterator positioned at offset 0 of `buffer`.
fn new(buffer: &Buffer) -> Self {
let mut fragment_cursor = buffer.fragments.cursor();
fragment_cursor.seek(&0, SeekBias::Right);
Self {
fragment_cursor,
fragment_offset: 0,
reversed: false,
}
}
/// Creates a forward iterator positioned at `point` in `buffer`.
///
/// When the cursor finds no fragment at `point`, the offset within the
/// fragment is set to 0. Panics if the fragment cannot convert the point into
/// an offset, because the result is unwrapped.
fn at_point(buffer: &Buffer, point: Point) -> Self {
let mut fragment_cursor = buffer.fragments.cursor();
fragment_cursor.seek(&point, SeekBias::Right);
let fragment_offset = if let Some(fragment) = fragment_cursor.item() {
let point_in_fragment = point - &fragment_cursor.start::();
fragment.offset_for_point(point_in_fragment).unwrap()
} else {
0
};
Self {
fragment_cursor,
fragment_offset,
reversed: false,
}
}
/// Switches the iterator to backward iteration from its current position.
///
/// This is an inherent method that only sets the `reversed` flag; it returns
/// the same `Iter` rather than an adapter.
pub fn rev(mut self) -> Iter {
self.reversed = true;
self
}
/// Consumes the iterator and decodes the remaining code units as UTF-16,
/// replacing invalid sequences (lossy conversion).
pub fn into_string(self) -> String {
String::from_utf16_lossy(&self.collect::>())
}
}
// Yields the buffer's code units one at a time, walking the fragments forward
// or, when `reversed` is set, backward.
impl Iterator for Iter {
type Item = u16;
fn next(&mut self) -> Option {
if self.reversed {
// Backward: first try the code unit just before the current offset in
// the current fragment.
if let Some(fragment) = self.fragment_cursor.item() {
if self.fragment_offset > 0 {
self.fragment_offset -= 1;
if let Some(c) = fragment.code_unit(self.fragment_offset) {
return Some(c);
}
}
}
// Otherwise step back to the nearest earlier fragment with a non-zero
// length and yield its last code unit.
loop {
self.fragment_cursor.prev();
if let Some(fragment) = self.fragment_cursor.item() {
if fragment.len() > 0 {
self.fragment_offset = fragment.len() - 1;
return fragment.code_unit(self.fragment_offset);
}
} else {
break;
}
}
None
} else {
// Forward: first try the code unit at the current offset in the current
// fragment.
if let Some(fragment) = self.fragment_cursor.item() {
if let Some(c) = fragment.code_unit(self.fragment_offset) {
self.fragment_offset += 1;
return Some(c);
}
}
// Otherwise advance to the next fragment that has a code unit at index 0.
loop {
self.fragment_cursor.next();
if let Some(fragment) = self.fragment_cursor.item() {
if let Some(c) = fragment.code_unit(0) {
self.fragment_offset = 1;
return Some(c);
}
} else {
break;
}
}
None
}
}
}
// Groups the fragments visited by the filtered cursor into `Change` values.
// A fragment counts as an insertion when it was not visible at `since` but is
// visible now, and as a deletion in the opposite case. Consecutive fragments
// are folded into one change as long as each starts exactly where the change
// built so far ends in the new text.
impl bool> Iterator for ChangesIter {
type Item = Change;
fn next(&mut self) -> Option {
let mut change: Option = None;
while let Some(fragment) = self.cursor.item() {
let position = self.cursor.start();
if !fragment.was_visible(&self.since) && fragment.is_visible() {
// Inserted since `since`.
if let Some(ref mut change) = change {
if change.range.start + &change.new_extent == position {
change.code_units.extend(fragment.code_units());
change.new_extent += &fragment.extent_2d();
} else {
// Not adjacent: leave the cursor here for the next call.
break;
}
} else {
change = Some(Change {
range: position..position,
code_units: Vec::from(fragment.code_units()),
new_extent: fragment.extent_2d(),
});
}
} else if fragment.was_visible(&self.since) && !fragment.is_visible() {
// Deleted since `since`.
if let Some(ref mut change) = change {
if change.range.start + &change.new_extent == position {
change.range.end += &fragment.extent_2d();
} else {
// Not adjacent: leave the cursor here for the next call.
break;
}
} else {
change = Some(Change {
range: position..position + &fragment.extent_2d(),
code_units: Vec::new(),
new_extent: Point::zero(),
});
}
}
self.cursor.next();
}
change
}
}
/// Computes the changes that turn the code units `a` into `b`.
///
/// Runs `diffs::myers::diff` through a `diffs::Replace` adapter and collects
/// one `Change` per delete, insert or replace callback. Positions are tracked
/// as a `Point` that is advanced by the extent of equal runs and of newly
/// inserted text, but not by deleted text.
///
/// Panics if the diff reports an error, since the result is unwrapped.
pub fn diff(a: &[u16], b: &[u16]) -> Vec {
// Receives the diff callbacks and accumulates the resulting changes.
struct ChangeCollector<'a> {
a: &'a [u16],
b: &'a [u16],
position: Point,
changes: Vec,
}
impl<'a> diffs::Diff for ChangeCollector<'a> {
type Error = ();
// Unchanged text only moves the current position forward.
fn equal(&mut self, old: usize, _: usize, len: usize) -> Result<(), ()> {
self.position += &Text::extent(&self.a[old..old + len]);
Ok(())
}
// Deleted text produces a change with no new code units; the position is
// left where it is.
fn delete(&mut self, old: usize, len: usize) -> Result<(), ()> {
self.changes.push(Change {
range: self.position..self.position + &Text::extent(&self.a[old..old + len]),
code_units: Vec::new(),
new_extent: Point::zero(),
});
Ok(())
}
// Inserted text produces a change with an empty old range and advances the
// position by the inserted extent.
fn insert(&mut self, _: usize, new: usize, new_len: usize) -> Result<(), ()> {
let new_extent = Text::extent(&self.b[new..new + new_len]);
self.changes.push(Change {
range: self.position..self.position,
code_units: Vec::from(&self.b[new..new + new_len]),
new_extent,
});
self.position += &new_extent;
Ok(())
}
// A replacement covers the old extent and advances the position by the
// new extent.
fn replace(
&mut self,
old: usize,
old_len: usize,
new: usize,
new_len: usize,
) -> Result<(), ()> {
let old_extent = Text::extent(&self.a[old..old + old_len]);
let new_extent = Text::extent(&self.b[new..new + new_len]);
self.changes.push(Change {
range: self.position..self.position + &old_extent,
code_units: Vec::from(&self.b[new..new + new_len]),
new_extent,
});
self.position += &new_extent;
Ok(())
}
}
let mut collector = diffs::Replace::new(ChangeCollector {
a,
b,
position: Point::zero(),
changes: Vec::new(),
});
diffs::myers::diff(&mut collector, a, 0, a.len(), b, 0, b.len()).unwrap();
collector.into_inner().changes
}
impl Selection {
pub fn head(&self) -> &Anchor {
if self.reversed {
&self.start
} else {
&self.end
}
}
pub fn set_head
(&mut self, buffer: &Buffer, cursor: Anchor) {
if buffer.cmp_anchors(&cursor, self.tail()).unwrap() < Ordering::Equal {
if !self.reversed {
mem::swap(&mut self.start, &mut self.end);
self.reversed = true;
}
self.start = cursor;
} else {
if self.reversed {
mem::swap(&mut self.start, &mut self.end);
self.reversed = false;
}
self.end = cursor;
}
}
pub fn tail(&self) -> &Anchor {
if self.reversed {
&self.end
} else {
&self.start
}
}
pub fn is_empty(&self, buffer: &Buffer) -> bool {
buffer.cmp_anchors(&self.start, &self.end).unwrap() == Ordering::Equal
}
pub fn anchor_range(&self) -> Range {
self.start.clone()..self.end.clone()
}
fn to_flatbuf<'fbb>(
&self,
builder: &mut FlatBufferBuilder<'fbb>,
) -> WIPOffset> {
let start = Some(self.start.to_flatbuf(builder));
let end = Some(self.end.to_flatbuf(builder));
serialization::buffer::Selection::create(
builder,
&serialization::buffer::SelectionArgs {
start,
end,
reversed: self.reversed,
},
)
}
fn from_flatbuf<'fbb>(
message: serialization::buffer::Selection<'fbb>,
) -> Result {
Ok(Self {
start: Anchor::from_flatbuf(&message.start().ok_or(crate::Error::DeserializeError)?)?,
end: Anchor::from_flatbuf(&message.end().ok_or(crate::Error::DeserializeError)?)?,
reversed: message.reversed(),
})
}
}
impl Text {
pub fn new(code_units: Vec) -> Self {
fn build_tree(index: usize, line_lengths: &[u32], mut tree: &mut [LineNode]) {
if line_lengths.is_empty() {
return;
}
let mid = if line_lengths.len() == 1 {
0
} else {
let depth = log2_fast(line_lengths.len());
let max_elements = (1 << (depth)) - 1;
let right_subtree_elements = 1 << (depth - 1);
cmp::min(line_lengths.len() - right_subtree_elements, max_elements)
};
let len = line_lengths[mid];
let lower = &line_lengths[0..mid];
let upper = &line_lengths[mid + 1..];
let left_child_index = index * 2 + 1;
let right_child_index = index * 2 + 2;
build_tree(left_child_index, lower, &mut tree);
build_tree(right_child_index, upper, &mut tree);
tree[index] = {
let mut left_child_longest_row = 0;
let mut left_child_longest_row_len = 0;
let mut left_child_offset = 0;
let mut left_child_rows = 0;
if let Some(left_child) = tree.get(left_child_index) {
left_child_longest_row = left_child.longest_row;
left_child_longest_row_len = left_child.longest_row_len;
left_child_offset = left_child.offset;
left_child_rows = left_child.rows;
}
let mut right_child_longest_row = 0;
let mut right_child_longest_row_len = 0;
let mut right_child_offset = 0;
let mut right_child_rows = 0;
if let Some(right_child) = tree.get(right_child_index) {
right_child_longest_row = right_child.longest_row;
right_child_longest_row_len = right_child.longest_row_len;
right_child_offset = right_child.offset;
right_child_rows = right_child.rows;
}
let mut longest_row = 0;
let mut longest_row_len = 0;
if left_child_longest_row_len > longest_row_len {
longest_row = left_child_longest_row;
longest_row_len = left_child_longest_row_len;
}
if len > longest_row_len {
longest_row = left_child_rows;
longest_row_len = len;
}
if right_child_longest_row_len > longest_row_len {
longest_row = left_child_rows + right_child_longest_row + 1;
longest_row_len = right_child_longest_row_len;
}
LineNode {
len,
longest_row,
longest_row_len,
offset: left_child_offset + len as usize + right_child_offset + 1,
rows: left_child_rows + right_child_rows + 1,
}
};
}
let mut line_lengths = Vec::new();
let mut prev_offset = 0;
for (offset, code_unit) in code_units.iter().enumerate() {
if code_unit == &u16::from(b'\n') {
line_lengths.push((offset - prev_offset) as u32);
prev_offset = offset + 1;
}
}
line_lengths.push((code_units.len() - prev_offset) as u32);
let mut nodes = Vec::new();
nodes.resize(
line_lengths.len(),
LineNode {
len: 0,
longest_row_len: 0,
longest_row: 0,
offset: 0,
rows: 0,
},
);
build_tree(0, &line_lengths, &mut nodes);
Self { code_units, nodes }
}
/// Measures `code_units` as a 2D extent.
///
/// The row component is the number of `'\n'` code units; the column component is the number
/// of code units that follow the last newline (or all of them when there is no newline).
fn extent(code_units: &[u16]) -> Point {
    let newline = b'\n' as u16;
    let (rows, last_row_len) =
        code_units
            .iter()
            .fold((0, 0), |(rows, column), &unit| {
                if unit == newline {
                    // A newline starts a fresh row, so the column count restarts.
                    (rows + 1, 0)
                } else {
                    (rows, column + 1)
                }
            });
    Point::new(rows, last_row_len)
}
/// Returns the number of code units held in `code_units`.
fn len(&self) -> usize {
self.code_units.len()
}
fn longest_row_in_range(&self, target_range: Range) -> Result<(u32, u32), Error> {
let mut longest_row = 0;
let mut longest_row_len = 0;
self.search(|probe| {
if target_range.start <= probe.offset_range.end
&& probe.right_ancestor_start_offset <= target_range.end
{
if let Some(right_child) = probe.right_child {
if right_child.longest_row_len >= longest_row_len {
longest_row = probe.row + 1 + right_child.longest_row;
longest_row_len = right_child.longest_row_len;
}
}
}
if target_range.start < probe.offset_range.start {
if probe.offset_range.end < target_range.end && probe.node.len >= longest_row_len {
longest_row = probe.row;
longest_row_len = probe.node.len;
}
Ordering::Less
} else if target_range.start > probe.offset_range.end {
Ordering::Greater
} else {
let node_end = cmp::min(probe.offset_range.end, target_range.end);
let node_len = (node_end - target_range.start) as u32;
if node_len >= longest_row_len {
longest_row = probe.row;
longest_row_len = node_len;
}
Ordering::Equal
}
})
.ok_or(Error::OffsetOutOfRange)?;
self.search(|probe| {
if target_range.end >= probe.offset_range.start
&& probe.left_ancestor_end_offset >= target_range.start
{
if let Some(left_child) = probe.left_child {
if left_child.longest_row_len > longest_row_len {
let left_ancestor_row = probe.row - left_child.rows;
longest_row = left_ancestor_row + left_child.longest_row;
longest_row_len = left_child.longest_row_len;
}
}
}
if target_range.end < probe.offset_range.start {
Ordering::Less
} else if target_range.end > probe.offset_range.end {
if target_range.start < probe.offset_range.start && probe.node.len > longest_row_len
{
longest_row = probe.row;
longest_row_len = probe.node.len;
}
Ordering::Greater
} else {
let node_start = cmp::max(target_range.start, probe.offset_range.start);
let node_len = (target_range.end - node_start) as u32;
if node_len > longest_row_len {
longest_row = probe.row;
longest_row_len = node_len;
}
Ordering::Equal
}
})
.ok_or(Error::OffsetOutOfRange)?;
Ok((longest_row, longest_row_len))
}
fn point_for_offset(&self, offset: usize) -> Result {
let search_result = self.search(|probe| {
if offset < probe.offset_range.start {
Ordering::Less
} else if offset > probe.offset_range.end {
Ordering::Greater
} else {
Ordering::Equal
}
});
if let Some((offset_range, row, _)) = search_result {
Ok(Point::new(row, (offset - offset_range.start) as u32))
} else {
Err(Error::OffsetOutOfRange)
}
}
fn offset_for_point(&self, point: Point) -> Result